1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.VectorMask;
  28 import jdk.incubator.vector.VectorShape;
  29 import jdk.incubator.vector.VectorSpecies;
  30 import jdk.incubator.vector.VectorShuffle;
  31 import jdk.incubator.vector.DoubleVector;
  32 
  33 import java.util.concurrent.TimeUnit;
  34 import java.util.function.BiFunction;
  35 import java.util.function.IntFunction;
  36 
  37 import org.openjdk.jmh.annotations.*;
  38 import org.openjdk.jmh.infra.Blackhole;
  39 
  40 @BenchmarkMode(Mode.Throughput)
  41 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  42 @State(Scope.Benchmark)
  43 @Warmup(iterations = 3, time = 1)
  44 @Measurement(iterations = 5, time = 1)
  45 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  46 public class Double64Vector extends AbstractVectorBenchmark {
    /** Vector species used by every kernel in this class: {@code DoubleVector.SPECIES_64}. */
    static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_64;

    /** Trip count of the outer repetition loop wrapped around each kernel; fixed at 1. */
    static final int INVOC_COUNT = 1; // get rid of outer loop

    /** Length of the benchmark arrays; JMH parameter defaulting to 1024, adjusted by {@code init()}. */
    @Param("1024")
    int size;
  53 
  54     double[] fill(IntFunction<Double> f) {
  55         double[] array = new double[size];
  56         for (int i = 0; i < array.length; i++) {
  57             array[i] = f.apply(i);
  58         }
  59         return array;
  60     }
  61 
    // Benchmark data, assigned in init(): a, b and c hold generated inputs, r is the buffer
    // the kernels store their results into.
    double[] a, b, c, r;
    // Mask arrays built with fillMask(): m from an "index is even" predicate, rm from a
    // constant-false predicate.
    boolean[] m, rm;
    // Indices drawn from RANDOM.nextInt(SPECIES.length()); read by rearrange() as shuffle input.
    int[] s;
  65 
  66     @Setup
  67     public void init() {
  68         size += size % SPECIES.length(); // FIXME: add post-loops
  69 
  70         a = fill(i -> (double)(2*i));
  71         b = fill(i -> (double)(i+1));
  72         c = fill(i -> (double)(i+5));
  73         r = fill(i -> (double)0);
  74 
  75         m = fillMask(size, i -> (i % 2) == 0);
  76         rm = fillMask(size, i -> false);
  77 
  78         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  79     }
  80 
    // Suppliers through which the kernels obtain their data. Each one ignores its argument(s)
    // and returns whatever the corresponding field references at the time of the call.
    final IntFunction<double[]> fa = vl -> a;
    final IntFunction<double[]> fb = vl -> b;
    final IntFunction<double[]> fc = vl -> c;
    final IntFunction<double[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  88 
  89 
  90     @Benchmark
  91     public void add(Blackhole bh) {
  92         double[] a = fa.apply(SPECIES.length());
  93         double[] b = fb.apply(SPECIES.length());
  94         double[] r = fr.apply(SPECIES.length());
  95 
  96         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  97             for (int i = 0; i < a.length; i += SPECIES.length()) {
  98                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
  99                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 100                 av.add(bv).intoArray(r, i);
 101             }
 102         }
 103 
 104         bh.consume(r);
 105     }
 106 
 107     @Benchmark
 108     public void addMasked(Blackhole bh) {
 109         double[] a = fa.apply(SPECIES.length());
 110         double[] b = fb.apply(SPECIES.length());
 111         double[] r = fr.apply(SPECIES.length());
 112         boolean[] mask = fm.apply(SPECIES.length());
 113         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 114 
 115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 116             for (int i = 0; i < a.length; i += SPECIES.length()) {
 117                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 118                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 119                 av.add(bv, vmask).intoArray(r, i);
 120             }
 121         }
 122 
 123         bh.consume(r);
 124     }
 125 
 126     @Benchmark
 127     public void sub(Blackhole bh) {
 128         double[] a = fa.apply(SPECIES.length());
 129         double[] b = fb.apply(SPECIES.length());
 130         double[] r = fr.apply(SPECIES.length());
 131 
 132         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 133             for (int i = 0; i < a.length; i += SPECIES.length()) {
 134                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 135                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 136                 av.sub(bv).intoArray(r, i);
 137             }
 138         }
 139 
 140         bh.consume(r);
 141     }
 142 
 143     @Benchmark
 144     public void subMasked(Blackhole bh) {
 145         double[] a = fa.apply(SPECIES.length());
 146         double[] b = fb.apply(SPECIES.length());
 147         double[] r = fr.apply(SPECIES.length());
 148         boolean[] mask = fm.apply(SPECIES.length());
 149         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 150 
 151         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 152             for (int i = 0; i < a.length; i += SPECIES.length()) {
 153                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 154                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 155                 av.sub(bv, vmask).intoArray(r, i);
 156             }
 157         }
 158 
 159         bh.consume(r);
 160     }
 161 
 162 
 163     @Benchmark
 164     public void div(Blackhole bh) {
 165         double[] a = fa.apply(SPECIES.length());
 166         double[] b = fb.apply(SPECIES.length());
 167         double[] r = fr.apply(SPECIES.length());
 168 
 169         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 170             for (int i = 0; i < a.length; i += SPECIES.length()) {
 171                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 172                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 173                 av.div(bv).intoArray(r, i);
 174             }
 175         }
 176 
 177         bh.consume(r);
 178     }
 179 
 180 
 181 
 182     @Benchmark
 183     public void divMasked(Blackhole bh) {
 184         double[] a = fa.apply(SPECIES.length());
 185         double[] b = fb.apply(SPECIES.length());
 186         double[] r = fr.apply(SPECIES.length());
 187         boolean[] mask = fm.apply(SPECIES.length());
 188         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 189 
 190         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 191             for (int i = 0; i < a.length; i += SPECIES.length()) {
 192                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 193                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 194                 av.div(bv, vmask).intoArray(r, i);
 195             }
 196         }
 197 
 198         bh.consume(r);
 199     }
 200 
 201 
 202     @Benchmark
 203     public void mul(Blackhole bh) {
 204         double[] a = fa.apply(SPECIES.length());
 205         double[] b = fb.apply(SPECIES.length());
 206         double[] r = fr.apply(SPECIES.length());
 207 
 208         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 209             for (int i = 0; i < a.length; i += SPECIES.length()) {
 210                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 211                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 212                 av.mul(bv).intoArray(r, i);
 213             }
 214         }
 215 
 216         bh.consume(r);
 217     }
 218 
 219     @Benchmark
 220     public void mulMasked(Blackhole bh) {
 221         double[] a = fa.apply(SPECIES.length());
 222         double[] b = fb.apply(SPECIES.length());
 223         double[] r = fr.apply(SPECIES.length());
 224         boolean[] mask = fm.apply(SPECIES.length());
 225         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 226 
 227         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 228             for (int i = 0; i < a.length; i += SPECIES.length()) {
 229                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 230                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 231                 av.mul(bv, vmask).intoArray(r, i);
 232             }
 233         }
 234 
 235         bh.consume(r);
 236     }
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266 
 267 
 268 
 269 
 270 
 271 
 272 
 273 
 274 
 275 
 276 
 277 
 278 
 279 
 280     @Benchmark
 281     public void max(Blackhole bh) {
 282         double[] a = fa.apply(SPECIES.length());
 283         double[] b = fb.apply(SPECIES.length());
 284         double[] r = fr.apply(SPECIES.length());
 285 
 286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 287             for (int i = 0; i < a.length; i += SPECIES.length()) {
 288                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 289                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 290                 av.max(bv).intoArray(r, i);
 291             }
 292         }
 293 
 294         bh.consume(r);
 295     }
 296 
 297     @Benchmark
 298     public void min(Blackhole bh) {
 299         double[] a = fa.apply(SPECIES.length());
 300         double[] b = fb.apply(SPECIES.length());
 301         double[] r = fr.apply(SPECIES.length());
 302 
 303         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 304             for (int i = 0; i < a.length; i += SPECIES.length()) {
 305                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 306                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 307                 av.min(bv).intoArray(r, i);
 308             }
 309         }
 310 
 311         bh.consume(r);
 312     }
 313 
 314 
 315 
 316 
 317     @Benchmark
 318     public void addLanes(Blackhole bh) {
 319         double[] a = fa.apply(SPECIES.length());
 320         double ra = 0;
 321 
 322         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 323             ra = 0;
 324             for (int i = 0; i < a.length; i += SPECIES.length()) {
 325                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 326                 ra += av.addLanes();
 327             }
 328         }
 329         bh.consume(ra);
 330     }
 331 
 332     @Benchmark
 333     public void mulLanes(Blackhole bh) {
 334         double[] a = fa.apply(SPECIES.length());
 335         double ra = 1;
 336 
 337         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 338             ra = 1;
 339             for (int i = 0; i < a.length; i += SPECIES.length()) {
 340                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 341                 ra *= av.mulLanes();
 342             }
 343         }
 344         bh.consume(ra);
 345     }
 346 
 347     @Benchmark
 348     public void minLanes(Blackhole bh) {
 349         double[] a = fa.apply(SPECIES.length());
 350         double ra = Double.POSITIVE_INFINITY;
 351 
 352         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 353             ra = Double.POSITIVE_INFINITY;
 354             for (int i = 0; i < a.length; i += SPECIES.length()) {
 355                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 356                 ra = (double)Math.min(ra, av.minLanes());
 357             }
 358         }
 359         bh.consume(ra);
 360     }
 361 
 362     @Benchmark
 363     public void maxLanes(Blackhole bh) {
 364         double[] a = fa.apply(SPECIES.length());
 365         double ra = Double.NEGATIVE_INFINITY;
 366 
 367         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 368             ra = Double.NEGATIVE_INFINITY;
 369             for (int i = 0; i < a.length; i += SPECIES.length()) {
 370                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 371                 ra = (double)Math.max(ra, av.maxLanes());
 372             }
 373         }
 374         bh.consume(ra);
 375     }
 376 
 377 
 378 
 379     @Benchmark
 380     public void with(Blackhole bh) {
 381         double[] a = fa.apply(SPECIES.length());
 382         double[] r = fr.apply(SPECIES.length());
 383 
 384         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 385             for (int i = 0; i < a.length; i += SPECIES.length()) {
 386                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 387                 av.with(0, (double)4).intoArray(r, i);
 388             }
 389         }
 390 
 391         bh.consume(r);
 392     }
 393 
 394     @Benchmark
 395     public Object lessThan() {
 396         double[] a = fa.apply(size);
 397         double[] b = fb.apply(size);
 398         boolean[] ms = fm.apply(size);
 399         VectorMask<Double> m = VectorMask.fromArray(SPECIES, ms, 0);
 400 
 401         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 402             for (int i = 0; i < a.length; i += SPECIES.length()) {
 403                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 404                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 405                 VectorMask<Double> mv = av.lessThan(bv);
 406 
 407                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 408             }
 409         }
 410         return m;
 411     }
 412 
 413 
 414     @Benchmark
 415     public Object greaterThan() {
 416         double[] a = fa.apply(size);
 417         double[] b = fb.apply(size);
 418         boolean[] ms = fm.apply(size);
 419         VectorMask<Double> m = VectorMask.fromArray(SPECIES, ms, 0);
 420 
 421         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 422             for (int i = 0; i < a.length; i += SPECIES.length()) {
 423                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 424                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 425                 VectorMask<Double> mv = av.greaterThan(bv);
 426 
 427                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 428             }
 429         }
 430         return m;
 431     }
 432 
 433 
 434     @Benchmark
 435     public Object equal() {
 436         double[] a = fa.apply(size);
 437         double[] b = fb.apply(size);
 438         boolean[] ms = fm.apply(size);
 439         VectorMask<Double> m = VectorMask.fromArray(SPECIES, ms, 0);
 440 
 441         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 442             for (int i = 0; i < a.length; i += SPECIES.length()) {
 443                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 444                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 445                 VectorMask<Double> mv = av.equal(bv);
 446 
 447                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 448             }
 449         }
 450         return m;
 451     }
 452 
 453 
 454     @Benchmark
 455     public Object notEqual() {
 456         double[] a = fa.apply(size);
 457         double[] b = fb.apply(size);
 458         boolean[] ms = fm.apply(size);
 459         VectorMask<Double> m = VectorMask.fromArray(SPECIES, ms, 0);
 460 
 461         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 462             for (int i = 0; i < a.length; i += SPECIES.length()) {
 463                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 464                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 465                 VectorMask<Double> mv = av.notEqual(bv);
 466 
 467                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 468             }
 469         }
 470         return m;
 471     }
 472 
 473 
 474     @Benchmark
 475     public Object lessThanEq() {
 476         double[] a = fa.apply(size);
 477         double[] b = fb.apply(size);
 478         boolean[] ms = fm.apply(size);
 479         VectorMask<Double> m = VectorMask.fromArray(SPECIES, ms, 0);
 480 
 481         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 482             for (int i = 0; i < a.length; i += SPECIES.length()) {
 483                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 484                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 485                 VectorMask<Double> mv = av.lessThanEq(bv);
 486 
 487                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 488             }
 489         }
 490         return m;
 491     }
 492 
 493 
 494     @Benchmark
 495     public Object greaterThanEq() {
 496         double[] a = fa.apply(size);
 497         double[] b = fb.apply(size);
 498         boolean[] ms = fm.apply(size);
 499         VectorMask<Double> m = VectorMask.fromArray(SPECIES, ms, 0);
 500 
 501         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 502             for (int i = 0; i < a.length; i += SPECIES.length()) {
 503                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 504                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 505                 VectorMask<Double> mv = av.greaterThanEq(bv);
 506 
 507                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 508             }
 509         }
 510         return m;
 511     }
 512 
 513 
 514     @Benchmark
 515     public void blend(Blackhole bh) {
 516         double[] a = fa.apply(SPECIES.length());
 517         double[] b = fb.apply(SPECIES.length());
 518         double[] r = fr.apply(SPECIES.length());
 519         boolean[] mask = fm.apply(SPECIES.length());
 520         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 521 
 522         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 523             for (int i = 0; i < a.length; i += SPECIES.length()) {
 524                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 525                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 526                 av.blend(bv, vmask).intoArray(r, i);
 527             }
 528         }
 529 
 530         bh.consume(r);
 531     }
 532 
 533     @Benchmark
 534     public void rearrange(Blackhole bh) {
 535         double[] a = fa.apply(SPECIES.length());
 536         int[] order = fs.apply(a.length, SPECIES.length());
 537         double[] r = fr.apply(SPECIES.length());
 538 
 539         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 540             for (int i = 0; i < a.length; i += SPECIES.length()) {
 541                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 542                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 543             }
 544         }
 545 
 546         bh.consume(r);
 547     }
 548 
 549     @Benchmark
 550     public void extract(Blackhole bh) {
 551         double[] a = fa.apply(SPECIES.length());
 552         double[] r = fr.apply(SPECIES.length());
 553 
 554         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 555             for (int i = 0; i < a.length; i += SPECIES.length()) {
 556                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 557                 int num_lanes = SPECIES.length();
 558                 // Manually unroll because full unroll happens after intrinsification.
 559                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 560                 if (num_lanes == 1) {
 561                     r[i]=av.lane(0);
 562                 } else if (num_lanes == 2) {
 563                     r[i]=av.lane(0);
 564                     r[i+1]=av.lane(1);
 565                 } else if (num_lanes == 4) {
 566                     r[i]=av.lane(0);
 567                     r[i+1]=av.lane(1);
 568                     r[i+2]=av.lane(2);
 569                     r[i+3]=av.lane(3);
 570                 } else if (num_lanes == 8) {
 571                     r[i]=av.lane(0);
 572                     r[i+1]=av.lane(1);
 573                     r[i+2]=av.lane(2);
 574                     r[i+3]=av.lane(3);
 575                     r[i+4]=av.lane(4);
 576                     r[i+5]=av.lane(5);
 577                     r[i+6]=av.lane(6);
 578                     r[i+7]=av.lane(7);
 579                 } else if (num_lanes == 16) {
 580                     r[i]=av.lane(0);
 581                     r[i+1]=av.lane(1);
 582                     r[i+2]=av.lane(2);
 583                     r[i+3]=av.lane(3);
 584                     r[i+4]=av.lane(4);
 585                     r[i+5]=av.lane(5);
 586                     r[i+6]=av.lane(6);
 587                     r[i+7]=av.lane(7);
 588                     r[i+8]=av.lane(8);
 589                     r[i+9]=av.lane(9);
 590                     r[i+10]=av.lane(10);
 591                     r[i+11]=av.lane(11);
 592                     r[i+12]=av.lane(12);
 593                     r[i+13]=av.lane(13);
 594                     r[i+14]=av.lane(14);
 595                     r[i+15]=av.lane(15);
 596                 } else if (num_lanes == 32) {
 597                     r[i]=av.lane(0);
 598                     r[i+1]=av.lane(1);
 599                     r[i+2]=av.lane(2);
 600                     r[i+3]=av.lane(3);
 601                     r[i+4]=av.lane(4);
 602                     r[i+5]=av.lane(5);
 603                     r[i+6]=av.lane(6);
 604                     r[i+7]=av.lane(7);
 605                     r[i+8]=av.lane(8);
 606                     r[i+9]=av.lane(9);
 607                     r[i+10]=av.lane(10);
 608                     r[i+11]=av.lane(11);
 609                     r[i+12]=av.lane(12);
 610                     r[i+13]=av.lane(13);
 611                     r[i+14]=av.lane(14);
 612                     r[i+15]=av.lane(15);
 613                     r[i+16]=av.lane(16);
 614                     r[i+17]=av.lane(17);
 615                     r[i+18]=av.lane(18);
 616                     r[i+19]=av.lane(19);
 617                     r[i+20]=av.lane(20);
 618                     r[i+21]=av.lane(21);
 619                     r[i+22]=av.lane(22);
 620                     r[i+23]=av.lane(23);
 621                     r[i+24]=av.lane(24);
 622                     r[i+25]=av.lane(25);
 623                     r[i+26]=av.lane(26);
 624                     r[i+27]=av.lane(27);
 625                     r[i+28]=av.lane(28);
 626                     r[i+29]=av.lane(29);
 627                     r[i+30]=av.lane(30);
 628                     r[i+31]=av.lane(31);
 629                 } else if (num_lanes == 64) {
 630                     r[i]=av.lane(0);
 631                     r[i+1]=av.lane(1);
 632                     r[i+2]=av.lane(2);
 633                     r[i+3]=av.lane(3);
 634                     r[i+4]=av.lane(4);
 635                     r[i+5]=av.lane(5);
 636                     r[i+6]=av.lane(6);
 637                     r[i+7]=av.lane(7);
 638                     r[i+8]=av.lane(8);
 639                     r[i+9]=av.lane(9);
 640                     r[i+10]=av.lane(10);
 641                     r[i+11]=av.lane(11);
 642                     r[i+12]=av.lane(12);
 643                     r[i+13]=av.lane(13);
 644                     r[i+14]=av.lane(14);
 645                     r[i+15]=av.lane(15);
 646                     r[i+16]=av.lane(16);
 647                     r[i+17]=av.lane(17);
 648                     r[i+18]=av.lane(18);
 649                     r[i+19]=av.lane(19);
 650                     r[i+20]=av.lane(20);
 651                     r[i+21]=av.lane(21);
 652                     r[i+22]=av.lane(22);
 653                     r[i+23]=av.lane(23);
 654                     r[i+24]=av.lane(24);
 655                     r[i+25]=av.lane(25);
 656                     r[i+26]=av.lane(26);
 657                     r[i+27]=av.lane(27);
 658                     r[i+28]=av.lane(28);
 659                     r[i+29]=av.lane(29);
 660                     r[i+30]=av.lane(30);
 661                     r[i+31]=av.lane(31);
 662                     r[i+32]=av.lane(32);
 663                     r[i+33]=av.lane(33);
 664                     r[i+34]=av.lane(34);
 665                     r[i+35]=av.lane(35);
 666                     r[i+36]=av.lane(36);
 667                     r[i+37]=av.lane(37);
 668                     r[i+38]=av.lane(38);
 669                     r[i+39]=av.lane(39);
 670                     r[i+40]=av.lane(40);
 671                     r[i+41]=av.lane(41);
 672                     r[i+42]=av.lane(42);
 673                     r[i+43]=av.lane(43);
 674                     r[i+44]=av.lane(44);
 675                     r[i+45]=av.lane(45);
 676                     r[i+46]=av.lane(46);
 677                     r[i+47]=av.lane(47);
 678                     r[i+48]=av.lane(48);
 679                     r[i+49]=av.lane(49);
 680                     r[i+50]=av.lane(50);
 681                     r[i+51]=av.lane(51);
 682                     r[i+52]=av.lane(52);
 683                     r[i+53]=av.lane(53);
 684                     r[i+54]=av.lane(54);
 685                     r[i+55]=av.lane(55);
 686                     r[i+56]=av.lane(56);
 687                     r[i+57]=av.lane(57);
 688                     r[i+58]=av.lane(58);
 689                     r[i+59]=av.lane(59);
 690                     r[i+60]=av.lane(60);
 691                     r[i+61]=av.lane(61);
 692                     r[i+62]=av.lane(62);
 693                     r[i+63]=av.lane(63);
 694                 } else {
 695                     for (int j = 0; j < SPECIES.length(); j++) {
 696                         r[i+j]=av.lane(j);
 697                     }
 698                 }
 699             }
 700         }
 701 
 702         bh.consume(r);
 703     }
 704 
 705 
 706     @Benchmark
 707     public void sin(Blackhole bh) {
 708         double[] a = fa.apply(SPECIES.length());
 709         double[] r = fr.apply(SPECIES.length());
 710 
 711         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 712             for (int i = 0; i < a.length; i += SPECIES.length()) {
 713                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 714                 av.sin().intoArray(r, i);
 715             }
 716         }
 717 
 718         bh.consume(r);
 719     }
 720 
 721 
 722 
 723     @Benchmark
 724     public void exp(Blackhole bh) {
 725         double[] a = fa.apply(SPECIES.length());
 726         double[] r = fr.apply(SPECIES.length());
 727 
 728         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 729             for (int i = 0; i < a.length; i += SPECIES.length()) {
 730                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 731                 av.exp().intoArray(r, i);
 732             }
 733         }
 734 
 735         bh.consume(r);
 736     }
 737 
 738 
 739 
 740     @Benchmark
 741     public void log1p(Blackhole bh) {
 742         double[] a = fa.apply(SPECIES.length());
 743         double[] r = fr.apply(SPECIES.length());
 744 
 745         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 746             for (int i = 0; i < a.length; i += SPECIES.length()) {
 747                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 748                 av.log1p().intoArray(r, i);
 749             }
 750         }
 751 
 752         bh.consume(r);
 753     }
 754 
 755 
 756 
 757     @Benchmark
 758     public void log(Blackhole bh) {
 759         double[] a = fa.apply(SPECIES.length());
 760         double[] r = fr.apply(SPECIES.length());
 761 
 762         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 763             for (int i = 0; i < a.length; i += SPECIES.length()) {
 764                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 765                 av.log().intoArray(r, i);
 766             }
 767         }
 768 
 769         bh.consume(r);
 770     }
 771 
 772 
 773 
 774     @Benchmark
 775     public void log10(Blackhole bh) {
 776         double[] a = fa.apply(SPECIES.length());
 777         double[] r = fr.apply(SPECIES.length());
 778 
 779         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 780             for (int i = 0; i < a.length; i += SPECIES.length()) {
 781                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 782                 av.log10().intoArray(r, i);
 783             }
 784         }
 785 
 786         bh.consume(r);
 787     }
 788 
 789 
 790 
 791     @Benchmark
 792     public void expm1(Blackhole bh) {
 793         double[] a = fa.apply(SPECIES.length());
 794         double[] r = fr.apply(SPECIES.length());
 795 
 796         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 797             for (int i = 0; i < a.length; i += SPECIES.length()) {
 798                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 799                 av.expm1().intoArray(r, i);
 800             }
 801         }
 802 
 803         bh.consume(r);
 804     }
 805 
 806 
 807 
 808     @Benchmark
 809     public void cos(Blackhole bh) {
 810         double[] a = fa.apply(SPECIES.length());
 811         double[] r = fr.apply(SPECIES.length());
 812 
 813         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 814             for (int i = 0; i < a.length; i += SPECIES.length()) {
 815                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 816                 av.cos().intoArray(r, i);
 817             }
 818         }
 819 
 820         bh.consume(r);
 821     }
 822 
 823 
 824 
 825     @Benchmark
 826     public void tan(Blackhole bh) {
 827         double[] a = fa.apply(SPECIES.length());
 828         double[] r = fr.apply(SPECIES.length());
 829 
 830         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 831             for (int i = 0; i < a.length; i += SPECIES.length()) {
 832                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 833                 av.tan().intoArray(r, i);
 834             }
 835         }
 836 
 837         bh.consume(r);
 838     }
 839 
 840 
 841 
 842     @Benchmark
 843     public void sinh(Blackhole bh) {
 844         double[] a = fa.apply(SPECIES.length());
 845         double[] r = fr.apply(SPECIES.length());
 846 
 847         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 848             for (int i = 0; i < a.length; i += SPECIES.length()) {
 849                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 850                 av.sinh().intoArray(r, i);
 851             }
 852         }
 853 
 854         bh.consume(r);
 855     }
 856 
 857 
 858 
 859     @Benchmark
 860     public void cosh(Blackhole bh) {
 861         double[] a = fa.apply(SPECIES.length());
 862         double[] r = fr.apply(SPECIES.length());
 863 
 864         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 865             for (int i = 0; i < a.length; i += SPECIES.length()) {
 866                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 867                 av.cosh().intoArray(r, i);
 868             }
 869         }
 870 
 871         bh.consume(r);
 872     }
 873 
 874 
 875 
 876     @Benchmark
 877     public void tanh(Blackhole bh) {
 878         double[] a = fa.apply(SPECIES.length());
 879         double[] r = fr.apply(SPECIES.length());
 880 
 881         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 882             for (int i = 0; i < a.length; i += SPECIES.length()) {
 883                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 884                 av.tanh().intoArray(r, i);
 885             }
 886         }
 887 
 888         bh.consume(r);
 889     }
 890 
 891 
 892 
 893     @Benchmark
 894     public void asin(Blackhole bh) {
 895         double[] a = fa.apply(SPECIES.length());
 896         double[] r = fr.apply(SPECIES.length());
 897 
 898         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 899             for (int i = 0; i < a.length; i += SPECIES.length()) {
 900                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 901                 av.asin().intoArray(r, i);
 902             }
 903         }
 904 
 905         bh.consume(r);
 906     }
 907 
 908 
 909 
 910     @Benchmark
 911     public void acos(Blackhole bh) {
 912         double[] a = fa.apply(SPECIES.length());
 913         double[] r = fr.apply(SPECIES.length());
 914 
 915         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 916             for (int i = 0; i < a.length; i += SPECIES.length()) {
 917                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 918                 av.acos().intoArray(r, i);
 919             }
 920         }
 921 
 922         bh.consume(r);
 923     }
 924 
 925 
 926 
 927     @Benchmark
 928     public void atan(Blackhole bh) {
 929         double[] a = fa.apply(SPECIES.length());
 930         double[] r = fr.apply(SPECIES.length());
 931 
 932         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 933             for (int i = 0; i < a.length; i += SPECIES.length()) {
 934                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 935                 av.atan().intoArray(r, i);
 936             }
 937         }
 938 
 939         bh.consume(r);
 940     }
 941 
 942 
 943 
 944     @Benchmark
 945     public void cbrt(Blackhole bh) {
 946         double[] a = fa.apply(SPECIES.length());
 947         double[] r = fr.apply(SPECIES.length());
 948 
 949         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 950             for (int i = 0; i < a.length; i += SPECIES.length()) {
 951                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 952                 av.cbrt().intoArray(r, i);
 953             }
 954         }
 955 
 956         bh.consume(r);
 957     }
 958 
 959 
 960 
 961     @Benchmark
 962     public void hypot(Blackhole bh) {
 963         double[] a = fa.apply(SPECIES.length());
 964         double[] b = fb.apply(SPECIES.length());
 965         double[] r = fr.apply(SPECIES.length());
 966 
 967         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 968             for (int i = 0; i < a.length; i += SPECIES.length()) {
 969                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 970                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 971                 av.hypot(bv).intoArray(r, i);
 972             }
 973         }
 974 
 975         bh.consume(r);
 976     }
 977 
 978 
 979 
 980     @Benchmark
 981     public void pow(Blackhole bh) {
 982         double[] a = fa.apply(SPECIES.length());
 983         double[] b = fb.apply(SPECIES.length());
 984         double[] r = fr.apply(SPECIES.length());
 985 
 986         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 987             for (int i = 0; i < a.length; i += SPECIES.length()) {
 988                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 989                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 990                 av.pow(bv).intoArray(r, i);
 991             }
 992         }
 993 
 994         bh.consume(r);
 995     }
 996 
 997 
 998 
 999     @Benchmark
1000     public void atan2(Blackhole bh) {
1001         double[] a = fa.apply(SPECIES.length());
1002         double[] b = fb.apply(SPECIES.length());
1003         double[] r = fr.apply(SPECIES.length());
1004 
1005         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1006             for (int i = 0; i < a.length; i += SPECIES.length()) {
1007                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1008                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1009                 av.atan2(bv).intoArray(r, i);
1010             }
1011         }
1012 
1013         bh.consume(r);
1014     }
1015 
1016 
1017 
1018     @Benchmark
1019     public void fma(Blackhole bh) {
1020         double[] a = fa.apply(SPECIES.length());
1021         double[] b = fb.apply(SPECIES.length());
1022         double[] c = fc.apply(SPECIES.length());
1023         double[] r = fr.apply(SPECIES.length());
1024 
1025         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1026             for (int i = 0; i < a.length; i += SPECIES.length()) {
1027                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1028                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1029                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1030                 av.fma(bv, cv).intoArray(r, i);
1031             }
1032         }
1033 
1034         bh.consume(r);
1035     }
1036 
1037 
1038 
1039     @Benchmark
1040     public void fmaMasked(Blackhole bh) {
1041         double[] a = fa.apply(SPECIES.length());
1042         double[] b = fb.apply(SPECIES.length());
1043         double[] c = fc.apply(SPECIES.length());
1044         double[] r = fr.apply(SPECIES.length());
1045         boolean[] mask = fm.apply(SPECIES.length());
1046         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1047 
1048         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1049             for (int i = 0; i < a.length; i += SPECIES.length()) {
1050                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1051                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1052                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1053                 av.fma(bv, cv, vmask).intoArray(r, i);
1054             }
1055         }
1056 
1057         bh.consume(r);
1058     }
1059 
1060 
1061     @Benchmark
1062     public void neg(Blackhole bh) {
1063         double[] a = fa.apply(SPECIES.length());
1064         double[] r = fr.apply(SPECIES.length());
1065 
1066         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1067             for (int i = 0; i < a.length; i += SPECIES.length()) {
1068                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1069                 av.neg().intoArray(r, i);
1070             }
1071         }
1072 
1073         bh.consume(r);
1074     }
1075 
1076     @Benchmark
1077     public void negMasked(Blackhole bh) {
1078         double[] a = fa.apply(SPECIES.length());
1079         double[] r = fr.apply(SPECIES.length());
1080         boolean[] mask = fm.apply(SPECIES.length());
1081         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1082 
1083         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1084             for (int i = 0; i < a.length; i += SPECIES.length()) {
1085                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1086                 av.neg(vmask).intoArray(r, i);
1087             }
1088         }
1089 
1090         bh.consume(r);
1091     }
1092 
1093     @Benchmark
1094     public void abs(Blackhole bh) {
1095         double[] a = fa.apply(SPECIES.length());
1096         double[] r = fr.apply(SPECIES.length());
1097 
1098         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1099             for (int i = 0; i < a.length; i += SPECIES.length()) {
1100                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1101                 av.abs().intoArray(r, i);
1102             }
1103         }
1104 
1105         bh.consume(r);
1106     }
1107 
1108     @Benchmark
1109     public void absMasked(Blackhole bh) {
1110         double[] a = fa.apply(SPECIES.length());
1111         double[] r = fr.apply(SPECIES.length());
1112         boolean[] mask = fm.apply(SPECIES.length());
1113         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1114 
1115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1116             for (int i = 0; i < a.length; i += SPECIES.length()) {
1117                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1118                 av.abs(vmask).intoArray(r, i);
1119             }
1120         }
1121 
1122         bh.consume(r);
1123     }
1124 
1125 
1126 
1127 
1128     @Benchmark
1129     public void sqrt(Blackhole bh) {
1130         double[] a = fa.apply(SPECIES.length());
1131         double[] r = fr.apply(SPECIES.length());
1132 
1133         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1134             for (int i = 0; i < a.length; i += SPECIES.length()) {
1135                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1136                 av.sqrt().intoArray(r, i);
1137             }
1138         }
1139 
1140         bh.consume(r);
1141     }
1142 
1143 
1144 
1145     @Benchmark
1146     public void sqrtMasked(Blackhole bh) {
1147         double[] a = fa.apply(SPECIES.length());
1148         double[] r = fr.apply(SPECIES.length());
1149         boolean[] mask = fm.apply(SPECIES.length());
1150         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1151 
1152         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1153             for (int i = 0; i < a.length; i += SPECIES.length()) {
1154                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1155                 av.sqrt(vmask).intoArray(r, i);
1156             }
1157         }
1158 
1159         bh.consume(r);
1160     }
1161 
1162 
1163 
1164     @Benchmark
1165     public void gather(Blackhole bh) {
1166         double[] a = fa.apply(SPECIES.length());
1167         int[] b    = fs.apply(a.length, SPECIES.length());
1168         double[] r = new double[a.length];
1169 
1170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1171             for (int i = 0; i < a.length; i += SPECIES.length()) {
1172                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i, b, i);
1173                 av.intoArray(r, i);
1174             }
1175         }
1176 
1177         bh.consume(r);
1178     }
1179 
1180 
1181 
1182     @Benchmark
1183     public void scatter(Blackhole bh) {
1184         double[] a = fa.apply(SPECIES.length());
1185         int[] b = fs.apply(a.length, SPECIES.length());
1186         double[] r = new double[a.length];
1187 
1188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1189             for (int i = 0; i < a.length; i += SPECIES.length()) {
1190                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1191                 av.intoArray(r, i, b, i);
1192             }
1193         }
1194 
1195         bh.consume(r);
1196     }
1197 
1198 }
1199