rev 54658 : refactored mask and shuffle creation methods, moved classes to top-level

   1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.DoubleVector;

import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
  38 
  39 @BenchmarkMode(Mode.Throughput)
  40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  41 @State(Scope.Benchmark)
  42 @Warmup(iterations = 3, time = 1)
  43 @Measurement(iterations = 5, time = 1)
  44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  45 public class Double64Vector extends AbstractVectorBenchmark {
  46     static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_64;
  47 
  48     static final int INVOC_COUNT = 1; // get rid of outer loop
  49 
  50     @Param("1024")
  51     int size;
  52 
  53     double[] fill(IntFunction<Double> f) {
  54         double[] array = new double[size];
  55         for (int i = 0; i < array.length; i++) {
  56             array[i] = f.apply(i);
  57         }
  58         return array;
  59     }
  60 
  61     double[] a, b, c, r;
  62     boolean[] m, rm;
  63     int[] s;
  64 
  65     @Setup
  66     public void init() {
  67         size += size % SPECIES.length(); // FIXME: add post-loops
  68 
  69         a = fill(i -> (double)(2*i));
  70         b = fill(i -> (double)(i+1));
  71         c = fill(i -> (double)(i+5));
  72         r = fill(i -> (double)0);
  73 
  74         m = fillMask(size, i -> (i % 2) == 0);
  75         rm = fillMask(size, i -> false);
  76 
  77         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  78     }
  79 
  80     final IntFunction<double[]> fa = vl -> a;
  81     final IntFunction<double[]> fb = vl -> b;
  82     final IntFunction<double[]> fc = vl -> c;
  83     final IntFunction<double[]> fr = vl -> r;
  84     final IntFunction<boolean[]> fm = vl -> m;
  85     final IntFunction<boolean[]> fmr = vl -> rm;
  86     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  87 
  88 
  89     @Benchmark
  90     public void add(Blackhole bh) {
  91         double[] a = fa.apply(SPECIES.length());
  92         double[] b = fb.apply(SPECIES.length());
  93         double[] r = fr.apply(SPECIES.length());
  94 
  95         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  96             for (int i = 0; i < a.length; i += SPECIES.length()) {
  97                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
  98                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
  99                 av.add(bv).intoArray(r, i);
 100             }
 101         }
 102 
 103         bh.consume(r);
 104     }
 105 
 106     @Benchmark
 107     public void addMasked(Blackhole bh) {
 108         double[] a = fa.apply(SPECIES.length());
 109         double[] b = fb.apply(SPECIES.length());
 110         double[] r = fr.apply(SPECIES.length());
 111         boolean[] mask = fm.apply(SPECIES.length());
 112         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 113 
 114         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 115             for (int i = 0; i < a.length; i += SPECIES.length()) {
 116                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 117                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 118                 av.add(bv, vmask).intoArray(r, i);
 119             }
 120         }
 121 
 122         bh.consume(r);
 123     }
 124 
 125     @Benchmark
 126     public void sub(Blackhole bh) {
 127         double[] a = fa.apply(SPECIES.length());
 128         double[] b = fb.apply(SPECIES.length());
 129         double[] r = fr.apply(SPECIES.length());
 130 
 131         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 132             for (int i = 0; i < a.length; i += SPECIES.length()) {
 133                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 134                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 135                 av.sub(bv).intoArray(r, i);
 136             }
 137         }
 138 
 139         bh.consume(r);
 140     }
 141 
 142     @Benchmark
 143     public void subMasked(Blackhole bh) {
 144         double[] a = fa.apply(SPECIES.length());
 145         double[] b = fb.apply(SPECIES.length());
 146         double[] r = fr.apply(SPECIES.length());
 147         boolean[] mask = fm.apply(SPECIES.length());
 148         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 149 
 150         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 151             for (int i = 0; i < a.length; i += SPECIES.length()) {
 152                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 153                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 154                 av.sub(bv, vmask).intoArray(r, i);
 155             }
 156         }
 157 
 158         bh.consume(r);
 159     }
 160 
 161 
 162     @Benchmark
 163     public void div(Blackhole bh) {
 164         double[] a = fa.apply(SPECIES.length());
 165         double[] b = fb.apply(SPECIES.length());
 166         double[] r = fr.apply(SPECIES.length());
 167 
 168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 169             for (int i = 0; i < a.length; i += SPECIES.length()) {
 170                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 171                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 172                 av.div(bv).intoArray(r, i);
 173             }
 174         }
 175 
 176         bh.consume(r);
 177     }
 178 
 179 
 180 
 181     @Benchmark
 182     public void divMasked(Blackhole bh) {
 183         double[] a = fa.apply(SPECIES.length());
 184         double[] b = fb.apply(SPECIES.length());
 185         double[] r = fr.apply(SPECIES.length());
 186         boolean[] mask = fm.apply(SPECIES.length());
 187         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 188 
 189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 190             for (int i = 0; i < a.length; i += SPECIES.length()) {
 191                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 192                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 193                 av.div(bv, vmask).intoArray(r, i);
 194             }
 195         }
 196 
 197         bh.consume(r);
 198     }
 199 
 200 
 201     @Benchmark
 202     public void mul(Blackhole bh) {
 203         double[] a = fa.apply(SPECIES.length());
 204         double[] b = fb.apply(SPECIES.length());
 205         double[] r = fr.apply(SPECIES.length());
 206 
 207         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 208             for (int i = 0; i < a.length; i += SPECIES.length()) {
 209                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 210                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 211                 av.mul(bv).intoArray(r, i);
 212             }
 213         }
 214 
 215         bh.consume(r);
 216     }
 217 
 218     @Benchmark
 219     public void mulMasked(Blackhole bh) {
 220         double[] a = fa.apply(SPECIES.length());
 221         double[] b = fb.apply(SPECIES.length());
 222         double[] r = fr.apply(SPECIES.length());
 223         boolean[] mask = fm.apply(SPECIES.length());
 224         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 225 
 226         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 227             for (int i = 0; i < a.length; i += SPECIES.length()) {
 228                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 229                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 230                 av.mul(bv, vmask).intoArray(r, i);
 231             }
 232         }
 233 
 234         bh.consume(r);
 235     }
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266 
 267     @Benchmark
 268     public void max(Blackhole bh) {
 269         double[] a = fa.apply(SPECIES.length());
 270         double[] b = fb.apply(SPECIES.length());
 271         double[] r = fr.apply(SPECIES.length());
 272 
 273         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 274             for (int i = 0; i < a.length; i += SPECIES.length()) {
 275                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 276                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 277                 av.max(bv).intoArray(r, i);
 278             }
 279         }
 280 
 281         bh.consume(r);
 282     }
 283 
 284     @Benchmark
 285     public void min(Blackhole bh) {
 286         double[] a = fa.apply(SPECIES.length());
 287         double[] b = fb.apply(SPECIES.length());
 288         double[] r = fr.apply(SPECIES.length());
 289 
 290         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 291             for (int i = 0; i < a.length; i += SPECIES.length()) {
 292                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 293                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 294                 av.min(bv).intoArray(r, i);
 295             }
 296         }
 297 
 298         bh.consume(r);
 299     }
 300 
 301 
 302 
 303 
 304     @Benchmark
 305     public void addAll(Blackhole bh) {
 306         double[] a = fa.apply(SPECIES.length());
 307         double ra = 0;
 308 
 309         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 310             ra = 0;
 311             for (int i = 0; i < a.length; i += SPECIES.length()) {
 312                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 313                 ra += av.addAll();
 314             }
 315         }
 316         bh.consume(ra);
 317     }
 318 
 319     @Benchmark
 320     public void mulAll(Blackhole bh) {
 321         double[] a = fa.apply(SPECIES.length());
 322         double ra = 1;
 323 
 324         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 325             ra = 1;
 326             for (int i = 0; i < a.length; i += SPECIES.length()) {
 327                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 328                 ra *= av.mulAll();
 329             }
 330         }
 331         bh.consume(ra);
 332     }
 333 
 334     @Benchmark
 335     public void minAll(Blackhole bh) {
 336         double[] a = fa.apply(SPECIES.length());
 337         double ra = Double.POSITIVE_INFINITY;
 338 
 339         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 340             ra = Double.POSITIVE_INFINITY;
 341             for (int i = 0; i < a.length; i += SPECIES.length()) {
 342                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 343                 ra = (double)Math.min(ra, av.minAll());
 344             }
 345         }
 346         bh.consume(ra);
 347     }
 348 
 349     @Benchmark
 350     public void maxAll(Blackhole bh) {
 351         double[] a = fa.apply(SPECIES.length());
 352         double ra = Double.NEGATIVE_INFINITY;
 353 
 354         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 355             ra = Double.NEGATIVE_INFINITY;
 356             for (int i = 0; i < a.length; i += SPECIES.length()) {
 357                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 358                 ra = (double)Math.max(ra, av.maxAll());
 359             }
 360         }
 361         bh.consume(ra);
 362     }
 363 
 364 
 365 
 366     @Benchmark
 367     public void with(Blackhole bh) {
 368         double[] a = fa.apply(SPECIES.length());
 369         double[] r = fr.apply(SPECIES.length());
 370 
 371         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 372             for (int i = 0; i < a.length; i += SPECIES.length()) {
 373                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 374                 av.with(0, (double)4).intoArray(r, i);
 375             }
 376         }
 377 
 378         bh.consume(r);
 379     }
 380 
 381     @Benchmark
 382     public Object lessThan() {
 383         double[] a = fa.apply(size);
 384         double[] b = fb.apply(size);
 385         boolean[] ms = fm.apply(size);
 386         VectorMask<Double> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 387 
 388         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 389             for (int i = 0; i < a.length; i += SPECIES.length()) {
 390                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 391                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 392                 VectorMask<Double> mv = av.lessThan(bv);
 393 
 394                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 395             }
 396         }
 397         return m;
 398     }
 399 
 400 
 401     @Benchmark
 402     public Object greaterThan() {
 403         double[] a = fa.apply(size);
 404         double[] b = fb.apply(size);
 405         boolean[] ms = fm.apply(size);
 406         VectorMask<Double> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 407 
 408         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 409             for (int i = 0; i < a.length; i += SPECIES.length()) {
 410                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 411                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 412                 VectorMask<Double> mv = av.greaterThan(bv);
 413 
 414                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 415             }
 416         }
 417         return m;
 418     }
 419 
 420 
 421     @Benchmark
 422     public Object equal() {
 423         double[] a = fa.apply(size);
 424         double[] b = fb.apply(size);
 425         boolean[] ms = fm.apply(size);
 426         VectorMask<Double> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 427 
 428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 429             for (int i = 0; i < a.length; i += SPECIES.length()) {
 430                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 431                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 432                 VectorMask<Double> mv = av.equal(bv);
 433 
 434                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 435             }
 436         }
 437         return m;
 438     }
 439 
 440 
 441     @Benchmark
 442     public Object notEqual() {
 443         double[] a = fa.apply(size);
 444         double[] b = fb.apply(size);
 445         boolean[] ms = fm.apply(size);
 446         VectorMask<Double> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 447 
 448         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 449             for (int i = 0; i < a.length; i += SPECIES.length()) {
 450                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 451                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 452                 VectorMask<Double> mv = av.notEqual(bv);
 453 
 454                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 455             }
 456         }
 457         return m;
 458     }
 459 
 460 
 461     @Benchmark
 462     public Object lessThanEq() {
 463         double[] a = fa.apply(size);
 464         double[] b = fb.apply(size);
 465         boolean[] ms = fm.apply(size);
 466         VectorMask<Double> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 467 
 468         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 469             for (int i = 0; i < a.length; i += SPECIES.length()) {
 470                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 471                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 472                 VectorMask<Double> mv = av.lessThanEq(bv);
 473 
 474                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 475             }
 476         }
 477         return m;
 478     }
 479 
 480 
 481     @Benchmark
 482     public Object greaterThanEq() {
 483         double[] a = fa.apply(size);
 484         double[] b = fb.apply(size);
 485         boolean[] ms = fm.apply(size);
 486         VectorMask<Double> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 487 
 488         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 489             for (int i = 0; i < a.length; i += SPECIES.length()) {
 490                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 491                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 492                 VectorMask<Double> mv = av.greaterThanEq(bv);
 493 
 494                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 495             }
 496         }
 497         return m;
 498     }
 499 
 500 
 501     @Benchmark
 502     public void blend(Blackhole bh) {
 503         double[] a = fa.apply(SPECIES.length());
 504         double[] b = fb.apply(SPECIES.length());
 505         double[] r = fr.apply(SPECIES.length());
 506         boolean[] mask = fm.apply(SPECIES.length());
 507         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
 508 
 509         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 510             for (int i = 0; i < a.length; i += SPECIES.length()) {
 511                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 512                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 513                 av.blend(bv, vmask).intoArray(r, i);
 514             }
 515         }
 516 
 517         bh.consume(r);
 518     }
 519 
 520     @Benchmark
 521     public void rearrange(Blackhole bh) {
 522         double[] a = fa.apply(SPECIES.length());
 523         int[] order = fs.apply(a.length, SPECIES.length());
 524         double[] r = fr.apply(SPECIES.length());
 525 
 526         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 529                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 530             }
 531         }
 532 
 533         bh.consume(r);
 534     }
 535 
 536     @Benchmark
 537     public void extract(Blackhole bh) {
 538         double[] a = fa.apply(SPECIES.length());
 539         double[] r = fr.apply(SPECIES.length());
 540 
 541         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 542             for (int i = 0; i < a.length; i += SPECIES.length()) {
 543                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 544                 int num_lanes = SPECIES.length();
 545                 // Manually unroll because full unroll happens after intrinsification.
 546                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 547                 if (num_lanes == 1) {
 548                     r[i]=av.get(0);
 549                 } else if (num_lanes == 2) {
 550                     r[i]=av.get(0);
 551                     r[i+1]=av.get(1);
 552                 } else if (num_lanes == 4) {
 553                     r[i]=av.get(0);
 554                     r[i+1]=av.get(1);
 555                     r[i+2]=av.get(2);
 556                     r[i+3]=av.get(3);
 557                 } else if (num_lanes == 8) {
 558                     r[i]=av.get(0);
 559                     r[i+1]=av.get(1);
 560                     r[i+2]=av.get(2);
 561                     r[i+3]=av.get(3);
 562                     r[i+4]=av.get(4);
 563                     r[i+5]=av.get(5);
 564                     r[i+6]=av.get(6);
 565                     r[i+7]=av.get(7);
 566                 } else if (num_lanes == 16) {
 567                     r[i]=av.get(0);
 568                     r[i+1]=av.get(1);
 569                     r[i+2]=av.get(2);
 570                     r[i+3]=av.get(3);
 571                     r[i+4]=av.get(4);
 572                     r[i+5]=av.get(5);
 573                     r[i+6]=av.get(6);
 574                     r[i+7]=av.get(7);
 575                     r[i+8]=av.get(8);
 576                     r[i+9]=av.get(9);
 577                     r[i+10]=av.get(10);
 578                     r[i+11]=av.get(11);
 579                     r[i+12]=av.get(12);
 580                     r[i+13]=av.get(13);
 581                     r[i+14]=av.get(14);
 582                     r[i+15]=av.get(15);
 583                 } else if (num_lanes == 32) {
 584                     r[i]=av.get(0);
 585                     r[i+1]=av.get(1);
 586                     r[i+2]=av.get(2);
 587                     r[i+3]=av.get(3);
 588                     r[i+4]=av.get(4);
 589                     r[i+5]=av.get(5);
 590                     r[i+6]=av.get(6);
 591                     r[i+7]=av.get(7);
 592                     r[i+8]=av.get(8);
 593                     r[i+9]=av.get(9);
 594                     r[i+10]=av.get(10);
 595                     r[i+11]=av.get(11);
 596                     r[i+12]=av.get(12);
 597                     r[i+13]=av.get(13);
 598                     r[i+14]=av.get(14);
 599                     r[i+15]=av.get(15);
 600                     r[i+16]=av.get(16);
 601                     r[i+17]=av.get(17);
 602                     r[i+18]=av.get(18);
 603                     r[i+19]=av.get(19);
 604                     r[i+20]=av.get(20);
 605                     r[i+21]=av.get(21);
 606                     r[i+22]=av.get(22);
 607                     r[i+23]=av.get(23);
 608                     r[i+24]=av.get(24);
 609                     r[i+25]=av.get(25);
 610                     r[i+26]=av.get(26);
 611                     r[i+27]=av.get(27);
 612                     r[i+28]=av.get(28);
 613                     r[i+29]=av.get(29);
 614                     r[i+30]=av.get(30);
 615                     r[i+31]=av.get(31);
 616                 } else if (num_lanes == 64) {
 617                     r[i]=av.get(0);
 618                     r[i+1]=av.get(1);
 619                     r[i+2]=av.get(2);
 620                     r[i+3]=av.get(3);
 621                     r[i+4]=av.get(4);
 622                     r[i+5]=av.get(5);
 623                     r[i+6]=av.get(6);
 624                     r[i+7]=av.get(7);
 625                     r[i+8]=av.get(8);
 626                     r[i+9]=av.get(9);
 627                     r[i+10]=av.get(10);
 628                     r[i+11]=av.get(11);
 629                     r[i+12]=av.get(12);
 630                     r[i+13]=av.get(13);
 631                     r[i+14]=av.get(14);
 632                     r[i+15]=av.get(15);
 633                     r[i+16]=av.get(16);
 634                     r[i+17]=av.get(17);
 635                     r[i+18]=av.get(18);
 636                     r[i+19]=av.get(19);
 637                     r[i+20]=av.get(20);
 638                     r[i+21]=av.get(21);
 639                     r[i+22]=av.get(22);
 640                     r[i+23]=av.get(23);
 641                     r[i+24]=av.get(24);
 642                     r[i+25]=av.get(25);
 643                     r[i+26]=av.get(26);
 644                     r[i+27]=av.get(27);
 645                     r[i+28]=av.get(28);
 646                     r[i+29]=av.get(29);
 647                     r[i+30]=av.get(30);
 648                     r[i+31]=av.get(31);
 649                     r[i+32]=av.get(32);
 650                     r[i+33]=av.get(33);
 651                     r[i+34]=av.get(34);
 652                     r[i+35]=av.get(35);
 653                     r[i+36]=av.get(36);
 654                     r[i+37]=av.get(37);
 655                     r[i+38]=av.get(38);
 656                     r[i+39]=av.get(39);
 657                     r[i+40]=av.get(40);
 658                     r[i+41]=av.get(41);
 659                     r[i+42]=av.get(42);
 660                     r[i+43]=av.get(43);
 661                     r[i+44]=av.get(44);
 662                     r[i+45]=av.get(45);
 663                     r[i+46]=av.get(46);
 664                     r[i+47]=av.get(47);
 665                     r[i+48]=av.get(48);
 666                     r[i+49]=av.get(49);
 667                     r[i+50]=av.get(50);
 668                     r[i+51]=av.get(51);
 669                     r[i+52]=av.get(52);
 670                     r[i+53]=av.get(53);
 671                     r[i+54]=av.get(54);
 672                     r[i+55]=av.get(55);
 673                     r[i+56]=av.get(56);
 674                     r[i+57]=av.get(57);
 675                     r[i+58]=av.get(58);
 676                     r[i+59]=av.get(59);
 677                     r[i+60]=av.get(60);
 678                     r[i+61]=av.get(61);
 679                     r[i+62]=av.get(62);
 680                     r[i+63]=av.get(63);
 681                 } else {
 682                     for (int j = 0; j < SPECIES.length(); j++) {
 683                         r[i+j]=av.get(j);
 684                     }
 685                 }
 686             }
 687         }
 688 
 689         bh.consume(r);
 690     }
 691 
 692 
 693     @Benchmark
 694     public void sin(Blackhole bh) {
 695         double[] a = fa.apply(SPECIES.length());
 696         double[] r = fr.apply(SPECIES.length());
 697 
 698         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 699             for (int i = 0; i < a.length; i += SPECIES.length()) {
 700                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 701                 av.sin().intoArray(r, i);
 702             }
 703         }
 704 
 705         bh.consume(r);
 706     }
 707 
 708 
 709 
 710     @Benchmark
 711     public void exp(Blackhole bh) {
 712         double[] a = fa.apply(SPECIES.length());
 713         double[] r = fr.apply(SPECIES.length());
 714 
 715         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 716             for (int i = 0; i < a.length; i += SPECIES.length()) {
 717                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 718                 av.exp().intoArray(r, i);
 719             }
 720         }
 721 
 722         bh.consume(r);
 723     }
 724 
 725 
 726 
 727     @Benchmark
 728     public void log1p(Blackhole bh) {
 729         double[] a = fa.apply(SPECIES.length());
 730         double[] r = fr.apply(SPECIES.length());
 731 
 732         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 733             for (int i = 0; i < a.length; i += SPECIES.length()) {
 734                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 735                 av.log1p().intoArray(r, i);
 736             }
 737         }
 738 
 739         bh.consume(r);
 740     }
 741 
 742 
 743 
 744     @Benchmark
 745     public void log(Blackhole bh) {
 746         double[] a = fa.apply(SPECIES.length());
 747         double[] r = fr.apply(SPECIES.length());
 748 
 749         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 750             for (int i = 0; i < a.length; i += SPECIES.length()) {
 751                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 752                 av.log().intoArray(r, i);
 753             }
 754         }
 755 
 756         bh.consume(r);
 757     }
 758 
 759 
 760 
 761     @Benchmark
 762     public void log10(Blackhole bh) {
 763         double[] a = fa.apply(SPECIES.length());
 764         double[] r = fr.apply(SPECIES.length());
 765 
 766         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 767             for (int i = 0; i < a.length; i += SPECIES.length()) {
 768                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 769                 av.log10().intoArray(r, i);
 770             }
 771         }
 772 
 773         bh.consume(r);
 774     }
 775 
 776 
 777 
 778     @Benchmark
 779     public void expm1(Blackhole bh) {
 780         double[] a = fa.apply(SPECIES.length());
 781         double[] r = fr.apply(SPECIES.length());
 782 
 783         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 784             for (int i = 0; i < a.length; i += SPECIES.length()) {
 785                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 786                 av.expm1().intoArray(r, i);
 787             }
 788         }
 789 
 790         bh.consume(r);
 791     }
 792 
 793 
 794 
 795     @Benchmark
 796     public void cos(Blackhole bh) {
 797         double[] a = fa.apply(SPECIES.length());
 798         double[] r = fr.apply(SPECIES.length());
 799 
 800         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 801             for (int i = 0; i < a.length; i += SPECIES.length()) {
 802                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 803                 av.cos().intoArray(r, i);
 804             }
 805         }
 806 
 807         bh.consume(r);
 808     }
 809 
 810 
 811 
 812     @Benchmark
 813     public void tan(Blackhole bh) {
 814         double[] a = fa.apply(SPECIES.length());
 815         double[] r = fr.apply(SPECIES.length());
 816 
 817         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 818             for (int i = 0; i < a.length; i += SPECIES.length()) {
 819                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 820                 av.tan().intoArray(r, i);
 821             }
 822         }
 823 
 824         bh.consume(r);
 825     }
 826 
 827 
 828 
 829     @Benchmark
 830     public void sinh(Blackhole bh) {
 831         double[] a = fa.apply(SPECIES.length());
 832         double[] r = fr.apply(SPECIES.length());
 833 
 834         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 835             for (int i = 0; i < a.length; i += SPECIES.length()) {
 836                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 837                 av.sinh().intoArray(r, i);
 838             }
 839         }
 840 
 841         bh.consume(r);
 842     }
 843 
 844 
 845 
 846     @Benchmark
 847     public void cosh(Blackhole bh) {
 848         double[] a = fa.apply(SPECIES.length());
 849         double[] r = fr.apply(SPECIES.length());
 850 
 851         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 852             for (int i = 0; i < a.length; i += SPECIES.length()) {
 853                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 854                 av.cosh().intoArray(r, i);
 855             }
 856         }
 857 
 858         bh.consume(r);
 859     }
 860 
 861 
 862 
 863     @Benchmark
 864     public void tanh(Blackhole bh) {
 865         double[] a = fa.apply(SPECIES.length());
 866         double[] r = fr.apply(SPECIES.length());
 867 
 868         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 869             for (int i = 0; i < a.length; i += SPECIES.length()) {
 870                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 871                 av.tanh().intoArray(r, i);
 872             }
 873         }
 874 
 875         bh.consume(r);
 876     }
 877 
 878 
 879 
 880     @Benchmark
 881     public void asin(Blackhole bh) {
 882         double[] a = fa.apply(SPECIES.length());
 883         double[] r = fr.apply(SPECIES.length());
 884 
 885         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 886             for (int i = 0; i < a.length; i += SPECIES.length()) {
 887                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 888                 av.asin().intoArray(r, i);
 889             }
 890         }
 891 
 892         bh.consume(r);
 893     }
 894 
 895 
 896 
 897     @Benchmark
 898     public void acos(Blackhole bh) {
 899         double[] a = fa.apply(SPECIES.length());
 900         double[] r = fr.apply(SPECIES.length());
 901 
 902         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 903             for (int i = 0; i < a.length; i += SPECIES.length()) {
 904                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 905                 av.acos().intoArray(r, i);
 906             }
 907         }
 908 
 909         bh.consume(r);
 910     }
 911 
 912 
 913 
 914     @Benchmark
 915     public void atan(Blackhole bh) {
 916         double[] a = fa.apply(SPECIES.length());
 917         double[] r = fr.apply(SPECIES.length());
 918 
 919         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 920             for (int i = 0; i < a.length; i += SPECIES.length()) {
 921                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 922                 av.atan().intoArray(r, i);
 923             }
 924         }
 925 
 926         bh.consume(r);
 927     }
 928 
 929 
 930 
 931     @Benchmark
 932     public void cbrt(Blackhole bh) {
 933         double[] a = fa.apply(SPECIES.length());
 934         double[] r = fr.apply(SPECIES.length());
 935 
 936         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 937             for (int i = 0; i < a.length; i += SPECIES.length()) {
 938                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 939                 av.cbrt().intoArray(r, i);
 940             }
 941         }
 942 
 943         bh.consume(r);
 944     }
 945 
 946 
 947 
 948     @Benchmark
 949     public void hypot(Blackhole bh) {
 950         double[] a = fa.apply(SPECIES.length());
 951         double[] b = fb.apply(SPECIES.length());
 952         double[] r = fr.apply(SPECIES.length());
 953 
 954         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 955             for (int i = 0; i < a.length; i += SPECIES.length()) {
 956                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 957                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 958                 av.hypot(bv).intoArray(r, i);
 959             }
 960         }
 961 
 962         bh.consume(r);
 963     }
 964 
 965 
 966 
 967     @Benchmark
 968     public void pow(Blackhole bh) {
 969         double[] a = fa.apply(SPECIES.length());
 970         double[] b = fb.apply(SPECIES.length());
 971         double[] r = fr.apply(SPECIES.length());
 972 
 973         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 974             for (int i = 0; i < a.length; i += SPECIES.length()) {
 975                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 976                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 977                 av.pow(bv).intoArray(r, i);
 978             }
 979         }
 980 
 981         bh.consume(r);
 982     }
 983 
 984 
 985 
 986     @Benchmark
 987     public void atan2(Blackhole bh) {
 988         double[] a = fa.apply(SPECIES.length());
 989         double[] b = fb.apply(SPECIES.length());
 990         double[] r = fr.apply(SPECIES.length());
 991 
 992         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 993             for (int i = 0; i < a.length; i += SPECIES.length()) {
 994                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 995                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 996                 av.atan2(bv).intoArray(r, i);
 997             }
 998         }
 999 
1000         bh.consume(r);
1001     }
1002 
1003 
1004 
1005     @Benchmark
1006     public void fma(Blackhole bh) {
1007         double[] a = fa.apply(SPECIES.length());
1008         double[] b = fb.apply(SPECIES.length());
1009         double[] c = fc.apply(SPECIES.length());
1010         double[] r = fr.apply(SPECIES.length());
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1015                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1016                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1017                 av.fma(bv, cv).intoArray(r, i);
1018             }
1019         }
1020 
1021         bh.consume(r);
1022     }
1023 
1024 
1025 
1026     @Benchmark
1027     public void fmaMasked(Blackhole bh) {
1028         double[] a = fa.apply(SPECIES.length());
1029         double[] b = fb.apply(SPECIES.length());
1030         double[] c = fc.apply(SPECIES.length());
1031         double[] r = fr.apply(SPECIES.length());
1032         boolean[] mask = fm.apply(SPECIES.length());
1033         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1034 
1035         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1036             for (int i = 0; i < a.length; i += SPECIES.length()) {
1037                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1038                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1039                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1040                 av.fma(bv, cv, vmask).intoArray(r, i);
1041             }
1042         }
1043 
1044         bh.consume(r);
1045     }
1046 
1047 
1048     @Benchmark
1049     public void neg(Blackhole bh) {
1050         double[] a = fa.apply(SPECIES.length());
1051         double[] r = fr.apply(SPECIES.length());
1052 
1053         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1054             for (int i = 0; i < a.length; i += SPECIES.length()) {
1055                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1056                 av.neg().intoArray(r, i);
1057             }
1058         }
1059 
1060         bh.consume(r);
1061     }
1062 
1063     @Benchmark
1064     public void negMasked(Blackhole bh) {
1065         double[] a = fa.apply(SPECIES.length());
1066         double[] r = fr.apply(SPECIES.length());
1067         boolean[] mask = fm.apply(SPECIES.length());
1068         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1069 
1070         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1071             for (int i = 0; i < a.length; i += SPECIES.length()) {
1072                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1073                 av.neg(vmask).intoArray(r, i);
1074             }
1075         }
1076 
1077         bh.consume(r);
1078     }
1079 
1080     @Benchmark
1081     public void abs(Blackhole bh) {
1082         double[] a = fa.apply(SPECIES.length());
1083         double[] r = fr.apply(SPECIES.length());
1084 
1085         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1086             for (int i = 0; i < a.length; i += SPECIES.length()) {
1087                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1088                 av.abs().intoArray(r, i);
1089             }
1090         }
1091 
1092         bh.consume(r);
1093     }
1094 
1095     @Benchmark
1096     public void absMasked(Blackhole bh) {
1097         double[] a = fa.apply(SPECIES.length());
1098         double[] r = fr.apply(SPECIES.length());
1099         boolean[] mask = fm.apply(SPECIES.length());
1100         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1101 
1102         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1103             for (int i = 0; i < a.length; i += SPECIES.length()) {
1104                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1105                 av.abs(vmask).intoArray(r, i);
1106             }
1107         }
1108 
1109         bh.consume(r);
1110     }
1111 
1112 
1113 
1114 
1115     @Benchmark
1116     public void sqrt(Blackhole bh) {
1117         double[] a = fa.apply(SPECIES.length());
1118         double[] r = fr.apply(SPECIES.length());
1119 
1120         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1121             for (int i = 0; i < a.length; i += SPECIES.length()) {
1122                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1123                 av.sqrt().intoArray(r, i);
1124             }
1125         }
1126 
1127         bh.consume(r);
1128     }
1129 
1130 
1131 
1132     @Benchmark
1133     public void sqrtMasked(Blackhole bh) {
1134         double[] a = fa.apply(SPECIES.length());
1135         double[] r = fr.apply(SPECIES.length());
1136         boolean[] mask = fm.apply(SPECIES.length());
1137         VectorMask<Double> vmask = VectorMask.fromValues(SPECIES, mask);
1138 
1139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1140             for (int i = 0; i < a.length; i += SPECIES.length()) {
1141                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1142                 av.sqrt(vmask).intoArray(r, i);
1143             }
1144         }
1145 
1146         bh.consume(r);
1147     }
1148 
1149 
1150 
1151     @Benchmark
1152     public void gather(Blackhole bh) {
1153         double[] a = fa.apply(SPECIES.length());
1154         int[] b    = fs.apply(a.length, SPECIES.length());
1155         double[] r = new double[a.length];
1156 
1157         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1158             for (int i = 0; i < a.length; i += SPECIES.length()) {
1159                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i, b, i);
1160                 av.intoArray(r, i);
1161             }
1162         }
1163 
1164         bh.consume(r);
1165     }
1166 
1167 
1168 
1169     @Benchmark
1170     public void scatter(Blackhole bh) {
1171         double[] a = fa.apply(SPECIES.length());
1172         int[] b = fs.apply(a.length, SPECIES.length());
1173         double[] r = new double[a.length];
1174 
1175         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1176             for (int i = 0; i < a.length; i += SPECIES.length()) {
1177                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1178                 av.intoArray(r, i, b, i);
1179             }
1180         }
1181 
1182         bh.consume(r);
1183     }
1184 
1185 }
1186 
--- EOF ---