1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.Vector.Species;
  29 import jdk.incubator.vector.FloatVector;
  30 
  31 import java.util.concurrent.TimeUnit;
  32 import java.util.function.BiFunction;
  33 import java.util.function.IntFunction;
  34 
  35 import org.openjdk.jmh.annotations.*;
  36 import org.openjdk.jmh.infra.Blackhole;
  37 
  38 @BenchmarkMode(Mode.Throughput)
  39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  40 @State(Scope.Benchmark)
  41 @Warmup(iterations = 3, time = 1)
  42 @Measurement(iterations = 5, time = 1)
  43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  44 public class FloatMaxVector extends AbstractVectorBenchmark {
   45     static final Species<Float> SPECIES = FloatVector.SPECIES_MAX; // species used by every benchmark method in this class
   46 
   47     static final int INVOC_COUNT = 1; // get rid of outer loop: with a count of 1 each `ic` loop body runs exactly once
   48 
   49     @Param("1024")
   50     int size; // element count used to allocate the test arrays; init() may increase it before allocating
  51 
  52     float[] fill(IntFunction<Float> f) {
  53         float[] array = new float[size];
  54         for (int i = 0; i < array.length; i++) {
  55             array[i] = f.apply(i);
  56         }
  57         return array;
  58     }
  59 
   60     float[] a, b, c, r; // a, b, c are filled with index-derived values by init(); r is zero-filled there and used as the output buffer
   61     boolean[] m, rm; // both built by fillMask in init(): m from the predicate (i % 2) == 0, rm from a constant-false predicate
   62     int[] s; // built by fillInt in init() from RANDOM.nextInt(SPECIES.length()); read by rearrange() as shuffle indexes
  63 
  64     @Setup
  65     public void init() {
  66         size += size % SPECIES.length(); // FIXME: add post-loops
  67 
  68         a = fill(i -> (float)(2*i));
  69         b = fill(i -> (float)(i+1));
  70         c = fill(i -> (float)(i+5));
  71         r = fill(i -> (float)0);
  72 
  73         m = fillMask(size, i -> (i % 2) == 0);
  74         rm = fillMask(size, i -> false);
  75 
  76         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  77     }
  78 
   79     final IntFunction<float[]> fa = vl -> a; // returns the current value of field a; the vl argument is ignored
   80     final IntFunction<float[]> fb = vl -> b; // returns field b; argument ignored
   81     final IntFunction<float[]> fc = vl -> c; // returns field c; argument ignored
   82     final IntFunction<float[]> fr = vl -> r; // returns the shared result buffer r; argument ignored
   83     final IntFunction<boolean[]> fm = vl -> m; // returns mask array m; argument ignored
   84     final IntFunction<boolean[]> fmr = vl -> rm; // returns mask array rm; argument ignored
   85     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; // returns index array s; both arguments are ignored
  86 
  87 
  88     @Benchmark
  89     public void add(Blackhole bh) {
  90         float[] a = fa.apply(SPECIES.length());
  91         float[] b = fb.apply(SPECIES.length());
  92         float[] r = fr.apply(SPECIES.length());
  93 
  94         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  95             for (int i = 0; i < a.length; i += SPECIES.length()) {
  96                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
  97                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
  98                 av.add(bv).intoArray(r, i);
  99             }
 100         }
 101 
 102         bh.consume(r);
 103     }
 104 
 105     @Benchmark
 106     public void addMasked(Blackhole bh) {
 107         float[] a = fa.apply(SPECIES.length());
 108         float[] b = fb.apply(SPECIES.length());
 109         float[] r = fr.apply(SPECIES.length());
 110         boolean[] mask = fm.apply(SPECIES.length());
 111         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 112 
 113         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 114             for (int i = 0; i < a.length; i += SPECIES.length()) {
 115                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 116                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 117                 av.add(bv, vmask).intoArray(r, i);
 118             }
 119         }
 120 
 121         bh.consume(r);
 122     }
 123 
 124     @Benchmark
 125     public void sub(Blackhole bh) {
 126         float[] a = fa.apply(SPECIES.length());
 127         float[] b = fb.apply(SPECIES.length());
 128         float[] r = fr.apply(SPECIES.length());
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < a.length; i += SPECIES.length()) {
 132                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 133                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 134                 av.sub(bv).intoArray(r, i);
 135             }
 136         }
 137 
 138         bh.consume(r);
 139     }
 140 
 141     @Benchmark
 142     public void subMasked(Blackhole bh) {
 143         float[] a = fa.apply(SPECIES.length());
 144         float[] b = fb.apply(SPECIES.length());
 145         float[] r = fr.apply(SPECIES.length());
 146         boolean[] mask = fm.apply(SPECIES.length());
 147         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 148 
 149         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 150             for (int i = 0; i < a.length; i += SPECIES.length()) {
 151                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 152                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 153                 av.sub(bv, vmask).intoArray(r, i);
 154             }
 155         }
 156 
 157         bh.consume(r);
 158     }
 159 
 160 
 161     @Benchmark
 162     public void div(Blackhole bh) {
 163         float[] a = fa.apply(SPECIES.length());
 164         float[] b = fb.apply(SPECIES.length());
 165         float[] r = fr.apply(SPECIES.length());
 166 
 167         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 168             for (int i = 0; i < a.length; i += SPECIES.length()) {
 169                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 170                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 171                 av.div(bv).intoArray(r, i);
 172             }
 173         }
 174 
 175         bh.consume(r);
 176     }
 177 
 178 
 179 
 180     @Benchmark
 181     public void divMasked(Blackhole bh) {
 182         float[] a = fa.apply(SPECIES.length());
 183         float[] b = fb.apply(SPECIES.length());
 184         float[] r = fr.apply(SPECIES.length());
 185         boolean[] mask = fm.apply(SPECIES.length());
 186         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 187 
 188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 189             for (int i = 0; i < a.length; i += SPECIES.length()) {
 190                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 191                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 192                 av.div(bv, vmask).intoArray(r, i);
 193             }
 194         }
 195 
 196         bh.consume(r);
 197     }
 198 
 199 
 200     @Benchmark
 201     public void mul(Blackhole bh) {
 202         float[] a = fa.apply(SPECIES.length());
 203         float[] b = fb.apply(SPECIES.length());
 204         float[] r = fr.apply(SPECIES.length());
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < a.length; i += SPECIES.length()) {
 208                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 209                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 210                 av.mul(bv).intoArray(r, i);
 211             }
 212         }
 213 
 214         bh.consume(r);
 215     }
 216 
 217     @Benchmark
 218     public void mulMasked(Blackhole bh) {
 219         float[] a = fa.apply(SPECIES.length());
 220         float[] b = fb.apply(SPECIES.length());
 221         float[] r = fr.apply(SPECIES.length());
 222         boolean[] mask = fm.apply(SPECIES.length());
 223         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 224 
 225         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 226             for (int i = 0; i < a.length; i += SPECIES.length()) {
 227                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 228                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 229                 av.mul(bv, vmask).intoArray(r, i);
 230             }
 231         }
 232 
 233         bh.consume(r);
 234     }
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266     @Benchmark
 267     public void max(Blackhole bh) {
 268         float[] a = fa.apply(SPECIES.length());
 269         float[] b = fb.apply(SPECIES.length());
 270         float[] r = fr.apply(SPECIES.length());
 271 
 272         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 273             for (int i = 0; i < a.length; i += SPECIES.length()) {
 274                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 275                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 276                 av.max(bv).intoArray(r, i);
 277             }
 278         }
 279 
 280         bh.consume(r);
 281     }
 282 
 283     @Benchmark
 284     public void min(Blackhole bh) {
 285         float[] a = fa.apply(SPECIES.length());
 286         float[] b = fb.apply(SPECIES.length());
 287         float[] r = fr.apply(SPECIES.length());
 288 
 289         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 290             for (int i = 0; i < a.length; i += SPECIES.length()) {
 291                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 292                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 293                 av.min(bv).intoArray(r, i);
 294             }
 295         }
 296 
 297         bh.consume(r);
 298     }
 299 
 300 
 301 
 302 
 303     @Benchmark
 304     public void addAll(Blackhole bh) {
 305         float[] a = fa.apply(SPECIES.length());
 306         float[] r = fr.apply(SPECIES.length());
 307         float ra = 0;
 308 
 309         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 310             for (int i = 0; i < a.length; i += SPECIES.length()) {
 311                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 312                 r[i] = av.addAll();
 313             }
 314         }
 315 
 316         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 317             ra = 0;
 318             for (int i = 0; i < a.length; i += SPECIES.length()) {
 319                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 320                 ra += av.addAll();
 321             }
 322         }
 323 
 324         bh.consume(ra);
 325         bh.consume(r);
 326     }
 327 
 328     @Benchmark
 329     public void mulAll(Blackhole bh) {
 330         float[] a = fa.apply(SPECIES.length());
 331         float[] r = fr.apply(SPECIES.length());
 332         float ra = 1;
 333 
 334         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 335             for (int i = 0; i < a.length; i += SPECIES.length()) {
 336                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 337                 r[i] = av.mulAll();
 338             }
 339         }
 340 
 341         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 342             ra = 1;
 343             for (int i = 0; i < a.length; i += SPECIES.length()) {
 344                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 345                 ra *= av.mulAll();
 346             }
 347         }
 348 
 349         bh.consume(ra);
 350         bh.consume(r);
 351     }
 352 
 353     @Benchmark
 354     public void minAll(Blackhole bh) {
 355         float[] a = fa.apply(SPECIES.length());
 356         float[] r = fr.apply(SPECIES.length());
 357         float ra = Float.MAX_VALUE;
 358 
 359         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 360             for (int i = 0; i < a.length; i += SPECIES.length()) {
 361                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 362                 r[i] = av.minAll();
 363             }
 364         }
 365 
 366         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 367             ra = Float.MAX_VALUE;
 368             for (int i = 0; i < a.length; i += SPECIES.length()) {
 369                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 370                 ra = (float)Math.min(ra, av.minAll());
 371             }
 372         }
 373 
 374         bh.consume(ra);
 375         bh.consume(r);
 376     }
 377 
 378     @Benchmark
 379     public void maxAll(Blackhole bh) {
 380         float[] a = fa.apply(SPECIES.length());
 381         float[] r = fr.apply(SPECIES.length());
 382         float ra = Float.MIN_VALUE;
 383 
 384         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 385             for (int i = 0; i < a.length; i += SPECIES.length()) {
 386                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 387                 r[i] = av.maxAll();
 388             }
 389         }
 390 
 391         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 392             ra = Float.MIN_VALUE;
 393             for (int i = 0; i < a.length; i += SPECIES.length()) {
 394                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 395                 ra = (float)Math.max(ra, av.maxAll());
 396             }
 397         }
 398 
 399         bh.consume(ra);
 400         bh.consume(r);
 401     }
 402 
 403 
 404 
 405     @Benchmark
 406     public void with(Blackhole bh) {
 407         float[] a = fa.apply(SPECIES.length());
 408         float[] r = fr.apply(SPECIES.length());
 409 
 410         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 411             for (int i = 0; i < a.length; i += SPECIES.length()) {
 412                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 413                 av.with(0, (float)4).intoArray(r, i);
 414             }
 415         }
 416 
 417         bh.consume(r);
 418     }
 419 
 420     @Benchmark
 421     public Object lessThan() {
 422         float[] a = fa.apply(size);
 423         float[] b = fb.apply(size);
 424         boolean[] ms = fm.apply(size);
 425         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 426 
 427         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 428             for (int i = 0; i < a.length; i += SPECIES.length()) {
 429                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 430                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 431                 Vector.Mask<Float> mv = av.lessThan(bv);
 432 
 433                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 434             }
 435         }
 436         return m;
 437     }
 438 
 439 
 440     @Benchmark
 441     public Object greaterThan() {
 442         float[] a = fa.apply(size);
 443         float[] b = fb.apply(size);
 444         boolean[] ms = fm.apply(size);
 445         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 446 
 447         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 448             for (int i = 0; i < a.length; i += SPECIES.length()) {
 449                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 450                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 451                 Vector.Mask<Float> mv = av.greaterThan(bv);
 452 
 453                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 454             }
 455         }
 456         return m;
 457     }
 458 
 459 
 460     @Benchmark
 461     public Object equal() {
 462         float[] a = fa.apply(size);
 463         float[] b = fb.apply(size);
 464         boolean[] ms = fm.apply(size);
 465         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 466 
 467         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 468             for (int i = 0; i < a.length; i += SPECIES.length()) {
 469                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 470                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 471                 Vector.Mask<Float> mv = av.equal(bv);
 472 
 473                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 474             }
 475         }
 476         return m;
 477     }
 478 
 479 
 480     @Benchmark
 481     public Object notEqual() {
 482         float[] a = fa.apply(size);
 483         float[] b = fb.apply(size);
 484         boolean[] ms = fm.apply(size);
 485         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 486 
 487         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 488             for (int i = 0; i < a.length; i += SPECIES.length()) {
 489                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 490                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 491                 Vector.Mask<Float> mv = av.notEqual(bv);
 492 
 493                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 494             }
 495         }
 496         return m;
 497     }
 498 
 499 
 500     @Benchmark
 501     public Object lessThanEq() {
 502         float[] a = fa.apply(size);
 503         float[] b = fb.apply(size);
 504         boolean[] ms = fm.apply(size);
 505         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 506 
 507         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 508             for (int i = 0; i < a.length; i += SPECIES.length()) {
 509                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 510                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 511                 Vector.Mask<Float> mv = av.lessThanEq(bv);
 512 
 513                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 514             }
 515         }
 516         return m;
 517     }
 518 
 519 
 520     @Benchmark
 521     public Object greaterThanEq() {
 522         float[] a = fa.apply(size);
 523         float[] b = fb.apply(size);
 524         boolean[] ms = fm.apply(size);
 525         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 526 
 527         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 528             for (int i = 0; i < a.length; i += SPECIES.length()) {
 529                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 530                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 531                 Vector.Mask<Float> mv = av.greaterThanEq(bv);
 532 
 533                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 534             }
 535         }
 536         return m;
 537     }
 538 
 539 
 540     @Benchmark
 541     public void blend(Blackhole bh) {
 542         float[] a = fa.apply(SPECIES.length());
 543         float[] b = fb.apply(SPECIES.length());
 544         float[] r = fr.apply(SPECIES.length());
 545         boolean[] mask = fm.apply(SPECIES.length());
 546         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 547 
 548         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 549             for (int i = 0; i < a.length; i += SPECIES.length()) {
 550                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 551                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 552                 av.blend(bv, vmask).intoArray(r, i);
 553             }
 554         }
 555 
 556         bh.consume(r);
 557     }
 558 
 559     @Benchmark
 560     public void rearrange(Blackhole bh) {
 561         float[] a = fa.apply(SPECIES.length());
 562         int[] order = fs.apply(a.length, SPECIES.length());
 563         float[] r = fr.apply(SPECIES.length());
 564 
 565         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 566             for (int i = 0; i < a.length; i += SPECIES.length()) {
 567                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 568                 av.rearrange(FloatVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 569             }
 570         }
 571 
 572         bh.consume(r);
 573     }
 574 
 575     @Benchmark
 576     public void extract(Blackhole bh) {
 577         float[] a = fa.apply(SPECIES.length());
 578         float[] r = fr.apply(SPECIES.length());
 579 
 580         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 581             for (int i = 0; i < a.length; i += SPECIES.length()) {
 582                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 583                 int num_lanes = SPECIES.length();
 584                 // Manually unroll because full unroll happens after intrinsification.
 585                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 586                 if (num_lanes == 1) {
 587                     r[i]=av.get(0);
 588                 } else if (num_lanes == 2) {
 589                     r[i]=av.get(0);
 590                     r[i+1]=av.get(1);
 591                 } else if (num_lanes == 4) {
 592                     r[i]=av.get(0);
 593                     r[i+1]=av.get(1);
 594                     r[i+2]=av.get(2);
 595                     r[i+3]=av.get(3);
 596                 } else if (num_lanes == 8) {
 597                     r[i]=av.get(0);
 598                     r[i+1]=av.get(1);
 599                     r[i+2]=av.get(2);
 600                     r[i+3]=av.get(3);
 601                     r[i+4]=av.get(4);
 602                     r[i+5]=av.get(5);
 603                     r[i+6]=av.get(6);
 604                     r[i+7]=av.get(7);
 605                 } else if (num_lanes == 16) {
 606                     r[i]=av.get(0);
 607                     r[i+1]=av.get(1);
 608                     r[i+2]=av.get(2);
 609                     r[i+3]=av.get(3);
 610                     r[i+4]=av.get(4);
 611                     r[i+5]=av.get(5);
 612                     r[i+6]=av.get(6);
 613                     r[i+7]=av.get(7);
 614                     r[i+8]=av.get(8);
 615                     r[i+9]=av.get(9);
 616                     r[i+10]=av.get(10);
 617                     r[i+11]=av.get(11);
 618                     r[i+12]=av.get(12);
 619                     r[i+13]=av.get(13);
 620                     r[i+14]=av.get(14);
 621                     r[i+15]=av.get(15);
 622                 } else if (num_lanes == 32) {
 623                     r[i]=av.get(0);
 624                     r[i+1]=av.get(1);
 625                     r[i+2]=av.get(2);
 626                     r[i+3]=av.get(3);
 627                     r[i+4]=av.get(4);
 628                     r[i+5]=av.get(5);
 629                     r[i+6]=av.get(6);
 630                     r[i+7]=av.get(7);
 631                     r[i+8]=av.get(8);
 632                     r[i+9]=av.get(9);
 633                     r[i+10]=av.get(10);
 634                     r[i+11]=av.get(11);
 635                     r[i+12]=av.get(12);
 636                     r[i+13]=av.get(13);
 637                     r[i+14]=av.get(14);
 638                     r[i+15]=av.get(15);
 639                     r[i+16]=av.get(16);
 640                     r[i+17]=av.get(17);
 641                     r[i+18]=av.get(18);
 642                     r[i+19]=av.get(19);
 643                     r[i+20]=av.get(20);
 644                     r[i+21]=av.get(21);
 645                     r[i+22]=av.get(22);
 646                     r[i+23]=av.get(23);
 647                     r[i+24]=av.get(24);
 648                     r[i+25]=av.get(25);
 649                     r[i+26]=av.get(26);
 650                     r[i+27]=av.get(27);
 651                     r[i+28]=av.get(28);
 652                     r[i+29]=av.get(29);
 653                     r[i+30]=av.get(30);
 654                     r[i+31]=av.get(31);
 655                 } else if (num_lanes == 64) {
 656                     r[i]=av.get(0);
 657                     r[i+1]=av.get(1);
 658                     r[i+2]=av.get(2);
 659                     r[i+3]=av.get(3);
 660                     r[i+4]=av.get(4);
 661                     r[i+5]=av.get(5);
 662                     r[i+6]=av.get(6);
 663                     r[i+7]=av.get(7);
 664                     r[i+8]=av.get(8);
 665                     r[i+9]=av.get(9);
 666                     r[i+10]=av.get(10);
 667                     r[i+11]=av.get(11);
 668                     r[i+12]=av.get(12);
 669                     r[i+13]=av.get(13);
 670                     r[i+14]=av.get(14);
 671                     r[i+15]=av.get(15);
 672                     r[i+16]=av.get(16);
 673                     r[i+17]=av.get(17);
 674                     r[i+18]=av.get(18);
 675                     r[i+19]=av.get(19);
 676                     r[i+20]=av.get(20);
 677                     r[i+21]=av.get(21);
 678                     r[i+22]=av.get(22);
 679                     r[i+23]=av.get(23);
 680                     r[i+24]=av.get(24);
 681                     r[i+25]=av.get(25);
 682                     r[i+26]=av.get(26);
 683                     r[i+27]=av.get(27);
 684                     r[i+28]=av.get(28);
 685                     r[i+29]=av.get(29);
 686                     r[i+30]=av.get(30);
 687                     r[i+31]=av.get(31);
 688                     r[i+32]=av.get(32);
 689                     r[i+33]=av.get(33);
 690                     r[i+34]=av.get(34);
 691                     r[i+35]=av.get(35);
 692                     r[i+36]=av.get(36);
 693                     r[i+37]=av.get(37);
 694                     r[i+38]=av.get(38);
 695                     r[i+39]=av.get(39);
 696                     r[i+40]=av.get(40);
 697                     r[i+41]=av.get(41);
 698                     r[i+42]=av.get(42);
 699                     r[i+43]=av.get(43);
 700                     r[i+44]=av.get(44);
 701                     r[i+45]=av.get(45);
 702                     r[i+46]=av.get(46);
 703                     r[i+47]=av.get(47);
 704                     r[i+48]=av.get(48);
 705                     r[i+49]=av.get(49);
 706                     r[i+50]=av.get(50);
 707                     r[i+51]=av.get(51);
 708                     r[i+52]=av.get(52);
 709                     r[i+53]=av.get(53);
 710                     r[i+54]=av.get(54);
 711                     r[i+55]=av.get(55);
 712                     r[i+56]=av.get(56);
 713                     r[i+57]=av.get(57);
 714                     r[i+58]=av.get(58);
 715                     r[i+59]=av.get(59);
 716                     r[i+60]=av.get(60);
 717                     r[i+61]=av.get(61);
 718                     r[i+62]=av.get(62);
 719                     r[i+63]=av.get(63);
 720                 } else {
 721                     for (int j = 0; j < SPECIES.length(); j++) {
 722                         r[i+j]=av.get(j);
 723                     }
 724                 }
 725             }
 726         }
 727 
 728         bh.consume(r);
 729     }
 730 
 731 
 732     @Benchmark
 733     public void sin(Blackhole bh) {
 734         float[] a = fa.apply(SPECIES.length());
 735         float[] r = fr.apply(SPECIES.length());
 736 
 737         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 738             for (int i = 0; i < a.length; i += SPECIES.length()) {
 739                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 740                 av.sin().intoArray(r, i);
 741             }
 742         }
 743 
 744         bh.consume(r);
 745     }
 746 
 747 
 748 
 749     @Benchmark
 750     public void exp(Blackhole bh) {
 751         float[] a = fa.apply(SPECIES.length());
 752         float[] r = fr.apply(SPECIES.length());
 753 
 754         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 755             for (int i = 0; i < a.length; i += SPECIES.length()) {
 756                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 757                 av.exp().intoArray(r, i);
 758             }
 759         }
 760 
 761         bh.consume(r);
 762     }
 763 
 764 
 765 
 766     @Benchmark
 767     public void log1p(Blackhole bh) {
 768         float[] a = fa.apply(SPECIES.length());
 769         float[] r = fr.apply(SPECIES.length());
 770 
 771         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 772             for (int i = 0; i < a.length; i += SPECIES.length()) {
 773                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 774                 av.log1p().intoArray(r, i);
 775             }
 776         }
 777 
 778         bh.consume(r);
 779     }
 780 
 781 
 782 
 783     @Benchmark
 784     public void log(Blackhole bh) {
 785         float[] a = fa.apply(SPECIES.length());
 786         float[] r = fr.apply(SPECIES.length());
 787 
 788         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 789             for (int i = 0; i < a.length; i += SPECIES.length()) {
 790                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 791                 av.log().intoArray(r, i);
 792             }
 793         }
 794 
 795         bh.consume(r);
 796     }
 797 
 798 
 799 
 800     @Benchmark
 801     public void log10(Blackhole bh) {
 802         float[] a = fa.apply(SPECIES.length());
 803         float[] r = fr.apply(SPECIES.length());
 804 
 805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 806             for (int i = 0; i < a.length; i += SPECIES.length()) {
 807                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 808                 av.log10().intoArray(r, i);
 809             }
 810         }
 811 
 812         bh.consume(r);
 813     }
 814 
 815 
 816 
 817     @Benchmark
 818     public void expm1(Blackhole bh) {
 819         float[] a = fa.apply(SPECIES.length());
 820         float[] r = fr.apply(SPECIES.length());
 821 
 822         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 823             for (int i = 0; i < a.length; i += SPECIES.length()) {
 824                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 825                 av.expm1().intoArray(r, i);
 826             }
 827         }
 828 
 829         bh.consume(r);
 830     }
 831 
 832 
 833 
 834     @Benchmark
 835     public void cos(Blackhole bh) {
 836         float[] a = fa.apply(SPECIES.length());
 837         float[] r = fr.apply(SPECIES.length());
 838 
 839         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 840             for (int i = 0; i < a.length; i += SPECIES.length()) {
 841                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 842                 av.cos().intoArray(r, i);
 843             }
 844         }
 845 
 846         bh.consume(r);
 847     }
 848 
 849 
 850 
 851     @Benchmark
 852     public void tan(Blackhole bh) {
 853         float[] a = fa.apply(SPECIES.length());
 854         float[] r = fr.apply(SPECIES.length());
 855 
 856         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 857             for (int i = 0; i < a.length; i += SPECIES.length()) {
 858                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 859                 av.tan().intoArray(r, i);
 860             }
 861         }
 862 
 863         bh.consume(r);
 864     }
 865 
 866 
 867 
 868     @Benchmark
 869     public void sinh(Blackhole bh) {
 870         float[] a = fa.apply(SPECIES.length());
 871         float[] r = fr.apply(SPECIES.length());
 872 
 873         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 874             for (int i = 0; i < a.length; i += SPECIES.length()) {
 875                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 876                 av.sinh().intoArray(r, i);
 877             }
 878         }
 879 
 880         bh.consume(r);
 881     }
 882 
 883 
 884 
 885     @Benchmark
 886     public void cosh(Blackhole bh) {
 887         float[] a = fa.apply(SPECIES.length());
 888         float[] r = fr.apply(SPECIES.length());
 889 
 890         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 891             for (int i = 0; i < a.length; i += SPECIES.length()) {
 892                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 893                 av.cosh().intoArray(r, i);
 894             }
 895         }
 896 
 897         bh.consume(r);
 898     }
 899 
 900 
 901 
 902     @Benchmark
 903     public void tanh(Blackhole bh) {
 904         float[] a = fa.apply(SPECIES.length());
 905         float[] r = fr.apply(SPECIES.length());
 906 
 907         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 908             for (int i = 0; i < a.length; i += SPECIES.length()) {
 909                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 910                 av.tanh().intoArray(r, i);
 911             }
 912         }
 913 
 914         bh.consume(r);
 915     }
 916 
 917 
 918 
 919     @Benchmark
 920     public void asin(Blackhole bh) {
 921         float[] a = fa.apply(SPECIES.length());
 922         float[] r = fr.apply(SPECIES.length());
 923 
 924         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 925             for (int i = 0; i < a.length; i += SPECIES.length()) {
 926                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 927                 av.asin().intoArray(r, i);
 928             }
 929         }
 930 
 931         bh.consume(r);
 932     }
 933 
 934 
 935 
 936     @Benchmark
 937     public void acos(Blackhole bh) {
 938         float[] a = fa.apply(SPECIES.length());
 939         float[] r = fr.apply(SPECIES.length());
 940 
 941         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 942             for (int i = 0; i < a.length; i += SPECIES.length()) {
 943                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 944                 av.acos().intoArray(r, i);
 945             }
 946         }
 947 
 948         bh.consume(r);
 949     }
 950 
 951 
 952 
 953     @Benchmark
 954     public void atan(Blackhole bh) {
 955         float[] a = fa.apply(SPECIES.length());
 956         float[] r = fr.apply(SPECIES.length());
 957 
 958         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 959             for (int i = 0; i < a.length; i += SPECIES.length()) {
 960                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 961                 av.atan().intoArray(r, i);
 962             }
 963         }
 964 
 965         bh.consume(r);
 966     }
 967 
 968 
 969 
 970     @Benchmark
 971     public void cbrt(Blackhole bh) {
 972         float[] a = fa.apply(SPECIES.length());
 973         float[] r = fr.apply(SPECIES.length());
 974 
 975         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 976             for (int i = 0; i < a.length; i += SPECIES.length()) {
 977                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 978                 av.cbrt().intoArray(r, i);
 979             }
 980         }
 981 
 982         bh.consume(r);
 983     }
 984 
 985 
 986 
 987     @Benchmark
 988     public void hypot(Blackhole bh) {
 989         float[] a = fa.apply(SPECIES.length());
 990         float[] b = fb.apply(SPECIES.length());
 991         float[] r = fr.apply(SPECIES.length());
 992 
 993         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 994             for (int i = 0; i < a.length; i += SPECIES.length()) {
 995                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 996                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 997                 av.hypot(bv).intoArray(r, i);
 998             }
 999         }
1000 
1001         bh.consume(r);
1002     }
1003 
1004 
1005 
1006     @Benchmark
1007     public void pow(Blackhole bh) {
1008         float[] a = fa.apply(SPECIES.length());
1009         float[] b = fb.apply(SPECIES.length());
1010         float[] r = fr.apply(SPECIES.length());
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1015                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1016                 av.pow(bv).intoArray(r, i);
1017             }
1018         }
1019 
1020         bh.consume(r);
1021     }
1022 
1023 
1024 
1025     @Benchmark
1026     public void atan2(Blackhole bh) {
1027         float[] a = fa.apply(SPECIES.length());
1028         float[] b = fb.apply(SPECIES.length());
1029         float[] r = fr.apply(SPECIES.length());
1030 
1031         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1032             for (int i = 0; i < a.length; i += SPECIES.length()) {
1033                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1034                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1035                 av.atan2(bv).intoArray(r, i);
1036             }
1037         }
1038 
1039         bh.consume(r);
1040     }
1041 
1042 
1043 
1044     @Benchmark
1045     public void fma(Blackhole bh) {
1046         float[] a = fa.apply(SPECIES.length());
1047         float[] b = fb.apply(SPECIES.length());
1048         float[] c = fc.apply(SPECIES.length());
1049         float[] r = fr.apply(SPECIES.length());
1050 
1051         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1052             for (int i = 0; i < a.length; i += SPECIES.length()) {
1053                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1054                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1055                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1056                 av.fma(bv, cv).intoArray(r, i);
1057             }
1058         }
1059 
1060         bh.consume(r);
1061     }
1062 
1063 
1064 
1065     @Benchmark
1066     public void fmaMasked(Blackhole bh) {
1067         float[] a = fa.apply(SPECIES.length());
1068         float[] b = fb.apply(SPECIES.length());
1069         float[] c = fc.apply(SPECIES.length());
1070         float[] r = fr.apply(SPECIES.length());
1071         boolean[] mask = fm.apply(SPECIES.length());
1072         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1073 
1074         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1075             for (int i = 0; i < a.length; i += SPECIES.length()) {
1076                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1077                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1078                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1079                 av.fma(bv, cv, vmask).intoArray(r, i);
1080             }
1081         }
1082 
1083         bh.consume(r);
1084     }
1085 
1086 
1087     @Benchmark
1088     public void neg(Blackhole bh) {
1089         float[] a = fa.apply(SPECIES.length());
1090         float[] r = fr.apply(SPECIES.length());
1091 
1092         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1093             for (int i = 0; i < a.length; i += SPECIES.length()) {
1094                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1095                 av.neg().intoArray(r, i);
1096             }
1097         }
1098 
1099         bh.consume(r);
1100     }
1101 
1102     @Benchmark
1103     public void negMasked(Blackhole bh) {
1104         float[] a = fa.apply(SPECIES.length());
1105         float[] r = fr.apply(SPECIES.length());
1106         boolean[] mask = fm.apply(SPECIES.length());
1107         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1108 
1109         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1110             for (int i = 0; i < a.length; i += SPECIES.length()) {
1111                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1112                 av.neg(vmask).intoArray(r, i);
1113             }
1114         }
1115 
1116         bh.consume(r);
1117     }
1118 
1119     @Benchmark
1120     public void abs(Blackhole bh) {
1121         float[] a = fa.apply(SPECIES.length());
1122         float[] r = fr.apply(SPECIES.length());
1123 
1124         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1125             for (int i = 0; i < a.length; i += SPECIES.length()) {
1126                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1127                 av.abs().intoArray(r, i);
1128             }
1129         }
1130 
1131         bh.consume(r);
1132     }
1133 
1134     @Benchmark
1135     public void absMasked(Blackhole bh) {
1136         float[] a = fa.apply(SPECIES.length());
1137         float[] r = fr.apply(SPECIES.length());
1138         boolean[] mask = fm.apply(SPECIES.length());
1139         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1140 
1141         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1142             for (int i = 0; i < a.length; i += SPECIES.length()) {
1143                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1144                 av.abs(vmask).intoArray(r, i);
1145             }
1146         }
1147 
1148         bh.consume(r);
1149     }
1150 
1151 
1152 
1153 
1154     @Benchmark
1155     public void sqrt(Blackhole bh) {
1156         float[] a = fa.apply(SPECIES.length());
1157         float[] r = fr.apply(SPECIES.length());
1158 
1159         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1160             for (int i = 0; i < a.length; i += SPECIES.length()) {
1161                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1162                 av.sqrt().intoArray(r, i);
1163             }
1164         }
1165 
1166         bh.consume(r);
1167     }
1168 
1169 
1170 
1171     @Benchmark
1172     public void sqrtMasked(Blackhole bh) {
1173         float[] a = fa.apply(SPECIES.length());
1174         float[] r = fr.apply(SPECIES.length());
1175         boolean[] mask = fm.apply(SPECIES.length());
1176         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1177 
1178         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1179             for (int i = 0; i < a.length; i += SPECIES.length()) {
1180                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1181                 av.sqrt(vmask).intoArray(r, i);
1182             }
1183         }
1184 
1185         bh.consume(r);
1186     }
1187 
1188 
1189 
1190     @Benchmark
1191     public void gather(Blackhole bh) {
1192         float[] a = fa.apply(SPECIES.length());
1193         int[] b    = fs.apply(a.length, SPECIES.length());
1194         float[] r = new float[a.length];
1195 
1196         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1197             for (int i = 0; i < a.length; i += SPECIES.length()) {
1198                 FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i);
1199                 av.intoArray(r, i);
1200             }
1201         }
1202 
1203         bh.consume(r);
1204     }
1205 
1206 
1207 
1208     @Benchmark
1209     public void scatter(Blackhole bh) {
1210         float[] a = fa.apply(SPECIES.length());
1211         int[] b = fs.apply(a.length, SPECIES.length());
1212         float[] r = new float[a.length];
1213 
1214         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1215             for (int i = 0; i < a.length; i += SPECIES.length()) {
1216                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1217                 av.intoArray(r, i, b, i);
1218             }
1219         }
1220 
1221         bh.consume(r);
1222     }
1223 
1224 }
1225