1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.FloatVector;
  29 
  30 import java.util.concurrent.TimeUnit;
  31 import java.util.function.BiFunction;
  32 import java.util.function.IntFunction;
  33 
  34 import org.openjdk.jmh.annotations.*;
  35 import org.openjdk.jmh.infra.Blackhole;
  36 
  37 @BenchmarkMode(Mode.Throughput)
  38 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  39 @State(Scope.Benchmark)
  40 @Warmup(iterations = 3, time = 1)
  41 @Measurement(iterations = 5, time = 1)
  42 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  43 public class Float128Vector extends AbstractVectorBenchmark {
  44     static final FloatVector.FloatSpecies SPECIES = FloatVector.species(Shape.S_128_BIT);
  45 
  46     static final int INVOC_COUNT = 1; // get rid of outer loop
  47 
  48     @Param("1024")
  49     int size;
  50 
  51     float[] fill(IntFunction<Float> f) {
  52         float[] array = new float[size];
  53         for (int i = 0; i < array.length; i++) {
  54             array[i] = f.apply(i);
  55         }
  56         return array;
  57     }
  58 
  59     float[] a, b, c, r;
  60     boolean[] m, rm;
  61     int[] s;
  62 
  63     @Setup
  64     public void init() {
  65         size += size % SPECIES.length(); // FIXME: add post-loops
  66 
  67         a = fill(i -> (float)(2*i));
  68         b = fill(i -> (float)(i+1));
  69         c = fill(i -> (float)(i+5));
  70         r = fill(i -> (float)0);
  71 
  72         m = fillMask(size, i -> (i % 2) == 0);
  73         rm = fillMask(size, i -> false);
  74 
  75         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  76     }
  77 
  78     final IntFunction<float[]> fa = vl -> a;
  79     final IntFunction<float[]> fb = vl -> b;
  80     final IntFunction<float[]> fc = vl -> c;
  81     final IntFunction<float[]> fr = vl -> r;
  82     final IntFunction<boolean[]> fm = vl -> m;
  83     final IntFunction<boolean[]> fmr = vl -> rm;
  84     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  85 
  86 
  87     @Benchmark
  88     public void add(Blackhole bh) {
  89         float[] a = fa.apply(SPECIES.length());
  90         float[] b = fb.apply(SPECIES.length());
  91         float[] r = fr.apply(SPECIES.length());
  92 
  93         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  94             for (int i = 0; i < a.length; i += SPECIES.length()) {
  95                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
  96                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
  97                 av.add(bv).intoArray(r, i);
  98             }
  99         }
 100 
 101         bh.consume(r);
 102     }
 103 
 104     @Benchmark
 105     public void addMasked(Blackhole bh) {
 106         float[] a = fa.apply(SPECIES.length());
 107         float[] b = fb.apply(SPECIES.length());
 108         float[] r = fr.apply(SPECIES.length());
 109         boolean[] mask = fm.apply(SPECIES.length());
 110         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 111 
 112         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 113             for (int i = 0; i < a.length; i += SPECIES.length()) {
 114                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 115                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 116                 av.add(bv, vmask).intoArray(r, i);
 117             }
 118         }
 119 
 120         bh.consume(r);
 121     }
 122 
 123     @Benchmark
 124     public void sub(Blackhole bh) {
 125         float[] a = fa.apply(SPECIES.length());
 126         float[] b = fb.apply(SPECIES.length());
 127         float[] r = fr.apply(SPECIES.length());
 128 
 129         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 130             for (int i = 0; i < a.length; i += SPECIES.length()) {
 131                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 132                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 133                 av.sub(bv).intoArray(r, i);
 134             }
 135         }
 136 
 137         bh.consume(r);
 138     }
 139 
 140     @Benchmark
 141     public void subMasked(Blackhole bh) {
 142         float[] a = fa.apply(SPECIES.length());
 143         float[] b = fb.apply(SPECIES.length());
 144         float[] r = fr.apply(SPECIES.length());
 145         boolean[] mask = fm.apply(SPECIES.length());
 146         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 147 
 148         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 149             for (int i = 0; i < a.length; i += SPECIES.length()) {
 150                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 151                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 152                 av.sub(bv, vmask).intoArray(r, i);
 153             }
 154         }
 155 
 156         bh.consume(r);
 157     }
 158 
 159 
 160     @Benchmark
 161     public void div(Blackhole bh) {
 162         float[] a = fa.apply(SPECIES.length());
 163         float[] b = fb.apply(SPECIES.length());
 164         float[] r = fr.apply(SPECIES.length());
 165 
 166         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 167             for (int i = 0; i < a.length; i += SPECIES.length()) {
 168                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 169                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 170                 av.div(bv).intoArray(r, i);
 171             }
 172         }
 173 
 174         bh.consume(r);
 175     }
 176 
 177 
 178 
 179     @Benchmark
 180     public void divMasked(Blackhole bh) {
 181         float[] a = fa.apply(SPECIES.length());
 182         float[] b = fb.apply(SPECIES.length());
 183         float[] r = fr.apply(SPECIES.length());
 184         boolean[] mask = fm.apply(SPECIES.length());
 185         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 186 
 187         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 188             for (int i = 0; i < a.length; i += SPECIES.length()) {
 189                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 190                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 191                 av.div(bv, vmask).intoArray(r, i);
 192             }
 193         }
 194 
 195         bh.consume(r);
 196     }
 197 
 198 
 199     @Benchmark
 200     public void mul(Blackhole bh) {
 201         float[] a = fa.apply(SPECIES.length());
 202         float[] b = fb.apply(SPECIES.length());
 203         float[] r = fr.apply(SPECIES.length());
 204 
 205         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 206             for (int i = 0; i < a.length; i += SPECIES.length()) {
 207                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 208                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 209                 av.mul(bv).intoArray(r, i);
 210             }
 211         }
 212 
 213         bh.consume(r);
 214     }
 215 
 216     @Benchmark
 217     public void mulMasked(Blackhole bh) {
 218         float[] a = fa.apply(SPECIES.length());
 219         float[] b = fb.apply(SPECIES.length());
 220         float[] r = fr.apply(SPECIES.length());
 221         boolean[] mask = fm.apply(SPECIES.length());
 222         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 223 
 224         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 225             for (int i = 0; i < a.length; i += SPECIES.length()) {
 226                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 227                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 228                 av.mul(bv, vmask).intoArray(r, i);
 229             }
 230         }
 231 
 232         bh.consume(r);
 233     }
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265     @Benchmark
 266     public void max(Blackhole bh) {
 267         float[] a = fa.apply(SPECIES.length());
 268         float[] b = fb.apply(SPECIES.length());
 269         float[] r = fr.apply(SPECIES.length());
 270 
 271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 272             for (int i = 0; i < a.length; i += SPECIES.length()) {
 273                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 274                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 275                 av.max(bv).intoArray(r, i);
 276             }
 277         }
 278 
 279         bh.consume(r);
 280     }
 281 
 282     @Benchmark
 283     public void min(Blackhole bh) {
 284         float[] a = fa.apply(SPECIES.length());
 285         float[] b = fb.apply(SPECIES.length());
 286         float[] r = fr.apply(SPECIES.length());
 287 
 288         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 289             for (int i = 0; i < a.length; i += SPECIES.length()) {
 290                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 291                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 292                 av.min(bv).intoArray(r, i);
 293             }
 294         }
 295 
 296         bh.consume(r);
 297     }
 298 
 299 
 300 
 301 
 302     @Benchmark
 303     public void addAll(Blackhole bh) {
 304         float[] a = fa.apply(SPECIES.length());
 305         float[] r = fr.apply(SPECIES.length());
 306         float ra = 0;
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < a.length; i += SPECIES.length()) {
 310                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 311                 r[i] = av.addAll();
 312             }
 313         }
 314 
 315         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 316             ra = 0;
 317             for (int i = 0; i < a.length; i += SPECIES.length()) {
 318                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 319                 ra += av.addAll();
 320             }
 321         }
 322 
 323         bh.consume(ra);
 324         bh.consume(r);
 325     }
 326 
 327     @Benchmark
 328     public void mulAll(Blackhole bh) {
 329         float[] a = fa.apply(SPECIES.length());
 330         float[] r = fr.apply(SPECIES.length());
 331         float ra = 1;
 332 
 333         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 334             for (int i = 0; i < a.length; i += SPECIES.length()) {
 335                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 336                 r[i] = av.mulAll();
 337             }
 338         }
 339 
 340         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 341             ra = 1;
 342             for (int i = 0; i < a.length; i += SPECIES.length()) {
 343                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 344                 ra *= av.mulAll();
 345             }
 346         }
 347 
 348         bh.consume(ra);
 349         bh.consume(r);
 350     }
 351 
 352     @Benchmark
 353     public void minAll(Blackhole bh) {
 354         float[] a = fa.apply(SPECIES.length());
 355         float[] r = fr.apply(SPECIES.length());
 356         float ra = Float.MAX_VALUE;
 357 
 358         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 359             for (int i = 0; i < a.length; i += SPECIES.length()) {
 360                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 361                 r[i] = av.minAll();
 362             }
 363         }
 364 
 365         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 366             ra = Float.MAX_VALUE;
 367             for (int i = 0; i < a.length; i += SPECIES.length()) {
 368                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 369                 ra = (float)Math.min(ra, av.minAll());
 370             }
 371         }
 372 
 373         bh.consume(ra);
 374         bh.consume(r);
 375     }
 376 
 377     @Benchmark
 378     public void maxAll(Blackhole bh) {
 379         float[] a = fa.apply(SPECIES.length());
 380         float[] r = fr.apply(SPECIES.length());
 381         float ra = Float.MIN_VALUE;
 382 
 383         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 384             for (int i = 0; i < a.length; i += SPECIES.length()) {
 385                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 386                 r[i] = av.maxAll();
 387             }
 388         }
 389 
 390         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 391             ra = Float.MIN_VALUE;
 392             for (int i = 0; i < a.length; i += SPECIES.length()) {
 393                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 394                 ra = (float)Math.max(ra, av.maxAll());
 395             }
 396         }
 397 
 398         bh.consume(ra);
 399         bh.consume(r);
 400     }
 401 
 402 
 403 
 404     @Benchmark
 405     public void with(Blackhole bh) {
 406         float[] a = fa.apply(SPECIES.length());
 407         float[] r = fr.apply(SPECIES.length());
 408 
 409         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 410             for (int i = 0; i < a.length; i += SPECIES.length()) {
 411                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 412                 av.with(0, (float)4).intoArray(r, i);
 413             }
 414         }
 415 
 416         bh.consume(r);
 417     }
 418 
 419     @Benchmark
 420     public Object lessThan() {
 421         float[] a = fa.apply(size);
 422         float[] b = fb.apply(size);
 423         boolean[] ms = fm.apply(size);
 424         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 425 
 426         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 427             for (int i = 0; i < a.length; i += SPECIES.length()) {
 428                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 429                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 430                 Vector.Mask<Float> mv = av.lessThan(bv);
 431 
 432                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 433             }
 434         }
 435         return m;
 436     }
 437 
 438 
 439     @Benchmark
 440     public Object greaterThan() {
 441         float[] a = fa.apply(size);
 442         float[] b = fb.apply(size);
 443         boolean[] ms = fm.apply(size);
 444         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 445 
 446         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 447             for (int i = 0; i < a.length; i += SPECIES.length()) {
 448                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 449                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 450                 Vector.Mask<Float> mv = av.greaterThan(bv);
 451 
 452                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 453             }
 454         }
 455         return m;
 456     }
 457 
 458 
 459     @Benchmark
 460     public Object equal() {
 461         float[] a = fa.apply(size);
 462         float[] b = fb.apply(size);
 463         boolean[] ms = fm.apply(size);
 464         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 465 
 466         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 467             for (int i = 0; i < a.length; i += SPECIES.length()) {
 468                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 469                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 470                 Vector.Mask<Float> mv = av.equal(bv);
 471 
 472                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 473             }
 474         }
 475         return m;
 476     }
 477 
 478 
 479     @Benchmark
 480     public Object notEqual() {
 481         float[] a = fa.apply(size);
 482         float[] b = fb.apply(size);
 483         boolean[] ms = fm.apply(size);
 484         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 485 
 486         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 487             for (int i = 0; i < a.length; i += SPECIES.length()) {
 488                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 489                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 490                 Vector.Mask<Float> mv = av.notEqual(bv);
 491 
 492                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 493             }
 494         }
 495         return m;
 496     }
 497 
 498 
 499     @Benchmark
 500     public Object lessThanEq() {
 501         float[] a = fa.apply(size);
 502         float[] b = fb.apply(size);
 503         boolean[] ms = fm.apply(size);
 504         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 505 
 506         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 507             for (int i = 0; i < a.length; i += SPECIES.length()) {
 508                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 509                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 510                 Vector.Mask<Float> mv = av.lessThanEq(bv);
 511 
 512                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 513             }
 514         }
 515         return m;
 516     }
 517 
 518 
 519     @Benchmark
 520     public Object greaterThanEq() {
 521         float[] a = fa.apply(size);
 522         float[] b = fb.apply(size);
 523         boolean[] ms = fm.apply(size);
 524         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 525 
 526         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 529                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 530                 Vector.Mask<Float> mv = av.greaterThanEq(bv);
 531 
 532                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 533             }
 534         }
 535         return m;
 536     }
 537 
 538 
 539     @Benchmark
 540     public void blend(Blackhole bh) {
 541         float[] a = fa.apply(SPECIES.length());
 542         float[] b = fb.apply(SPECIES.length());
 543         float[] r = fr.apply(SPECIES.length());
 544         boolean[] mask = fm.apply(SPECIES.length());
 545         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 546 
 547         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 548             for (int i = 0; i < a.length; i += SPECIES.length()) {
 549                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 550                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 551                 av.blend(bv, vmask).intoArray(r, i);
 552             }
 553         }
 554 
 555         bh.consume(r);
 556     }
 557 
 558     @Benchmark
 559     public void rearrange(Blackhole bh) {
 560         float[] a = fa.apply(SPECIES.length());
 561         int[] order = fs.apply(a.length, SPECIES.length());
 562         float[] r = fr.apply(SPECIES.length());
 563 
 564         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 565             for (int i = 0; i < a.length; i += SPECIES.length()) {
 566                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 567                 av.rearrange(FloatVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 568             }
 569         }
 570 
 571         bh.consume(r);
 572     }
 573 
 574     @Benchmark
 575     public void extract(Blackhole bh) {
 576         float[] a = fa.apply(SPECIES.length());
 577         float[] r = fr.apply(SPECIES.length());
 578 
 579         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 580             for (int i = 0; i < a.length; i += SPECIES.length()) {
 581                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 582                 int num_lanes = SPECIES.length();
 583                 // Manually unroll because full unroll happens after intrinsification.
 584                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 585                 if (num_lanes == 1) {
 586                     r[i]=av.get(0);
 587                 } else if (num_lanes == 2) {
 588                     r[i]=av.get(0);
 589                     r[i+1]=av.get(1);
 590                 } else if (num_lanes == 4) {
 591                     r[i]=av.get(0);
 592                     r[i+1]=av.get(1);
 593                     r[i+2]=av.get(2);
 594                     r[i+3]=av.get(3);
 595                 } else if (num_lanes == 8) {
 596                     r[i]=av.get(0);
 597                     r[i+1]=av.get(1);
 598                     r[i+2]=av.get(2);
 599                     r[i+3]=av.get(3);
 600                     r[i+4]=av.get(4);
 601                     r[i+5]=av.get(5);
 602                     r[i+6]=av.get(6);
 603                     r[i+7]=av.get(7);
 604                 } else if (num_lanes == 16) {
 605                     r[i]=av.get(0);
 606                     r[i+1]=av.get(1);
 607                     r[i+2]=av.get(2);
 608                     r[i+3]=av.get(3);
 609                     r[i+4]=av.get(4);
 610                     r[i+5]=av.get(5);
 611                     r[i+6]=av.get(6);
 612                     r[i+7]=av.get(7);
 613                     r[i+8]=av.get(8);
 614                     r[i+9]=av.get(9);
 615                     r[i+10]=av.get(10);
 616                     r[i+11]=av.get(11);
 617                     r[i+12]=av.get(12);
 618                     r[i+13]=av.get(13);
 619                     r[i+14]=av.get(14);
 620                     r[i+15]=av.get(15);
 621                 } else if (num_lanes == 32) {
 622                     r[i]=av.get(0);
 623                     r[i+1]=av.get(1);
 624                     r[i+2]=av.get(2);
 625                     r[i+3]=av.get(3);
 626                     r[i+4]=av.get(4);
 627                     r[i+5]=av.get(5);
 628                     r[i+6]=av.get(6);
 629                     r[i+7]=av.get(7);
 630                     r[i+8]=av.get(8);
 631                     r[i+9]=av.get(9);
 632                     r[i+10]=av.get(10);
 633                     r[i+11]=av.get(11);
 634                     r[i+12]=av.get(12);
 635                     r[i+13]=av.get(13);
 636                     r[i+14]=av.get(14);
 637                     r[i+15]=av.get(15);
 638                     r[i+16]=av.get(16);
 639                     r[i+17]=av.get(17);
 640                     r[i+18]=av.get(18);
 641                     r[i+19]=av.get(19);
 642                     r[i+20]=av.get(20);
 643                     r[i+21]=av.get(21);
 644                     r[i+22]=av.get(22);
 645                     r[i+23]=av.get(23);
 646                     r[i+24]=av.get(24);
 647                     r[i+25]=av.get(25);
 648                     r[i+26]=av.get(26);
 649                     r[i+27]=av.get(27);
 650                     r[i+28]=av.get(28);
 651                     r[i+29]=av.get(29);
 652                     r[i+30]=av.get(30);
 653                     r[i+31]=av.get(31);
 654                 } else if (num_lanes == 64) {
 655                     r[i]=av.get(0);
 656                     r[i+1]=av.get(1);
 657                     r[i+2]=av.get(2);
 658                     r[i+3]=av.get(3);
 659                     r[i+4]=av.get(4);
 660                     r[i+5]=av.get(5);
 661                     r[i+6]=av.get(6);
 662                     r[i+7]=av.get(7);
 663                     r[i+8]=av.get(8);
 664                     r[i+9]=av.get(9);
 665                     r[i+10]=av.get(10);
 666                     r[i+11]=av.get(11);
 667                     r[i+12]=av.get(12);
 668                     r[i+13]=av.get(13);
 669                     r[i+14]=av.get(14);
 670                     r[i+15]=av.get(15);
 671                     r[i+16]=av.get(16);
 672                     r[i+17]=av.get(17);
 673                     r[i+18]=av.get(18);
 674                     r[i+19]=av.get(19);
 675                     r[i+20]=av.get(20);
 676                     r[i+21]=av.get(21);
 677                     r[i+22]=av.get(22);
 678                     r[i+23]=av.get(23);
 679                     r[i+24]=av.get(24);
 680                     r[i+25]=av.get(25);
 681                     r[i+26]=av.get(26);
 682                     r[i+27]=av.get(27);
 683                     r[i+28]=av.get(28);
 684                     r[i+29]=av.get(29);
 685                     r[i+30]=av.get(30);
 686                     r[i+31]=av.get(31);
 687                     r[i+32]=av.get(32);
 688                     r[i+33]=av.get(33);
 689                     r[i+34]=av.get(34);
 690                     r[i+35]=av.get(35);
 691                     r[i+36]=av.get(36);
 692                     r[i+37]=av.get(37);
 693                     r[i+38]=av.get(38);
 694                     r[i+39]=av.get(39);
 695                     r[i+40]=av.get(40);
 696                     r[i+41]=av.get(41);
 697                     r[i+42]=av.get(42);
 698                     r[i+43]=av.get(43);
 699                     r[i+44]=av.get(44);
 700                     r[i+45]=av.get(45);
 701                     r[i+46]=av.get(46);
 702                     r[i+47]=av.get(47);
 703                     r[i+48]=av.get(48);
 704                     r[i+49]=av.get(49);
 705                     r[i+50]=av.get(50);
 706                     r[i+51]=av.get(51);
 707                     r[i+52]=av.get(52);
 708                     r[i+53]=av.get(53);
 709                     r[i+54]=av.get(54);
 710                     r[i+55]=av.get(55);
 711                     r[i+56]=av.get(56);
 712                     r[i+57]=av.get(57);
 713                     r[i+58]=av.get(58);
 714                     r[i+59]=av.get(59);
 715                     r[i+60]=av.get(60);
 716                     r[i+61]=av.get(61);
 717                     r[i+62]=av.get(62);
 718                     r[i+63]=av.get(63);
 719                 } else {
 720                     for (int j = 0; j < SPECIES.length(); j++) {
 721                         r[i+j]=av.get(j);
 722                     }
 723                 }
 724             }
 725         }
 726 
 727         bh.consume(r);
 728     }
 729 
 730 
 731     @Benchmark
 732     public void sin(Blackhole bh) {
 733         float[] a = fa.apply(SPECIES.length());
 734         float[] r = fr.apply(SPECIES.length());
 735 
 736         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 737             for (int i = 0; i < a.length; i += SPECIES.length()) {
 738                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 739                 av.sin().intoArray(r, i);
 740             }
 741         }
 742 
 743         bh.consume(r);
 744     }
 745 
 746 
 747 
 748     @Benchmark
 749     public void exp(Blackhole bh) {
 750         float[] a = fa.apply(SPECIES.length());
 751         float[] r = fr.apply(SPECIES.length());
 752 
 753         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 754             for (int i = 0; i < a.length; i += SPECIES.length()) {
 755                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 756                 av.exp().intoArray(r, i);
 757             }
 758         }
 759 
 760         bh.consume(r);
 761     }
 762 
 763 
 764 
 765     @Benchmark
 766     public void log1p(Blackhole bh) {
 767         float[] a = fa.apply(SPECIES.length());
 768         float[] r = fr.apply(SPECIES.length());
 769 
 770         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 771             for (int i = 0; i < a.length; i += SPECIES.length()) {
 772                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 773                 av.log1p().intoArray(r, i);
 774             }
 775         }
 776 
 777         bh.consume(r);
 778     }
 779 
 780 
 781 
 782     @Benchmark
 783     public void log(Blackhole bh) {
 784         float[] a = fa.apply(SPECIES.length());
 785         float[] r = fr.apply(SPECIES.length());
 786 
 787         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 788             for (int i = 0; i < a.length; i += SPECIES.length()) {
 789                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 790                 av.log().intoArray(r, i);
 791             }
 792         }
 793 
 794         bh.consume(r);
 795     }
 796 
 797 
 798 
 799     @Benchmark
 800     public void log10(Blackhole bh) {
 801         float[] a = fa.apply(SPECIES.length());
 802         float[] r = fr.apply(SPECIES.length());
 803 
 804         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 805             for (int i = 0; i < a.length; i += SPECIES.length()) {
 806                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 807                 av.log10().intoArray(r, i);
 808             }
 809         }
 810 
 811         bh.consume(r);
 812     }
 813 
 814 
 815 
 816     @Benchmark
 817     public void expm1(Blackhole bh) {
 818         float[] a = fa.apply(SPECIES.length());
 819         float[] r = fr.apply(SPECIES.length());
 820 
 821         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 822             for (int i = 0; i < a.length; i += SPECIES.length()) {
 823                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 824                 av.expm1().intoArray(r, i);
 825             }
 826         }
 827 
 828         bh.consume(r);
 829     }
 830 
 831 
 832 
 833     @Benchmark
 834     public void cos(Blackhole bh) {
 835         float[] a = fa.apply(SPECIES.length());
 836         float[] r = fr.apply(SPECIES.length());
 837 
 838         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 839             for (int i = 0; i < a.length; i += SPECIES.length()) {
 840                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 841                 av.cos().intoArray(r, i);
 842             }
 843         }
 844 
 845         bh.consume(r);
 846     }
 847 
 848 
 849 
 850     @Benchmark
 851     public void tan(Blackhole bh) {
 852         float[] a = fa.apply(SPECIES.length());
 853         float[] r = fr.apply(SPECIES.length());
 854 
 855         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 856             for (int i = 0; i < a.length; i += SPECIES.length()) {
 857                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 858                 av.tan().intoArray(r, i);
 859             }
 860         }
 861 
 862         bh.consume(r);
 863     }
 864 
 865 
 866 
 867     @Benchmark
 868     public void sinh(Blackhole bh) {
 869         float[] a = fa.apply(SPECIES.length());
 870         float[] r = fr.apply(SPECIES.length());
 871 
 872         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 873             for (int i = 0; i < a.length; i += SPECIES.length()) {
 874                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 875                 av.sinh().intoArray(r, i);
 876             }
 877         }
 878 
 879         bh.consume(r);
 880     }
 881 
 882 
 883 
 884     @Benchmark
 885     public void cosh(Blackhole bh) {
 886         float[] a = fa.apply(SPECIES.length());
 887         float[] r = fr.apply(SPECIES.length());
 888 
 889         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 890             for (int i = 0; i < a.length; i += SPECIES.length()) {
 891                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 892                 av.cosh().intoArray(r, i);
 893             }
 894         }
 895 
 896         bh.consume(r);
 897     }
 898 
 899 
 900 
 901     @Benchmark
 902     public void tanh(Blackhole bh) {
 903         float[] a = fa.apply(SPECIES.length());
 904         float[] r = fr.apply(SPECIES.length());
 905 
 906         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 907             for (int i = 0; i < a.length; i += SPECIES.length()) {
 908                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 909                 av.tanh().intoArray(r, i);
 910             }
 911         }
 912 
 913         bh.consume(r);
 914     }
 915 
 916 
 917 
 918     @Benchmark
 919     public void asin(Blackhole bh) {
 920         float[] a = fa.apply(SPECIES.length());
 921         float[] r = fr.apply(SPECIES.length());
 922 
 923         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 924             for (int i = 0; i < a.length; i += SPECIES.length()) {
 925                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 926                 av.asin().intoArray(r, i);
 927             }
 928         }
 929 
 930         bh.consume(r);
 931     }
 932 
 933 
 934 
 935     @Benchmark
 936     public void acos(Blackhole bh) {
 937         float[] a = fa.apply(SPECIES.length());
 938         float[] r = fr.apply(SPECIES.length());
 939 
 940         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 941             for (int i = 0; i < a.length; i += SPECIES.length()) {
 942                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 943                 av.acos().intoArray(r, i);
 944             }
 945         }
 946 
 947         bh.consume(r);
 948     }
 949 
 950 
 951 
 952     @Benchmark
 953     public void atan(Blackhole bh) {
 954         float[] a = fa.apply(SPECIES.length());
 955         float[] r = fr.apply(SPECIES.length());
 956 
 957         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 958             for (int i = 0; i < a.length; i += SPECIES.length()) {
 959                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 960                 av.atan().intoArray(r, i);
 961             }
 962         }
 963 
 964         bh.consume(r);
 965     }
 966 
 967 
 968 
 969     @Benchmark
 970     public void cbrt(Blackhole bh) {
 971         float[] a = fa.apply(SPECIES.length());
 972         float[] r = fr.apply(SPECIES.length());
 973 
 974         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 975             for (int i = 0; i < a.length; i += SPECIES.length()) {
 976                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 977                 av.cbrt().intoArray(r, i);
 978             }
 979         }
 980 
 981         bh.consume(r);
 982     }
 983 
 984 
 985 
 986     @Benchmark
 987     public void hypot(Blackhole bh) {
 988         float[] a = fa.apply(SPECIES.length());
 989         float[] b = fb.apply(SPECIES.length());
 990         float[] r = fr.apply(SPECIES.length());
 991 
 992         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 993             for (int i = 0; i < a.length; i += SPECIES.length()) {
 994                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 995                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 996                 av.hypot(bv).intoArray(r, i);
 997             }
 998         }
 999 
1000         bh.consume(r);
1001     }
1002 
1003 
1004 
1005     @Benchmark
1006     public void pow(Blackhole bh) {
1007         float[] a = fa.apply(SPECIES.length());
1008         float[] b = fb.apply(SPECIES.length());
1009         float[] r = fr.apply(SPECIES.length());
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < a.length; i += SPECIES.length()) {
1013                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1014                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1015                 av.pow(bv).intoArray(r, i);
1016             }
1017         }
1018 
1019         bh.consume(r);
1020     }
1021 
1022 
1023 
1024     @Benchmark
1025     public void atan2(Blackhole bh) {
1026         float[] a = fa.apply(SPECIES.length());
1027         float[] b = fb.apply(SPECIES.length());
1028         float[] r = fr.apply(SPECIES.length());
1029 
1030         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1031             for (int i = 0; i < a.length; i += SPECIES.length()) {
1032                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1033                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1034                 av.atan2(bv).intoArray(r, i);
1035             }
1036         }
1037 
1038         bh.consume(r);
1039     }
1040 
1041 
1042 
1043     @Benchmark
1044     public void fma(Blackhole bh) {
1045         float[] a = fa.apply(SPECIES.length());
1046         float[] b = fb.apply(SPECIES.length());
1047         float[] c = fc.apply(SPECIES.length());
1048         float[] r = fr.apply(SPECIES.length());
1049 
1050         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1051             for (int i = 0; i < a.length; i += SPECIES.length()) {
1052                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1053                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1054                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1055                 av.fma(bv, cv).intoArray(r, i);
1056             }
1057         }
1058 
1059         bh.consume(r);
1060     }
1061 
1062 
1063 
1064     @Benchmark
1065     public void fmaMasked(Blackhole bh) {
1066         float[] a = fa.apply(SPECIES.length());
1067         float[] b = fb.apply(SPECIES.length());
1068         float[] c = fc.apply(SPECIES.length());
1069         float[] r = fr.apply(SPECIES.length());
1070         boolean[] mask = fm.apply(SPECIES.length());
1071         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1072 
1073         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1074             for (int i = 0; i < a.length; i += SPECIES.length()) {
1075                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1076                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1077                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1078                 av.fma(bv, cv, vmask).intoArray(r, i);
1079             }
1080         }
1081 
1082         bh.consume(r);
1083     }
1084 
1085 
1086     @Benchmark
1087     public void neg(Blackhole bh) {
1088         float[] a = fa.apply(SPECIES.length());
1089         float[] r = fr.apply(SPECIES.length());
1090 
1091         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1092             for (int i = 0; i < a.length; i += SPECIES.length()) {
1093                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1094                 av.neg().intoArray(r, i);
1095             }
1096         }
1097 
1098         bh.consume(r);
1099     }
1100 
1101     @Benchmark
1102     public void negMasked(Blackhole bh) {
1103         float[] a = fa.apply(SPECIES.length());
1104         float[] r = fr.apply(SPECIES.length());
1105         boolean[] mask = fm.apply(SPECIES.length());
1106         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1107 
1108         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1109             for (int i = 0; i < a.length; i += SPECIES.length()) {
1110                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1111                 av.neg(vmask).intoArray(r, i);
1112             }
1113         }
1114 
1115         bh.consume(r);
1116     }
1117 
1118     @Benchmark
1119     public void abs(Blackhole bh) {
1120         float[] a = fa.apply(SPECIES.length());
1121         float[] r = fr.apply(SPECIES.length());
1122 
1123         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1124             for (int i = 0; i < a.length; i += SPECIES.length()) {
1125                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1126                 av.abs().intoArray(r, i);
1127             }
1128         }
1129 
1130         bh.consume(r);
1131     }
1132 
1133     @Benchmark
1134     public void absMasked(Blackhole bh) {
1135         float[] a = fa.apply(SPECIES.length());
1136         float[] r = fr.apply(SPECIES.length());
1137         boolean[] mask = fm.apply(SPECIES.length());
1138         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1139 
1140         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1141             for (int i = 0; i < a.length; i += SPECIES.length()) {
1142                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1143                 av.abs(vmask).intoArray(r, i);
1144             }
1145         }
1146 
1147         bh.consume(r);
1148     }
1149 
1150 
1151 
1152 
1153     @Benchmark
1154     public void sqrt(Blackhole bh) {
1155         float[] a = fa.apply(SPECIES.length());
1156         float[] r = fr.apply(SPECIES.length());
1157 
1158         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1159             for (int i = 0; i < a.length; i += SPECIES.length()) {
1160                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1161                 av.sqrt().intoArray(r, i);
1162             }
1163         }
1164 
1165         bh.consume(r);
1166     }
1167 
1168 
1169 
1170     @Benchmark
1171     public void sqrtMasked(Blackhole bh) {
1172         float[] a = fa.apply(SPECIES.length());
1173         float[] r = fr.apply(SPECIES.length());
1174         boolean[] mask = fm.apply(SPECIES.length());
1175         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1176 
1177         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1178             for (int i = 0; i < a.length; i += SPECIES.length()) {
1179                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1180                 av.sqrt(vmask).intoArray(r, i);
1181             }
1182         }
1183 
1184         bh.consume(r);
1185     }
1186 
1187 
1188 
1189     @Benchmark
1190     public void gather(Blackhole bh) {
1191         float[] a = fa.apply(SPECIES.length());
1192         int[] b    = fs.apply(a.length, SPECIES.length());
1193         float[] r = new float[a.length];
1194 
1195         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1196             for (int i = 0; i < a.length; i += SPECIES.length()) {
1197                 FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i);
1198                 av.intoArray(r, i);
1199             }
1200         }
1201 
1202         bh.consume(r);
1203     }
1204 
1205 
1206 
1207     @Benchmark
1208     public void scatter(Blackhole bh) {
1209         float[] a = fa.apply(SPECIES.length());
1210         int[] b = fs.apply(a.length, SPECIES.length());
1211         float[] r = new float[a.length];
1212 
1213         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1214             for (int i = 0; i < a.length; i += SPECIES.length()) {
1215                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1216                 av.intoArray(r, i, b, i);
1217             }
1218         }
1219 
1220         bh.consume(r);
1221     }
1222 
1223 }
1224