1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.Vector.Species;
  29 import jdk.incubator.vector.FloatVector;
  30 
  31 import java.util.concurrent.TimeUnit;
  32 import java.util.function.BiFunction;
  33 import java.util.function.IntFunction;
  34 
  35 import org.openjdk.jmh.annotations.*;
  36 import org.openjdk.jmh.infra.Blackhole;
  37 
  38 @BenchmarkMode(Mode.Throughput)
  39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  40 @State(Scope.Benchmark)
  41 @Warmup(iterations = 3, time = 1)
  42 @Measurement(iterations = 5, time = 1)
  43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  44 public class Float256Vector extends AbstractVectorBenchmark {
  45     static final Species<Float> SPECIES = FloatVector.SPECIES_256;
  46 
  47     static final int INVOC_COUNT = 1; // get rid of outer loop
  48 
  49     @Param("1024")
  50     int size;
  51 
  52     float[] fill(IntFunction<Float> f) {
  53         float[] array = new float[size];
  54         for (int i = 0; i < array.length; i++) {
  55             array[i] = f.apply(i);
  56         }
  57         return array;
  58     }
  59 
  60     float[] a, b, c, r;
  61     boolean[] m, rm;
  62     int[] s;
  63 
  64     @Setup
  65     public void init() {
  66         size += size % SPECIES.length(); // FIXME: add post-loops
  67 
  68         a = fill(i -> (float)(2*i));
  69         b = fill(i -> (float)(i+1));
  70         c = fill(i -> (float)(i+5));
  71         r = fill(i -> (float)0);
  72 
  73         m = fillMask(size, i -> (i % 2) == 0);
  74         rm = fillMask(size, i -> false);
  75 
  76         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  77     }
  78 
  79     final IntFunction<float[]> fa = vl -> a;
  80     final IntFunction<float[]> fb = vl -> b;
  81     final IntFunction<float[]> fc = vl -> c;
  82     final IntFunction<float[]> fr = vl -> r;
  83     final IntFunction<boolean[]> fm = vl -> m;
  84     final IntFunction<boolean[]> fmr = vl -> rm;
  85     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  86 
  87 
  88     @Benchmark
  89     public void add(Blackhole bh) {
  90         float[] a = fa.apply(SPECIES.length());
  91         float[] b = fb.apply(SPECIES.length());
  92         float[] r = fr.apply(SPECIES.length());
  93 
  94         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  95             for (int i = 0; i < a.length; i += SPECIES.length()) {
  96                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
  97                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
  98                 av.add(bv).intoArray(r, i);
  99             }
 100         }
 101 
 102         bh.consume(r);
 103     }
 104 
 105     @Benchmark
 106     public void addMasked(Blackhole bh) {
 107         float[] a = fa.apply(SPECIES.length());
 108         float[] b = fb.apply(SPECIES.length());
 109         float[] r = fr.apply(SPECIES.length());
 110         boolean[] mask = fm.apply(SPECIES.length());
 111         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 112 
 113         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 114             for (int i = 0; i < a.length; i += SPECIES.length()) {
 115                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 116                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 117                 av.add(bv, vmask).intoArray(r, i);
 118             }
 119         }
 120 
 121         bh.consume(r);
 122     }
 123 
 124     @Benchmark
 125     public void sub(Blackhole bh) {
 126         float[] a = fa.apply(SPECIES.length());
 127         float[] b = fb.apply(SPECIES.length());
 128         float[] r = fr.apply(SPECIES.length());
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < a.length; i += SPECIES.length()) {
 132                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 133                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 134                 av.sub(bv).intoArray(r, i);
 135             }
 136         }
 137 
 138         bh.consume(r);
 139     }
 140 
 141     @Benchmark
 142     public void subMasked(Blackhole bh) {
 143         float[] a = fa.apply(SPECIES.length());
 144         float[] b = fb.apply(SPECIES.length());
 145         float[] r = fr.apply(SPECIES.length());
 146         boolean[] mask = fm.apply(SPECIES.length());
 147         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 148 
 149         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 150             for (int i = 0; i < a.length; i += SPECIES.length()) {
 151                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 152                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 153                 av.sub(bv, vmask).intoArray(r, i);
 154             }
 155         }
 156 
 157         bh.consume(r);
 158     }
 159 
 160 
 161     @Benchmark
 162     public void div(Blackhole bh) {
 163         float[] a = fa.apply(SPECIES.length());
 164         float[] b = fb.apply(SPECIES.length());
 165         float[] r = fr.apply(SPECIES.length());
 166 
 167         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 168             for (int i = 0; i < a.length; i += SPECIES.length()) {
 169                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 170                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 171                 av.div(bv).intoArray(r, i);
 172             }
 173         }
 174 
 175         bh.consume(r);
 176     }
 177 
 178 
 179 
 180     @Benchmark
 181     public void divMasked(Blackhole bh) {
 182         float[] a = fa.apply(SPECIES.length());
 183         float[] b = fb.apply(SPECIES.length());
 184         float[] r = fr.apply(SPECIES.length());
 185         boolean[] mask = fm.apply(SPECIES.length());
 186         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 187 
 188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 189             for (int i = 0; i < a.length; i += SPECIES.length()) {
 190                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 191                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 192                 av.div(bv, vmask).intoArray(r, i);
 193             }
 194         }
 195 
 196         bh.consume(r);
 197     }
 198 
 199 
 200     @Benchmark
 201     public void mul(Blackhole bh) {
 202         float[] a = fa.apply(SPECIES.length());
 203         float[] b = fb.apply(SPECIES.length());
 204         float[] r = fr.apply(SPECIES.length());
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < a.length; i += SPECIES.length()) {
 208                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 209                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 210                 av.mul(bv).intoArray(r, i);
 211             }
 212         }
 213 
 214         bh.consume(r);
 215     }
 216 
 217     @Benchmark
 218     public void mulMasked(Blackhole bh) {
 219         float[] a = fa.apply(SPECIES.length());
 220         float[] b = fb.apply(SPECIES.length());
 221         float[] r = fr.apply(SPECIES.length());
 222         boolean[] mask = fm.apply(SPECIES.length());
 223         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 224 
 225         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 226             for (int i = 0; i < a.length; i += SPECIES.length()) {
 227                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 228                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 229                 av.mul(bv, vmask).intoArray(r, i);
 230             }
 231         }
 232 
 233         bh.consume(r);
 234     }
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266     @Benchmark
 267     public void max(Blackhole bh) {
 268         float[] a = fa.apply(SPECIES.length());
 269         float[] b = fb.apply(SPECIES.length());
 270         float[] r = fr.apply(SPECIES.length());
 271 
 272         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 273             for (int i = 0; i < a.length; i += SPECIES.length()) {
 274                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 275                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 276                 av.max(bv).intoArray(r, i);
 277             }
 278         }
 279 
 280         bh.consume(r);
 281     }
 282 
 283     @Benchmark
 284     public void min(Blackhole bh) {
 285         float[] a = fa.apply(SPECIES.length());
 286         float[] b = fb.apply(SPECIES.length());
 287         float[] r = fr.apply(SPECIES.length());
 288 
 289         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 290             for (int i = 0; i < a.length; i += SPECIES.length()) {
 291                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 292                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 293                 av.min(bv).intoArray(r, i);
 294             }
 295         }
 296 
 297         bh.consume(r);
 298     }
 299 
 300 
 301 
 302 
 303     @Benchmark
 304     public void addAll(Blackhole bh) {
 305         float[] a = fa.apply(SPECIES.length());
 306         float ra = 0;
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             ra = 0;
 310             for (int i = 0; i < a.length; i += SPECIES.length()) {
 311                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 312                 ra += av.addAll();
 313             }
 314         }
 315         bh.consume(ra);
 316     }
 317 
 318     @Benchmark
 319     public void mulAll(Blackhole bh) {
 320         float[] a = fa.apply(SPECIES.length());
 321         float ra = 1;
 322 
 323         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 324             ra = 1;
 325             for (int i = 0; i < a.length; i += SPECIES.length()) {
 326                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 327                 ra *= av.mulAll();
 328             }
 329         }
 330         bh.consume(ra);
 331     }
 332 
 333     @Benchmark
 334     public void minAll(Blackhole bh) {
 335         float[] a = fa.apply(SPECIES.length());
 336         float ra = Float.POSITIVE_INFINITY;
 337 
 338         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 339             ra = Float.POSITIVE_INFINITY;
 340             for (int i = 0; i < a.length; i += SPECIES.length()) {
 341                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 342                 ra = (float)Math.min(ra, av.minAll());
 343             }
 344         }
 345         bh.consume(ra);
 346     }
 347 
 348     @Benchmark
 349     public void maxAll(Blackhole bh) {
 350         float[] a = fa.apply(SPECIES.length());
 351         float ra = Float.NEGATIVE_INFINITY;
 352 
 353         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 354             ra = Float.NEGATIVE_INFINITY;
 355             for (int i = 0; i < a.length; i += SPECIES.length()) {
 356                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 357                 ra = (float)Math.max(ra, av.maxAll());
 358             }
 359         }
 360         bh.consume(ra);
 361     }
 362 
 363 
 364 
 365     @Benchmark
 366     public void with(Blackhole bh) {
 367         float[] a = fa.apply(SPECIES.length());
 368         float[] r = fr.apply(SPECIES.length());
 369 
 370         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 371             for (int i = 0; i < a.length; i += SPECIES.length()) {
 372                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 373                 av.with(0, (float)4).intoArray(r, i);
 374             }
 375         }
 376 
 377         bh.consume(r);
 378     }
 379 
 380     @Benchmark
 381     public Object lessThan() {
 382         float[] a = fa.apply(size);
 383         float[] b = fb.apply(size);
 384         boolean[] ms = fm.apply(size);
 385         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 386 
 387         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 388             for (int i = 0; i < a.length; i += SPECIES.length()) {
 389                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 390                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 391                 Vector.Mask<Float> mv = av.lessThan(bv);
 392 
 393                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 394             }
 395         }
 396         return m;
 397     }
 398 
 399 
 400     @Benchmark
 401     public Object greaterThan() {
 402         float[] a = fa.apply(size);
 403         float[] b = fb.apply(size);
 404         boolean[] ms = fm.apply(size);
 405         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 406 
 407         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 408             for (int i = 0; i < a.length; i += SPECIES.length()) {
 409                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 410                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 411                 Vector.Mask<Float> mv = av.greaterThan(bv);
 412 
 413                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 414             }
 415         }
 416         return m;
 417     }
 418 
 419 
 420     @Benchmark
 421     public Object equal() {
 422         float[] a = fa.apply(size);
 423         float[] b = fb.apply(size);
 424         boolean[] ms = fm.apply(size);
 425         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 426 
 427         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 428             for (int i = 0; i < a.length; i += SPECIES.length()) {
 429                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 430                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 431                 Vector.Mask<Float> mv = av.equal(bv);
 432 
 433                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 434             }
 435         }
 436         return m;
 437     }
 438 
 439 
 440     @Benchmark
 441     public Object notEqual() {
 442         float[] a = fa.apply(size);
 443         float[] b = fb.apply(size);
 444         boolean[] ms = fm.apply(size);
 445         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 446 
 447         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 448             for (int i = 0; i < a.length; i += SPECIES.length()) {
 449                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 450                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 451                 Vector.Mask<Float> mv = av.notEqual(bv);
 452 
 453                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 454             }
 455         }
 456         return m;
 457     }
 458 
 459 
 460     @Benchmark
 461     public Object lessThanEq() {
 462         float[] a = fa.apply(size);
 463         float[] b = fb.apply(size);
 464         boolean[] ms = fm.apply(size);
 465         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 466 
 467         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 468             for (int i = 0; i < a.length; i += SPECIES.length()) {
 469                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 470                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 471                 Vector.Mask<Float> mv = av.lessThanEq(bv);
 472 
 473                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 474             }
 475         }
 476         return m;
 477     }
 478 
 479 
 480     @Benchmark
 481     public Object greaterThanEq() {
 482         float[] a = fa.apply(size);
 483         float[] b = fb.apply(size);
 484         boolean[] ms = fm.apply(size);
 485         Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
 486 
 487         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 488             for (int i = 0; i < a.length; i += SPECIES.length()) {
 489                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 490                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 491                 Vector.Mask<Float> mv = av.greaterThanEq(bv);
 492 
 493                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 494             }
 495         }
 496         return m;
 497     }
 498 
 499 
 500     @Benchmark
 501     public void blend(Blackhole bh) {
 502         float[] a = fa.apply(SPECIES.length());
 503         float[] b = fb.apply(SPECIES.length());
 504         float[] r = fr.apply(SPECIES.length());
 505         boolean[] mask = fm.apply(SPECIES.length());
 506         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
 507 
 508         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 509             for (int i = 0; i < a.length; i += SPECIES.length()) {
 510                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 511                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 512                 av.blend(bv, vmask).intoArray(r, i);
 513             }
 514         }
 515 
 516         bh.consume(r);
 517     }
 518 
 519     @Benchmark
 520     public void rearrange(Blackhole bh) {
 521         float[] a = fa.apply(SPECIES.length());
 522         int[] order = fs.apply(a.length, SPECIES.length());
 523         float[] r = fr.apply(SPECIES.length());
 524 
 525         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 526             for (int i = 0; i < a.length; i += SPECIES.length()) {
 527                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 528                 av.rearrange(FloatVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 529             }
 530         }
 531 
 532         bh.consume(r);
 533     }
 534 
 535     @Benchmark
 536     public void extract(Blackhole bh) {
 537         float[] a = fa.apply(SPECIES.length());
 538         float[] r = fr.apply(SPECIES.length());
 539 
 540         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 541             for (int i = 0; i < a.length; i += SPECIES.length()) {
 542                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 543                 int num_lanes = SPECIES.length();
 544                 // Manually unroll because full unroll happens after intrinsification.
 545                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 546                 if (num_lanes == 1) {
 547                     r[i]=av.get(0);
 548                 } else if (num_lanes == 2) {
 549                     r[i]=av.get(0);
 550                     r[i+1]=av.get(1);
 551                 } else if (num_lanes == 4) {
 552                     r[i]=av.get(0);
 553                     r[i+1]=av.get(1);
 554                     r[i+2]=av.get(2);
 555                     r[i+3]=av.get(3);
 556                 } else if (num_lanes == 8) {
 557                     r[i]=av.get(0);
 558                     r[i+1]=av.get(1);
 559                     r[i+2]=av.get(2);
 560                     r[i+3]=av.get(3);
 561                     r[i+4]=av.get(4);
 562                     r[i+5]=av.get(5);
 563                     r[i+6]=av.get(6);
 564                     r[i+7]=av.get(7);
 565                 } else if (num_lanes == 16) {
 566                     r[i]=av.get(0);
 567                     r[i+1]=av.get(1);
 568                     r[i+2]=av.get(2);
 569                     r[i+3]=av.get(3);
 570                     r[i+4]=av.get(4);
 571                     r[i+5]=av.get(5);
 572                     r[i+6]=av.get(6);
 573                     r[i+7]=av.get(7);
 574                     r[i+8]=av.get(8);
 575                     r[i+9]=av.get(9);
 576                     r[i+10]=av.get(10);
 577                     r[i+11]=av.get(11);
 578                     r[i+12]=av.get(12);
 579                     r[i+13]=av.get(13);
 580                     r[i+14]=av.get(14);
 581                     r[i+15]=av.get(15);
 582                 } else if (num_lanes == 32) {
 583                     r[i]=av.get(0);
 584                     r[i+1]=av.get(1);
 585                     r[i+2]=av.get(2);
 586                     r[i+3]=av.get(3);
 587                     r[i+4]=av.get(4);
 588                     r[i+5]=av.get(5);
 589                     r[i+6]=av.get(6);
 590                     r[i+7]=av.get(7);
 591                     r[i+8]=av.get(8);
 592                     r[i+9]=av.get(9);
 593                     r[i+10]=av.get(10);
 594                     r[i+11]=av.get(11);
 595                     r[i+12]=av.get(12);
 596                     r[i+13]=av.get(13);
 597                     r[i+14]=av.get(14);
 598                     r[i+15]=av.get(15);
 599                     r[i+16]=av.get(16);
 600                     r[i+17]=av.get(17);
 601                     r[i+18]=av.get(18);
 602                     r[i+19]=av.get(19);
 603                     r[i+20]=av.get(20);
 604                     r[i+21]=av.get(21);
 605                     r[i+22]=av.get(22);
 606                     r[i+23]=av.get(23);
 607                     r[i+24]=av.get(24);
 608                     r[i+25]=av.get(25);
 609                     r[i+26]=av.get(26);
 610                     r[i+27]=av.get(27);
 611                     r[i+28]=av.get(28);
 612                     r[i+29]=av.get(29);
 613                     r[i+30]=av.get(30);
 614                     r[i+31]=av.get(31);
 615                 } else if (num_lanes == 64) {
 616                     r[i]=av.get(0);
 617                     r[i+1]=av.get(1);
 618                     r[i+2]=av.get(2);
 619                     r[i+3]=av.get(3);
 620                     r[i+4]=av.get(4);
 621                     r[i+5]=av.get(5);
 622                     r[i+6]=av.get(6);
 623                     r[i+7]=av.get(7);
 624                     r[i+8]=av.get(8);
 625                     r[i+9]=av.get(9);
 626                     r[i+10]=av.get(10);
 627                     r[i+11]=av.get(11);
 628                     r[i+12]=av.get(12);
 629                     r[i+13]=av.get(13);
 630                     r[i+14]=av.get(14);
 631                     r[i+15]=av.get(15);
 632                     r[i+16]=av.get(16);
 633                     r[i+17]=av.get(17);
 634                     r[i+18]=av.get(18);
 635                     r[i+19]=av.get(19);
 636                     r[i+20]=av.get(20);
 637                     r[i+21]=av.get(21);
 638                     r[i+22]=av.get(22);
 639                     r[i+23]=av.get(23);
 640                     r[i+24]=av.get(24);
 641                     r[i+25]=av.get(25);
 642                     r[i+26]=av.get(26);
 643                     r[i+27]=av.get(27);
 644                     r[i+28]=av.get(28);
 645                     r[i+29]=av.get(29);
 646                     r[i+30]=av.get(30);
 647                     r[i+31]=av.get(31);
 648                     r[i+32]=av.get(32);
 649                     r[i+33]=av.get(33);
 650                     r[i+34]=av.get(34);
 651                     r[i+35]=av.get(35);
 652                     r[i+36]=av.get(36);
 653                     r[i+37]=av.get(37);
 654                     r[i+38]=av.get(38);
 655                     r[i+39]=av.get(39);
 656                     r[i+40]=av.get(40);
 657                     r[i+41]=av.get(41);
 658                     r[i+42]=av.get(42);
 659                     r[i+43]=av.get(43);
 660                     r[i+44]=av.get(44);
 661                     r[i+45]=av.get(45);
 662                     r[i+46]=av.get(46);
 663                     r[i+47]=av.get(47);
 664                     r[i+48]=av.get(48);
 665                     r[i+49]=av.get(49);
 666                     r[i+50]=av.get(50);
 667                     r[i+51]=av.get(51);
 668                     r[i+52]=av.get(52);
 669                     r[i+53]=av.get(53);
 670                     r[i+54]=av.get(54);
 671                     r[i+55]=av.get(55);
 672                     r[i+56]=av.get(56);
 673                     r[i+57]=av.get(57);
 674                     r[i+58]=av.get(58);
 675                     r[i+59]=av.get(59);
 676                     r[i+60]=av.get(60);
 677                     r[i+61]=av.get(61);
 678                     r[i+62]=av.get(62);
 679                     r[i+63]=av.get(63);
 680                 } else {
 681                     for (int j = 0; j < SPECIES.length(); j++) {
 682                         r[i+j]=av.get(j);
 683                     }
 684                 }
 685             }
 686         }
 687 
 688         bh.consume(r);
 689     }
 690 
 691 
 692     @Benchmark
 693     public void sin(Blackhole bh) {
 694         float[] a = fa.apply(SPECIES.length());
 695         float[] r = fr.apply(SPECIES.length());
 696 
 697         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 698             for (int i = 0; i < a.length; i += SPECIES.length()) {
 699                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 700                 av.sin().intoArray(r, i);
 701             }
 702         }
 703 
 704         bh.consume(r);
 705     }
 706 
 707 
 708 
 709     @Benchmark
 710     public void exp(Blackhole bh) {
 711         float[] a = fa.apply(SPECIES.length());
 712         float[] r = fr.apply(SPECIES.length());
 713 
 714         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 715             for (int i = 0; i < a.length; i += SPECIES.length()) {
 716                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 717                 av.exp().intoArray(r, i);
 718             }
 719         }
 720 
 721         bh.consume(r);
 722     }
 723 
 724 
 725 
 726     @Benchmark
 727     public void log1p(Blackhole bh) {
 728         float[] a = fa.apply(SPECIES.length());
 729         float[] r = fr.apply(SPECIES.length());
 730 
 731         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 732             for (int i = 0; i < a.length; i += SPECIES.length()) {
 733                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 734                 av.log1p().intoArray(r, i);
 735             }
 736         }
 737 
 738         bh.consume(r);
 739     }
 740 
 741 
 742 
 743     @Benchmark
 744     public void log(Blackhole bh) {
 745         float[] a = fa.apply(SPECIES.length());
 746         float[] r = fr.apply(SPECIES.length());
 747 
 748         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 749             for (int i = 0; i < a.length; i += SPECIES.length()) {
 750                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 751                 av.log().intoArray(r, i);
 752             }
 753         }
 754 
 755         bh.consume(r);
 756     }
 757 
 758 
 759 
 760     @Benchmark
 761     public void log10(Blackhole bh) {
 762         float[] a = fa.apply(SPECIES.length());
 763         float[] r = fr.apply(SPECIES.length());
 764 
 765         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 766             for (int i = 0; i < a.length; i += SPECIES.length()) {
 767                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 768                 av.log10().intoArray(r, i);
 769             }
 770         }
 771 
 772         bh.consume(r);
 773     }
 774 
 775 
 776 
 777     @Benchmark
 778     public void expm1(Blackhole bh) {
 779         float[] a = fa.apply(SPECIES.length());
 780         float[] r = fr.apply(SPECIES.length());
 781 
 782         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 783             for (int i = 0; i < a.length; i += SPECIES.length()) {
 784                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 785                 av.expm1().intoArray(r, i);
 786             }
 787         }
 788 
 789         bh.consume(r);
 790     }
 791 
 792 
 793 
 794     @Benchmark
 795     public void cos(Blackhole bh) {
 796         float[] a = fa.apply(SPECIES.length());
 797         float[] r = fr.apply(SPECIES.length());
 798 
 799         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 800             for (int i = 0; i < a.length; i += SPECIES.length()) {
 801                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 802                 av.cos().intoArray(r, i);
 803             }
 804         }
 805 
 806         bh.consume(r);
 807     }
 808 
 809 
 810 
 811     @Benchmark
 812     public void tan(Blackhole bh) {
 813         float[] a = fa.apply(SPECIES.length());
 814         float[] r = fr.apply(SPECIES.length());
 815 
 816         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 817             for (int i = 0; i < a.length; i += SPECIES.length()) {
 818                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 819                 av.tan().intoArray(r, i);
 820             }
 821         }
 822 
 823         bh.consume(r);
 824     }
 825 
 826 
 827 
 828     @Benchmark
 829     public void sinh(Blackhole bh) {
 830         float[] a = fa.apply(SPECIES.length());
 831         float[] r = fr.apply(SPECIES.length());
 832 
 833         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 834             for (int i = 0; i < a.length; i += SPECIES.length()) {
 835                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 836                 av.sinh().intoArray(r, i);
 837             }
 838         }
 839 
 840         bh.consume(r);
 841     }
 842 
 843 
 844 
 845     @Benchmark
 846     public void cosh(Blackhole bh) {
 847         float[] a = fa.apply(SPECIES.length());
 848         float[] r = fr.apply(SPECIES.length());
 849 
 850         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 851             for (int i = 0; i < a.length; i += SPECIES.length()) {
 852                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 853                 av.cosh().intoArray(r, i);
 854             }
 855         }
 856 
 857         bh.consume(r);
 858     }
 859 
 860 
 861 
 862     @Benchmark
 863     public void tanh(Blackhole bh) {
 864         float[] a = fa.apply(SPECIES.length());
 865         float[] r = fr.apply(SPECIES.length());
 866 
 867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 868             for (int i = 0; i < a.length; i += SPECIES.length()) {
 869                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 870                 av.tanh().intoArray(r, i);
 871             }
 872         }
 873 
 874         bh.consume(r);
 875     }
 876 
 877 
 878 
 879     @Benchmark
 880     public void asin(Blackhole bh) {
 881         float[] a = fa.apply(SPECIES.length());
 882         float[] r = fr.apply(SPECIES.length());
 883 
 884         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 885             for (int i = 0; i < a.length; i += SPECIES.length()) {
 886                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 887                 av.asin().intoArray(r, i);
 888             }
 889         }
 890 
 891         bh.consume(r);
 892     }
 893 
 894 
 895 
 896     @Benchmark
 897     public void acos(Blackhole bh) {
 898         float[] a = fa.apply(SPECIES.length());
 899         float[] r = fr.apply(SPECIES.length());
 900 
 901         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 902             for (int i = 0; i < a.length; i += SPECIES.length()) {
 903                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 904                 av.acos().intoArray(r, i);
 905             }
 906         }
 907 
 908         bh.consume(r);
 909     }
 910 
 911 
 912 
 913     @Benchmark
 914     public void atan(Blackhole bh) {
 915         float[] a = fa.apply(SPECIES.length());
 916         float[] r = fr.apply(SPECIES.length());
 917 
 918         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 919             for (int i = 0; i < a.length; i += SPECIES.length()) {
 920                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 921                 av.atan().intoArray(r, i);
 922             }
 923         }
 924 
 925         bh.consume(r);
 926     }
 927 
 928 
 929 
 930     @Benchmark
 931     public void cbrt(Blackhole bh) {
 932         float[] a = fa.apply(SPECIES.length());
 933         float[] r = fr.apply(SPECIES.length());
 934 
 935         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 936             for (int i = 0; i < a.length; i += SPECIES.length()) {
 937                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 938                 av.cbrt().intoArray(r, i);
 939             }
 940         }
 941 
 942         bh.consume(r);
 943     }
 944 
 945 
 946 
 947     @Benchmark
 948     public void hypot(Blackhole bh) {
 949         float[] a = fa.apply(SPECIES.length());
 950         float[] b = fb.apply(SPECIES.length());
 951         float[] r = fr.apply(SPECIES.length());
 952 
 953         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 954             for (int i = 0; i < a.length; i += SPECIES.length()) {
 955                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 956                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 957                 av.hypot(bv).intoArray(r, i);
 958             }
 959         }
 960 
 961         bh.consume(r);
 962     }
 963 
 964 
 965 
 966     @Benchmark
 967     public void pow(Blackhole bh) {
 968         float[] a = fa.apply(SPECIES.length());
 969         float[] b = fb.apply(SPECIES.length());
 970         float[] r = fr.apply(SPECIES.length());
 971 
 972         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 973             for (int i = 0; i < a.length; i += SPECIES.length()) {
 974                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 975                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 976                 av.pow(bv).intoArray(r, i);
 977             }
 978         }
 979 
 980         bh.consume(r);
 981     }
 982 
 983 
 984 
 985     @Benchmark
 986     public void atan2(Blackhole bh) {
 987         float[] a = fa.apply(SPECIES.length());
 988         float[] b = fb.apply(SPECIES.length());
 989         float[] r = fr.apply(SPECIES.length());
 990 
 991         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 992             for (int i = 0; i < a.length; i += SPECIES.length()) {
 993                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 994                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 995                 av.atan2(bv).intoArray(r, i);
 996             }
 997         }
 998 
 999         bh.consume(r);
1000     }
1001 
1002 
1003 
1004     @Benchmark
1005     public void fma(Blackhole bh) {
1006         float[] a = fa.apply(SPECIES.length());
1007         float[] b = fb.apply(SPECIES.length());
1008         float[] c = fc.apply(SPECIES.length());
1009         float[] r = fr.apply(SPECIES.length());
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < a.length; i += SPECIES.length()) {
1013                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1014                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1015                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1016                 av.fma(bv, cv).intoArray(r, i);
1017             }
1018         }
1019 
1020         bh.consume(r);
1021     }
1022 
1023 
1024 
1025     @Benchmark
1026     public void fmaMasked(Blackhole bh) {
1027         float[] a = fa.apply(SPECIES.length());
1028         float[] b = fb.apply(SPECIES.length());
1029         float[] c = fc.apply(SPECIES.length());
1030         float[] r = fr.apply(SPECIES.length());
1031         boolean[] mask = fm.apply(SPECIES.length());
1032         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1033 
1034         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1035             for (int i = 0; i < a.length; i += SPECIES.length()) {
1036                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1037                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1038                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1039                 av.fma(bv, cv, vmask).intoArray(r, i);
1040             }
1041         }
1042 
1043         bh.consume(r);
1044     }
1045 
1046 
1047     @Benchmark
1048     public void neg(Blackhole bh) {
1049         float[] a = fa.apply(SPECIES.length());
1050         float[] r = fr.apply(SPECIES.length());
1051 
1052         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1053             for (int i = 0; i < a.length; i += SPECIES.length()) {
1054                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1055                 av.neg().intoArray(r, i);
1056             }
1057         }
1058 
1059         bh.consume(r);
1060     }
1061 
1062     @Benchmark
1063     public void negMasked(Blackhole bh) {
1064         float[] a = fa.apply(SPECIES.length());
1065         float[] r = fr.apply(SPECIES.length());
1066         boolean[] mask = fm.apply(SPECIES.length());
1067         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1068 
1069         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1070             for (int i = 0; i < a.length; i += SPECIES.length()) {
1071                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1072                 av.neg(vmask).intoArray(r, i);
1073             }
1074         }
1075 
1076         bh.consume(r);
1077     }
1078 
1079     @Benchmark
1080     public void abs(Blackhole bh) {
1081         float[] a = fa.apply(SPECIES.length());
1082         float[] r = fr.apply(SPECIES.length());
1083 
1084         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1085             for (int i = 0; i < a.length; i += SPECIES.length()) {
1086                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1087                 av.abs().intoArray(r, i);
1088             }
1089         }
1090 
1091         bh.consume(r);
1092     }
1093 
1094     @Benchmark
1095     public void absMasked(Blackhole bh) {
1096         float[] a = fa.apply(SPECIES.length());
1097         float[] r = fr.apply(SPECIES.length());
1098         boolean[] mask = fm.apply(SPECIES.length());
1099         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1100 
1101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1102             for (int i = 0; i < a.length; i += SPECIES.length()) {
1103                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1104                 av.abs(vmask).intoArray(r, i);
1105             }
1106         }
1107 
1108         bh.consume(r);
1109     }
1110 
1111 
1112 
1113 
1114     @Benchmark
1115     public void sqrt(Blackhole bh) {
1116         float[] a = fa.apply(SPECIES.length());
1117         float[] r = fr.apply(SPECIES.length());
1118 
1119         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1120             for (int i = 0; i < a.length; i += SPECIES.length()) {
1121                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1122                 av.sqrt().intoArray(r, i);
1123             }
1124         }
1125 
1126         bh.consume(r);
1127     }
1128 
1129 
1130 
1131     @Benchmark
1132     public void sqrtMasked(Blackhole bh) {
1133         float[] a = fa.apply(SPECIES.length());
1134         float[] r = fr.apply(SPECIES.length());
1135         boolean[] mask = fm.apply(SPECIES.length());
1136         Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1137 
1138         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1139             for (int i = 0; i < a.length; i += SPECIES.length()) {
1140                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1141                 av.sqrt(vmask).intoArray(r, i);
1142             }
1143         }
1144 
1145         bh.consume(r);
1146     }
1147 
1148 
1149 
1150     @Benchmark
1151     public void gather(Blackhole bh) {
1152         float[] a = fa.apply(SPECIES.length());
1153         int[] b    = fs.apply(a.length, SPECIES.length());
1154         float[] r = new float[a.length];
1155 
1156         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1157             for (int i = 0; i < a.length; i += SPECIES.length()) {
1158                 FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i);
1159                 av.intoArray(r, i);
1160             }
1161         }
1162 
1163         bh.consume(r);
1164     }
1165 
1166 
1167 
1168     @Benchmark
1169     public void scatter(Blackhole bh) {
1170         float[] a = fa.apply(SPECIES.length());
1171         int[] b = fs.apply(a.length, SPECIES.length());
1172         float[] r = new float[a.length];
1173 
1174         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1175             for (int i = 0; i < a.length; i += SPECIES.length()) {
1176                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1177                 av.intoArray(r, i, b, i);
1178             }
1179         }
1180 
1181         bh.consume(r);
1182     }
1183 
1184 }
1185