1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.VectorSpecies;
  31 
  32 import java.util.concurrent.TimeUnit;
  33 import java.util.function.BiFunction;
  34 import java.util.function.IntFunction;
  35 
  36 import org.openjdk.jmh.annotations.*;
  37 import org.openjdk.jmh.infra.Blackhole;
  38 
  39 @BenchmarkMode(Mode.Throughput)
  40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  41 @State(Scope.Benchmark)
  42 @Warmup(iterations = 3, time = 1)
  43 @Measurement(iterations = 5, time = 1)
  44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  45 public class Float512Vector extends AbstractVectorBenchmark {
  46     static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_512;
  47 
  48     static final int INVOC_COUNT = 1; // get rid of outer loop
  49 
  50     @Param("1024")
  51     int size;
  52 
  53     float[] fill(IntFunction<Float> f) {
  54         float[] array = new float[size];
  55         for (int i = 0; i < array.length; i++) {
  56             array[i] = f.apply(i);
  57         }
  58         return array;
  59     }
  60 
  61     float[] a, b, c, r;
  62     boolean[] m, rm;
  63     int[] s;
  64 
  65     @Setup
  66     public void init() {
  67         size += size % SPECIES.length(); // FIXME: add post-loops
  68 
  69         a = fill(i -> (float)(2*i));
  70         b = fill(i -> (float)(i+1));
  71         c = fill(i -> (float)(i+5));
  72         r = fill(i -> (float)0);
  73 
  74         m = fillMask(size, i -> (i % 2) == 0);
  75         rm = fillMask(size, i -> false);
  76 
  77         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  78     }
  79 
  80     final IntFunction<float[]> fa = vl -> a;
  81     final IntFunction<float[]> fb = vl -> b;
  82     final IntFunction<float[]> fc = vl -> c;
  83     final IntFunction<float[]> fr = vl -> r;
  84     final IntFunction<boolean[]> fm = vl -> m;
  85     final IntFunction<boolean[]> fmr = vl -> rm;
  86     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  87 
  88 
  89     @Benchmark
  90     public void add(Blackhole bh) {
  91         float[] a = fa.apply(SPECIES.length());
  92         float[] b = fb.apply(SPECIES.length());
  93         float[] r = fr.apply(SPECIES.length());
  94 
  95         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  96             for (int i = 0; i < a.length; i += SPECIES.length()) {
  97                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
  98                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
  99                 av.add(bv).intoArray(r, i);
 100             }
 101         }
 102 
 103         bh.consume(r);
 104     }
 105 
 106     @Benchmark
 107     public void addMasked(Blackhole bh) {
 108         float[] a = fa.apply(SPECIES.length());
 109         float[] b = fb.apply(SPECIES.length());
 110         float[] r = fr.apply(SPECIES.length());
 111         boolean[] mask = fm.apply(SPECIES.length());
 112         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 113 
 114         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 115             for (int i = 0; i < a.length; i += SPECIES.length()) {
 116                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 117                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 118                 av.add(bv, vmask).intoArray(r, i);
 119             }
 120         }
 121 
 122         bh.consume(r);
 123     }
 124 
 125     @Benchmark
 126     public void sub(Blackhole bh) {
 127         float[] a = fa.apply(SPECIES.length());
 128         float[] b = fb.apply(SPECIES.length());
 129         float[] r = fr.apply(SPECIES.length());
 130 
 131         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 132             for (int i = 0; i < a.length; i += SPECIES.length()) {
 133                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 134                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 135                 av.sub(bv).intoArray(r, i);
 136             }
 137         }
 138 
 139         bh.consume(r);
 140     }
 141 
 142     @Benchmark
 143     public void subMasked(Blackhole bh) {
 144         float[] a = fa.apply(SPECIES.length());
 145         float[] b = fb.apply(SPECIES.length());
 146         float[] r = fr.apply(SPECIES.length());
 147         boolean[] mask = fm.apply(SPECIES.length());
 148         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 149 
 150         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 151             for (int i = 0; i < a.length; i += SPECIES.length()) {
 152                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 153                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 154                 av.sub(bv, vmask).intoArray(r, i);
 155             }
 156         }
 157 
 158         bh.consume(r);
 159     }
 160 
 161 
 162     @Benchmark
 163     public void div(Blackhole bh) {
 164         float[] a = fa.apply(SPECIES.length());
 165         float[] b = fb.apply(SPECIES.length());
 166         float[] r = fr.apply(SPECIES.length());
 167 
 168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 169             for (int i = 0; i < a.length; i += SPECIES.length()) {
 170                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 171                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 172                 av.div(bv).intoArray(r, i);
 173             }
 174         }
 175 
 176         bh.consume(r);
 177     }
 178 
 179 
 180 
 181     @Benchmark
 182     public void divMasked(Blackhole bh) {
 183         float[] a = fa.apply(SPECIES.length());
 184         float[] b = fb.apply(SPECIES.length());
 185         float[] r = fr.apply(SPECIES.length());
 186         boolean[] mask = fm.apply(SPECIES.length());
 187         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 188 
 189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 190             for (int i = 0; i < a.length; i += SPECIES.length()) {
 191                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 192                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 193                 av.div(bv, vmask).intoArray(r, i);
 194             }
 195         }
 196 
 197         bh.consume(r);
 198     }
 199 
 200 
 201     @Benchmark
 202     public void mul(Blackhole bh) {
 203         float[] a = fa.apply(SPECIES.length());
 204         float[] b = fb.apply(SPECIES.length());
 205         float[] r = fr.apply(SPECIES.length());
 206 
 207         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 208             for (int i = 0; i < a.length; i += SPECIES.length()) {
 209                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 210                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 211                 av.mul(bv).intoArray(r, i);
 212             }
 213         }
 214 
 215         bh.consume(r);
 216     }
 217 
 218     @Benchmark
 219     public void mulMasked(Blackhole bh) {
 220         float[] a = fa.apply(SPECIES.length());
 221         float[] b = fb.apply(SPECIES.length());
 222         float[] r = fr.apply(SPECIES.length());
 223         boolean[] mask = fm.apply(SPECIES.length());
 224         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 225 
 226         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 227             for (int i = 0; i < a.length; i += SPECIES.length()) {
 228                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 229                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 230                 av.mul(bv, vmask).intoArray(r, i);
 231             }
 232         }
 233 
 234         bh.consume(r);
 235     }
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266 
 267     @Benchmark
 268     public void max(Blackhole bh) {
 269         float[] a = fa.apply(SPECIES.length());
 270         float[] b = fb.apply(SPECIES.length());
 271         float[] r = fr.apply(SPECIES.length());
 272 
 273         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 274             for (int i = 0; i < a.length; i += SPECIES.length()) {
 275                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 276                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 277                 av.max(bv).intoArray(r, i);
 278             }
 279         }
 280 
 281         bh.consume(r);
 282     }
 283 
 284     @Benchmark
 285     public void min(Blackhole bh) {
 286         float[] a = fa.apply(SPECIES.length());
 287         float[] b = fb.apply(SPECIES.length());
 288         float[] r = fr.apply(SPECIES.length());
 289 
 290         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 291             for (int i = 0; i < a.length; i += SPECIES.length()) {
 292                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 293                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 294                 av.min(bv).intoArray(r, i);
 295             }
 296         }
 297 
 298         bh.consume(r);
 299     }
 300 
 301 
 302 
 303 
 304     @Benchmark
 305     public void addAll(Blackhole bh) {
 306         float[] a = fa.apply(SPECIES.length());
 307         float ra = 0;
 308 
 309         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 310             ra = 0;
 311             for (int i = 0; i < a.length; i += SPECIES.length()) {
 312                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 313                 ra += av.addAll();
 314             }
 315         }
 316         bh.consume(ra);
 317     }
 318 
 319     @Benchmark
 320     public void mulAll(Blackhole bh) {
 321         float[] a = fa.apply(SPECIES.length());
 322         float ra = 1;
 323 
 324         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 325             ra = 1;
 326             for (int i = 0; i < a.length; i += SPECIES.length()) {
 327                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 328                 ra *= av.mulAll();
 329             }
 330         }
 331         bh.consume(ra);
 332     }
 333 
 334     @Benchmark
 335     public void minAll(Blackhole bh) {
 336         float[] a = fa.apply(SPECIES.length());
 337         float ra = Float.POSITIVE_INFINITY;
 338 
 339         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 340             ra = Float.POSITIVE_INFINITY;
 341             for (int i = 0; i < a.length; i += SPECIES.length()) {
 342                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 343                 ra = (float)Math.min(ra, av.minAll());
 344             }
 345         }
 346         bh.consume(ra);
 347     }
 348 
 349     @Benchmark
 350     public void maxAll(Blackhole bh) {
 351         float[] a = fa.apply(SPECIES.length());
 352         float ra = Float.NEGATIVE_INFINITY;
 353 
 354         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 355             ra = Float.NEGATIVE_INFINITY;
 356             for (int i = 0; i < a.length; i += SPECIES.length()) {
 357                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 358                 ra = (float)Math.max(ra, av.maxAll());
 359             }
 360         }
 361         bh.consume(ra);
 362     }
 363 
 364 
 365 
 366     @Benchmark
 367     public void with(Blackhole bh) {
 368         float[] a = fa.apply(SPECIES.length());
 369         float[] r = fr.apply(SPECIES.length());
 370 
 371         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 372             for (int i = 0; i < a.length; i += SPECIES.length()) {
 373                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 374                 av.with(0, (float)4).intoArray(r, i);
 375             }
 376         }
 377 
 378         bh.consume(r);
 379     }
 380 
 381     @Benchmark
 382     public Object lessThan() {
 383         float[] a = fa.apply(size);
 384         float[] b = fb.apply(size);
 385         boolean[] ms = fm.apply(size);
 386         VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 387 
 388         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 389             for (int i = 0; i < a.length; i += SPECIES.length()) {
 390                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 391                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 392                 VectorMask<Float> mv = av.lessThan(bv);
 393 
 394                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 395             }
 396         }
 397         return m;
 398     }
 399 
 400 
 401     @Benchmark
 402     public Object greaterThan() {
 403         float[] a = fa.apply(size);
 404         float[] b = fb.apply(size);
 405         boolean[] ms = fm.apply(size);
 406         VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 407 
 408         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 409             for (int i = 0; i < a.length; i += SPECIES.length()) {
 410                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 411                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 412                 VectorMask<Float> mv = av.greaterThan(bv);
 413 
 414                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 415             }
 416         }
 417         return m;
 418     }
 419 
 420 
 421     @Benchmark
 422     public Object equal() {
 423         float[] a = fa.apply(size);
 424         float[] b = fb.apply(size);
 425         boolean[] ms = fm.apply(size);
 426         VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 427 
 428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 429             for (int i = 0; i < a.length; i += SPECIES.length()) {
 430                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 431                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 432                 VectorMask<Float> mv = av.equal(bv);
 433 
 434                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 435             }
 436         }
 437         return m;
 438     }
 439 
 440 
 441     @Benchmark
 442     public Object notEqual() {
 443         float[] a = fa.apply(size);
 444         float[] b = fb.apply(size);
 445         boolean[] ms = fm.apply(size);
 446         VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 447 
 448         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 449             for (int i = 0; i < a.length; i += SPECIES.length()) {
 450                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 451                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 452                 VectorMask<Float> mv = av.notEqual(bv);
 453 
 454                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 455             }
 456         }
 457         return m;
 458     }
 459 
 460 
 461     @Benchmark
 462     public Object lessThanEq() {
 463         float[] a = fa.apply(size);
 464         float[] b = fb.apply(size);
 465         boolean[] ms = fm.apply(size);
 466         VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 467 
 468         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 469             for (int i = 0; i < a.length; i += SPECIES.length()) {
 470                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 471                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 472                 VectorMask<Float> mv = av.lessThanEq(bv);
 473 
 474                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 475             }
 476         }
 477         return m;
 478     }
 479 
 480 
 481     @Benchmark
 482     public Object greaterThanEq() {
 483         float[] a = fa.apply(size);
 484         float[] b = fb.apply(size);
 485         boolean[] ms = fm.apply(size);
 486         VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 487 
 488         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 489             for (int i = 0; i < a.length; i += SPECIES.length()) {
 490                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 491                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 492                 VectorMask<Float> mv = av.greaterThanEq(bv);
 493 
 494                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 495             }
 496         }
 497         return m;
 498     }
 499 
 500 
 501     @Benchmark
 502     public void blend(Blackhole bh) {
 503         float[] a = fa.apply(SPECIES.length());
 504         float[] b = fb.apply(SPECIES.length());
 505         float[] r = fr.apply(SPECIES.length());
 506         boolean[] mask = fm.apply(SPECIES.length());
 507         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 508 
 509         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 510             for (int i = 0; i < a.length; i += SPECIES.length()) {
 511                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 512                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 513                 av.blend(bv, vmask).intoArray(r, i);
 514             }
 515         }
 516 
 517         bh.consume(r);
 518     }
 519 
 520     @Benchmark
 521     public void rearrange(Blackhole bh) {
 522         float[] a = fa.apply(SPECIES.length());
 523         int[] order = fs.apply(a.length, SPECIES.length());
 524         float[] r = fr.apply(SPECIES.length());
 525 
 526         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 529                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 530             }
 531         }
 532 
 533         bh.consume(r);
 534     }
 535 
 536     @Benchmark
 537     public void extract(Blackhole bh) {
 538         float[] a = fa.apply(SPECIES.length());
 539         float[] r = fr.apply(SPECIES.length());
 540 
 541         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 542             for (int i = 0; i < a.length; i += SPECIES.length()) {
 543                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 544                 int num_lanes = SPECIES.length();
 545                 // Manually unroll because full unroll happens after intrinsification.
 546                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 547                 if (num_lanes == 1) {
 548                     r[i]=av.get(0);
 549                 } else if (num_lanes == 2) {
 550                     r[i]=av.get(0);
 551                     r[i+1]=av.get(1);
 552                 } else if (num_lanes == 4) {
 553                     r[i]=av.get(0);
 554                     r[i+1]=av.get(1);
 555                     r[i+2]=av.get(2);
 556                     r[i+3]=av.get(3);
 557                 } else if (num_lanes == 8) {
 558                     r[i]=av.get(0);
 559                     r[i+1]=av.get(1);
 560                     r[i+2]=av.get(2);
 561                     r[i+3]=av.get(3);
 562                     r[i+4]=av.get(4);
 563                     r[i+5]=av.get(5);
 564                     r[i+6]=av.get(6);
 565                     r[i+7]=av.get(7);
 566                 } else if (num_lanes == 16) {
 567                     r[i]=av.get(0);
 568                     r[i+1]=av.get(1);
 569                     r[i+2]=av.get(2);
 570                     r[i+3]=av.get(3);
 571                     r[i+4]=av.get(4);
 572                     r[i+5]=av.get(5);
 573                     r[i+6]=av.get(6);
 574                     r[i+7]=av.get(7);
 575                     r[i+8]=av.get(8);
 576                     r[i+9]=av.get(9);
 577                     r[i+10]=av.get(10);
 578                     r[i+11]=av.get(11);
 579                     r[i+12]=av.get(12);
 580                     r[i+13]=av.get(13);
 581                     r[i+14]=av.get(14);
 582                     r[i+15]=av.get(15);
 583                 } else if (num_lanes == 32) {
 584                     r[i]=av.get(0);
 585                     r[i+1]=av.get(1);
 586                     r[i+2]=av.get(2);
 587                     r[i+3]=av.get(3);
 588                     r[i+4]=av.get(4);
 589                     r[i+5]=av.get(5);
 590                     r[i+6]=av.get(6);
 591                     r[i+7]=av.get(7);
 592                     r[i+8]=av.get(8);
 593                     r[i+9]=av.get(9);
 594                     r[i+10]=av.get(10);
 595                     r[i+11]=av.get(11);
 596                     r[i+12]=av.get(12);
 597                     r[i+13]=av.get(13);
 598                     r[i+14]=av.get(14);
 599                     r[i+15]=av.get(15);
 600                     r[i+16]=av.get(16);
 601                     r[i+17]=av.get(17);
 602                     r[i+18]=av.get(18);
 603                     r[i+19]=av.get(19);
 604                     r[i+20]=av.get(20);
 605                     r[i+21]=av.get(21);
 606                     r[i+22]=av.get(22);
 607                     r[i+23]=av.get(23);
 608                     r[i+24]=av.get(24);
 609                     r[i+25]=av.get(25);
 610                     r[i+26]=av.get(26);
 611                     r[i+27]=av.get(27);
 612                     r[i+28]=av.get(28);
 613                     r[i+29]=av.get(29);
 614                     r[i+30]=av.get(30);
 615                     r[i+31]=av.get(31);
 616                 } else if (num_lanes == 64) {
 617                     r[i]=av.get(0);
 618                     r[i+1]=av.get(1);
 619                     r[i+2]=av.get(2);
 620                     r[i+3]=av.get(3);
 621                     r[i+4]=av.get(4);
 622                     r[i+5]=av.get(5);
 623                     r[i+6]=av.get(6);
 624                     r[i+7]=av.get(7);
 625                     r[i+8]=av.get(8);
 626                     r[i+9]=av.get(9);
 627                     r[i+10]=av.get(10);
 628                     r[i+11]=av.get(11);
 629                     r[i+12]=av.get(12);
 630                     r[i+13]=av.get(13);
 631                     r[i+14]=av.get(14);
 632                     r[i+15]=av.get(15);
 633                     r[i+16]=av.get(16);
 634                     r[i+17]=av.get(17);
 635                     r[i+18]=av.get(18);
 636                     r[i+19]=av.get(19);
 637                     r[i+20]=av.get(20);
 638                     r[i+21]=av.get(21);
 639                     r[i+22]=av.get(22);
 640                     r[i+23]=av.get(23);
 641                     r[i+24]=av.get(24);
 642                     r[i+25]=av.get(25);
 643                     r[i+26]=av.get(26);
 644                     r[i+27]=av.get(27);
 645                     r[i+28]=av.get(28);
 646                     r[i+29]=av.get(29);
 647                     r[i+30]=av.get(30);
 648                     r[i+31]=av.get(31);
 649                     r[i+32]=av.get(32);
 650                     r[i+33]=av.get(33);
 651                     r[i+34]=av.get(34);
 652                     r[i+35]=av.get(35);
 653                     r[i+36]=av.get(36);
 654                     r[i+37]=av.get(37);
 655                     r[i+38]=av.get(38);
 656                     r[i+39]=av.get(39);
 657                     r[i+40]=av.get(40);
 658                     r[i+41]=av.get(41);
 659                     r[i+42]=av.get(42);
 660                     r[i+43]=av.get(43);
 661                     r[i+44]=av.get(44);
 662                     r[i+45]=av.get(45);
 663                     r[i+46]=av.get(46);
 664                     r[i+47]=av.get(47);
 665                     r[i+48]=av.get(48);
 666                     r[i+49]=av.get(49);
 667                     r[i+50]=av.get(50);
 668                     r[i+51]=av.get(51);
 669                     r[i+52]=av.get(52);
 670                     r[i+53]=av.get(53);
 671                     r[i+54]=av.get(54);
 672                     r[i+55]=av.get(55);
 673                     r[i+56]=av.get(56);
 674                     r[i+57]=av.get(57);
 675                     r[i+58]=av.get(58);
 676                     r[i+59]=av.get(59);
 677                     r[i+60]=av.get(60);
 678                     r[i+61]=av.get(61);
 679                     r[i+62]=av.get(62);
 680                     r[i+63]=av.get(63);
 681                 } else {
 682                     for (int j = 0; j < SPECIES.length(); j++) {
 683                         r[i+j]=av.get(j);
 684                     }
 685                 }
 686             }
 687         }
 688 
 689         bh.consume(r);
 690     }
 691 
 692 
 693     @Benchmark
 694     public void sin(Blackhole bh) {
 695         float[] a = fa.apply(SPECIES.length());
 696         float[] r = fr.apply(SPECIES.length());
 697 
 698         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 699             for (int i = 0; i < a.length; i += SPECIES.length()) {
 700                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 701                 av.sin().intoArray(r, i);
 702             }
 703         }
 704 
 705         bh.consume(r);
 706     }
 707 
 708 
 709 
 710     @Benchmark
 711     public void exp(Blackhole bh) {
 712         float[] a = fa.apply(SPECIES.length());
 713         float[] r = fr.apply(SPECIES.length());
 714 
 715         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 716             for (int i = 0; i < a.length; i += SPECIES.length()) {
 717                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 718                 av.exp().intoArray(r, i);
 719             }
 720         }
 721 
 722         bh.consume(r);
 723     }
 724 
 725 
 726 
 727     @Benchmark
 728     public void log1p(Blackhole bh) {
 729         float[] a = fa.apply(SPECIES.length());
 730         float[] r = fr.apply(SPECIES.length());
 731 
 732         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 733             for (int i = 0; i < a.length; i += SPECIES.length()) {
 734                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 735                 av.log1p().intoArray(r, i);
 736             }
 737         }
 738 
 739         bh.consume(r);
 740     }
 741 
 742 
 743 
 744     @Benchmark
 745     public void log(Blackhole bh) {
 746         float[] a = fa.apply(SPECIES.length());
 747         float[] r = fr.apply(SPECIES.length());
 748 
 749         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 750             for (int i = 0; i < a.length; i += SPECIES.length()) {
 751                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 752                 av.log().intoArray(r, i);
 753             }
 754         }
 755 
 756         bh.consume(r);
 757     }
 758 
 759 
 760 
 761     @Benchmark
 762     public void log10(Blackhole bh) {
 763         float[] a = fa.apply(SPECIES.length());
 764         float[] r = fr.apply(SPECIES.length());
 765 
 766         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 767             for (int i = 0; i < a.length; i += SPECIES.length()) {
 768                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 769                 av.log10().intoArray(r, i);
 770             }
 771         }
 772 
 773         bh.consume(r);
 774     }
 775 
 776 
 777 
 778     @Benchmark
 779     public void expm1(Blackhole bh) {
 780         float[] a = fa.apply(SPECIES.length());
 781         float[] r = fr.apply(SPECIES.length());
 782 
 783         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 784             for (int i = 0; i < a.length; i += SPECIES.length()) {
 785                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 786                 av.expm1().intoArray(r, i);
 787             }
 788         }
 789 
 790         bh.consume(r);
 791     }
 792 
 793 
 794 
 795     @Benchmark
 796     public void cos(Blackhole bh) {
 797         float[] a = fa.apply(SPECIES.length());
 798         float[] r = fr.apply(SPECIES.length());
 799 
 800         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 801             for (int i = 0; i < a.length; i += SPECIES.length()) {
 802                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 803                 av.cos().intoArray(r, i);
 804             }
 805         }
 806 
 807         bh.consume(r);
 808     }
 809 
 810 
 811 
 812     @Benchmark
 813     public void tan(Blackhole bh) {
 814         float[] a = fa.apply(SPECIES.length());
 815         float[] r = fr.apply(SPECIES.length());
 816 
 817         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 818             for (int i = 0; i < a.length; i += SPECIES.length()) {
 819                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 820                 av.tan().intoArray(r, i);
 821             }
 822         }
 823 
 824         bh.consume(r);
 825     }
 826 
 827 
 828 
 829     @Benchmark
 830     public void sinh(Blackhole bh) {
 831         float[] a = fa.apply(SPECIES.length());
 832         float[] r = fr.apply(SPECIES.length());
 833 
 834         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 835             for (int i = 0; i < a.length; i += SPECIES.length()) {
 836                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 837                 av.sinh().intoArray(r, i);
 838             }
 839         }
 840 
 841         bh.consume(r);
 842     }
 843 
 844 
 845 
 846     @Benchmark
 847     public void cosh(Blackhole bh) {
 848         float[] a = fa.apply(SPECIES.length());
 849         float[] r = fr.apply(SPECIES.length());
 850 
 851         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 852             for (int i = 0; i < a.length; i += SPECIES.length()) {
 853                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 854                 av.cosh().intoArray(r, i);
 855             }
 856         }
 857 
 858         bh.consume(r);
 859     }
 860 
 861 
 862 
 863     @Benchmark
 864     public void tanh(Blackhole bh) {
 865         float[] a = fa.apply(SPECIES.length());
 866         float[] r = fr.apply(SPECIES.length());
 867 
 868         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 869             for (int i = 0; i < a.length; i += SPECIES.length()) {
 870                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 871                 av.tanh().intoArray(r, i);
 872             }
 873         }
 874 
 875         bh.consume(r);
 876     }
 877 
 878 
 879 
 880     @Benchmark
 881     public void asin(Blackhole bh) {
 882         float[] a = fa.apply(SPECIES.length());
 883         float[] r = fr.apply(SPECIES.length());
 884 
 885         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 886             for (int i = 0; i < a.length; i += SPECIES.length()) {
 887                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 888                 av.asin().intoArray(r, i);
 889             }
 890         }
 891 
 892         bh.consume(r);
 893     }
 894 
 895 
 896 
 897     @Benchmark
 898     public void acos(Blackhole bh) {
 899         float[] a = fa.apply(SPECIES.length());
 900         float[] r = fr.apply(SPECIES.length());
 901 
 902         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 903             for (int i = 0; i < a.length; i += SPECIES.length()) {
 904                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 905                 av.acos().intoArray(r, i);
 906             }
 907         }
 908 
 909         bh.consume(r);
 910     }
 911 
 912 
 913 
 914     @Benchmark
 915     public void atan(Blackhole bh) {
 916         float[] a = fa.apply(SPECIES.length());
 917         float[] r = fr.apply(SPECIES.length());
 918 
 919         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 920             for (int i = 0; i < a.length; i += SPECIES.length()) {
 921                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 922                 av.atan().intoArray(r, i);
 923             }
 924         }
 925 
 926         bh.consume(r);
 927     }
 928 
 929 
 930 
 931     @Benchmark
 932     public void cbrt(Blackhole bh) {
 933         float[] a = fa.apply(SPECIES.length());
 934         float[] r = fr.apply(SPECIES.length());
 935 
 936         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 937             for (int i = 0; i < a.length; i += SPECIES.length()) {
 938                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 939                 av.cbrt().intoArray(r, i);
 940             }
 941         }
 942 
 943         bh.consume(r);
 944     }
 945 
 946 
 947 
 948     @Benchmark
 949     public void hypot(Blackhole bh) {
 950         float[] a = fa.apply(SPECIES.length());
 951         float[] b = fb.apply(SPECIES.length());
 952         float[] r = fr.apply(SPECIES.length());
 953 
 954         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 955             for (int i = 0; i < a.length; i += SPECIES.length()) {
 956                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 957                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 958                 av.hypot(bv).intoArray(r, i);
 959             }
 960         }
 961 
 962         bh.consume(r);
 963     }
 964 
 965 
 966 
 967     @Benchmark
 968     public void pow(Blackhole bh) {
 969         float[] a = fa.apply(SPECIES.length());
 970         float[] b = fb.apply(SPECIES.length());
 971         float[] r = fr.apply(SPECIES.length());
 972 
 973         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 974             for (int i = 0; i < a.length; i += SPECIES.length()) {
 975                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 976                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 977                 av.pow(bv).intoArray(r, i);
 978             }
 979         }
 980 
 981         bh.consume(r);
 982     }
 983 
 984 
 985 
 986     @Benchmark
 987     public void atan2(Blackhole bh) {
 988         float[] a = fa.apply(SPECIES.length());
 989         float[] b = fb.apply(SPECIES.length());
 990         float[] r = fr.apply(SPECIES.length());
 991 
 992         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 993             for (int i = 0; i < a.length; i += SPECIES.length()) {
 994                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 995                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 996                 av.atan2(bv).intoArray(r, i);
 997             }
 998         }
 999 
1000         bh.consume(r);
1001     }
1002 
1003 
1004 
1005     @Benchmark
1006     public void fma(Blackhole bh) {
1007         float[] a = fa.apply(SPECIES.length());
1008         float[] b = fb.apply(SPECIES.length());
1009         float[] c = fc.apply(SPECIES.length());
1010         float[] r = fr.apply(SPECIES.length());
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1015                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1016                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1017                 av.fma(bv, cv).intoArray(r, i);
1018             }
1019         }
1020 
1021         bh.consume(r);
1022     }
1023 
1024 
1025 
1026     @Benchmark
1027     public void fmaMasked(Blackhole bh) {
1028         float[] a = fa.apply(SPECIES.length());
1029         float[] b = fb.apply(SPECIES.length());
1030         float[] c = fc.apply(SPECIES.length());
1031         float[] r = fr.apply(SPECIES.length());
1032         boolean[] mask = fm.apply(SPECIES.length());
1033         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1034 
1035         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1036             for (int i = 0; i < a.length; i += SPECIES.length()) {
1037                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1038                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1039                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1040                 av.fma(bv, cv, vmask).intoArray(r, i);
1041             }
1042         }
1043 
1044         bh.consume(r);
1045     }
1046 
1047 
1048     @Benchmark
1049     public void neg(Blackhole bh) {
1050         float[] a = fa.apply(SPECIES.length());
1051         float[] r = fr.apply(SPECIES.length());
1052 
1053         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1054             for (int i = 0; i < a.length; i += SPECIES.length()) {
1055                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1056                 av.neg().intoArray(r, i);
1057             }
1058         }
1059 
1060         bh.consume(r);
1061     }
1062 
1063     @Benchmark
1064     public void negMasked(Blackhole bh) {
1065         float[] a = fa.apply(SPECIES.length());
1066         float[] r = fr.apply(SPECIES.length());
1067         boolean[] mask = fm.apply(SPECIES.length());
1068         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1069 
1070         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1071             for (int i = 0; i < a.length; i += SPECIES.length()) {
1072                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1073                 av.neg(vmask).intoArray(r, i);
1074             }
1075         }
1076 
1077         bh.consume(r);
1078     }
1079 
1080     @Benchmark
1081     public void abs(Blackhole bh) {
1082         float[] a = fa.apply(SPECIES.length());
1083         float[] r = fr.apply(SPECIES.length());
1084 
1085         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1086             for (int i = 0; i < a.length; i += SPECIES.length()) {
1087                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1088                 av.abs().intoArray(r, i);
1089             }
1090         }
1091 
1092         bh.consume(r);
1093     }
1094 
1095     @Benchmark
1096     public void absMasked(Blackhole bh) {
1097         float[] a = fa.apply(SPECIES.length());
1098         float[] r = fr.apply(SPECIES.length());
1099         boolean[] mask = fm.apply(SPECIES.length());
1100         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1101 
1102         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1103             for (int i = 0; i < a.length; i += SPECIES.length()) {
1104                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1105                 av.abs(vmask).intoArray(r, i);
1106             }
1107         }
1108 
1109         bh.consume(r);
1110     }
1111 
1112 
1113 
1114 
1115     @Benchmark
1116     public void sqrt(Blackhole bh) {
1117         float[] a = fa.apply(SPECIES.length());
1118         float[] r = fr.apply(SPECIES.length());
1119 
1120         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1121             for (int i = 0; i < a.length; i += SPECIES.length()) {
1122                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1123                 av.sqrt().intoArray(r, i);
1124             }
1125         }
1126 
1127         bh.consume(r);
1128     }
1129 
1130 
1131 
1132     @Benchmark
1133     public void sqrtMasked(Blackhole bh) {
1134         float[] a = fa.apply(SPECIES.length());
1135         float[] r = fr.apply(SPECIES.length());
1136         boolean[] mask = fm.apply(SPECIES.length());
1137         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1138 
1139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1140             for (int i = 0; i < a.length; i += SPECIES.length()) {
1141                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1142                 av.sqrt(vmask).intoArray(r, i);
1143             }
1144         }
1145 
1146         bh.consume(r);
1147     }
1148 
1149 
1150 
1151     @Benchmark
1152     public void gather(Blackhole bh) {
1153         float[] a = fa.apply(SPECIES.length());
1154         int[] b    = fs.apply(a.length, SPECIES.length());
1155         float[] r = new float[a.length];
1156 
1157         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1158             for (int i = 0; i < a.length; i += SPECIES.length()) {
1159                 FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i);
1160                 av.intoArray(r, i);
1161             }
1162         }
1163 
1164         bh.consume(r);
1165     }
1166 
1167 
1168 
1169     @Benchmark
1170     public void scatter(Blackhole bh) {
1171         float[] a = fa.apply(SPECIES.length());
1172         int[] b = fs.apply(a.length, SPECIES.length());
1173         float[] r = new float[a.length];
1174 
1175         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1176             for (int i = 0; i < a.length; i += SPECIES.length()) {
1177                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1178                 av.intoArray(r, i, b, i);
1179             }
1180         }
1181 
1182         bh.consume(r);
1183     }
1184 
1185 }
1186