1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.VectorMask;
  28 import jdk.incubator.vector.VectorShape;
  29 import jdk.incubator.vector.VectorSpecies;
  30 import jdk.incubator.vector.VectorShuffle;
  31 import jdk.incubator.vector.FloatVector;
  32 
  33 import java.util.concurrent.TimeUnit;
  34 import java.util.function.BiFunction;
  35 import java.util.function.IntFunction;
  36 
  37 import org.openjdk.jmh.annotations.*;
  38 import org.openjdk.jmh.infra.Blackhole;
  39 
  40 @BenchmarkMode(Mode.Throughput)
  41 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  42 @State(Scope.Benchmark)
  43 @Warmup(iterations = 3, time = 1)
  44 @Measurement(iterations = 5, time = 1)
  45 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  46 public class Float128Vector extends AbstractVectorBenchmark {
  47     static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_128;
  48 
  49     static final int INVOC_COUNT = 1; // get rid of outer loop
  50 
  51     @Param("1024")
  52     int size;
  53 
  54     float[] fill(IntFunction<Float> f) {
  55         float[] array = new float[size];
  56         for (int i = 0; i < array.length; i++) {
  57             array[i] = f.apply(i);
  58         }
  59         return array;
  60     }
  61 
  62     float[] a, b, c, r;
  63     boolean[] m, rm;
  64     int[] s;
  65 
  66     @Setup
  67     public void init() {
  68         size += size % SPECIES.length(); // FIXME: add post-loops
  69 
  70         a = fill(i -> (float)(2*i));
  71         b = fill(i -> (float)(i+1));
  72         c = fill(i -> (float)(i+5));
  73         r = fill(i -> (float)0);
  74 
  75         m = fillMask(size, i -> (i % 2) == 0);
  76         rm = fillMask(size, i -> false);
  77 
  78         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  79     }
  80 
  81     final IntFunction<float[]> fa = vl -> a;
  82     final IntFunction<float[]> fb = vl -> b;
  83     final IntFunction<float[]> fc = vl -> c;
  84     final IntFunction<float[]> fr = vl -> r;
  85     final IntFunction<boolean[]> fm = vl -> m;
  86     final IntFunction<boolean[]> fmr = vl -> rm;
  87     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  88 
  89 
  90     @Benchmark
  91     public void add(Blackhole bh) {
  92         float[] a = fa.apply(SPECIES.length());
  93         float[] b = fb.apply(SPECIES.length());
  94         float[] r = fr.apply(SPECIES.length());
  95 
  96         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  97             for (int i = 0; i < a.length; i += SPECIES.length()) {
  98                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
  99                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 100                 av.add(bv).intoArray(r, i);
 101             }
 102         }
 103 
 104         bh.consume(r);
 105     }
 106 
 107     @Benchmark
 108     public void addMasked(Blackhole bh) {
 109         float[] a = fa.apply(SPECIES.length());
 110         float[] b = fb.apply(SPECIES.length());
 111         float[] r = fr.apply(SPECIES.length());
 112         boolean[] mask = fm.apply(SPECIES.length());
 113         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 114 
 115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 116             for (int i = 0; i < a.length; i += SPECIES.length()) {
 117                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 118                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 119                 av.add(bv, vmask).intoArray(r, i);
 120             }
 121         }
 122 
 123         bh.consume(r);
 124     }
 125 
 126     @Benchmark
 127     public void sub(Blackhole bh) {
 128         float[] a = fa.apply(SPECIES.length());
 129         float[] b = fb.apply(SPECIES.length());
 130         float[] r = fr.apply(SPECIES.length());
 131 
 132         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 133             for (int i = 0; i < a.length; i += SPECIES.length()) {
 134                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 135                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 136                 av.sub(bv).intoArray(r, i);
 137             }
 138         }
 139 
 140         bh.consume(r);
 141     }
 142 
 143     @Benchmark
 144     public void subMasked(Blackhole bh) {
 145         float[] a = fa.apply(SPECIES.length());
 146         float[] b = fb.apply(SPECIES.length());
 147         float[] r = fr.apply(SPECIES.length());
 148         boolean[] mask = fm.apply(SPECIES.length());
 149         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 150 
 151         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 152             for (int i = 0; i < a.length; i += SPECIES.length()) {
 153                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 154                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 155                 av.sub(bv, vmask).intoArray(r, i);
 156             }
 157         }
 158 
 159         bh.consume(r);
 160     }
 161 
 162 
 163     @Benchmark
 164     public void div(Blackhole bh) {
 165         float[] a = fa.apply(SPECIES.length());
 166         float[] b = fb.apply(SPECIES.length());
 167         float[] r = fr.apply(SPECIES.length());
 168 
 169         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 170             for (int i = 0; i < a.length; i += SPECIES.length()) {
 171                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 172                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 173                 av.div(bv).intoArray(r, i);
 174             }
 175         }
 176 
 177         bh.consume(r);
 178     }
 179 
 180 
 181 
 182     @Benchmark
 183     public void divMasked(Blackhole bh) {
 184         float[] a = fa.apply(SPECIES.length());
 185         float[] b = fb.apply(SPECIES.length());
 186         float[] r = fr.apply(SPECIES.length());
 187         boolean[] mask = fm.apply(SPECIES.length());
 188         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 189 
 190         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 191             for (int i = 0; i < a.length; i += SPECIES.length()) {
 192                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 193                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 194                 av.div(bv, vmask).intoArray(r, i);
 195             }
 196         }
 197 
 198         bh.consume(r);
 199     }
 200 
 201 
 202     @Benchmark
 203     public void mul(Blackhole bh) {
 204         float[] a = fa.apply(SPECIES.length());
 205         float[] b = fb.apply(SPECIES.length());
 206         float[] r = fr.apply(SPECIES.length());
 207 
 208         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 209             for (int i = 0; i < a.length; i += SPECIES.length()) {
 210                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 211                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 212                 av.mul(bv).intoArray(r, i);
 213             }
 214         }
 215 
 216         bh.consume(r);
 217     }
 218 
 219     @Benchmark
 220     public void mulMasked(Blackhole bh) {
 221         float[] a = fa.apply(SPECIES.length());
 222         float[] b = fb.apply(SPECIES.length());
 223         float[] r = fr.apply(SPECIES.length());
 224         boolean[] mask = fm.apply(SPECIES.length());
 225         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 226 
 227         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 228             for (int i = 0; i < a.length; i += SPECIES.length()) {
 229                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 230                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 231                 av.mul(bv, vmask).intoArray(r, i);
 232             }
 233         }
 234 
 235         bh.consume(r);
 236     }
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266 
 267 
 268 
 269 
 270 
 271 
 272 
 273 
 274 
 275 
 276 
 277 
 278 
 279 
 280     @Benchmark
 281     public void max(Blackhole bh) {
 282         float[] a = fa.apply(SPECIES.length());
 283         float[] b = fb.apply(SPECIES.length());
 284         float[] r = fr.apply(SPECIES.length());
 285 
 286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 287             for (int i = 0; i < a.length; i += SPECIES.length()) {
 288                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 289                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 290                 av.max(bv).intoArray(r, i);
 291             }
 292         }
 293 
 294         bh.consume(r);
 295     }
 296 
 297     @Benchmark
 298     public void min(Blackhole bh) {
 299         float[] a = fa.apply(SPECIES.length());
 300         float[] b = fb.apply(SPECIES.length());
 301         float[] r = fr.apply(SPECIES.length());
 302 
 303         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 304             for (int i = 0; i < a.length; i += SPECIES.length()) {
 305                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 306                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 307                 av.min(bv).intoArray(r, i);
 308             }
 309         }
 310 
 311         bh.consume(r);
 312     }
 313 
 314 
 315 
 316 
 317     @Benchmark
 318     public void addLanes(Blackhole bh) {
 319         float[] a = fa.apply(SPECIES.length());
 320         float ra = 0;
 321 
 322         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 323             ra = 0;
 324             for (int i = 0; i < a.length; i += SPECIES.length()) {
 325                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 326                 ra += av.addLanes();
 327             }
 328         }
 329         bh.consume(ra);
 330     }
 331 
 332     @Benchmark
 333     public void mulLanes(Blackhole bh) {
 334         float[] a = fa.apply(SPECIES.length());
 335         float ra = 1;
 336 
 337         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 338             ra = 1;
 339             for (int i = 0; i < a.length; i += SPECIES.length()) {
 340                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 341                 ra *= av.mulLanes();
 342             }
 343         }
 344         bh.consume(ra);
 345     }
 346 
 347     @Benchmark
 348     public void minLanes(Blackhole bh) {
 349         float[] a = fa.apply(SPECIES.length());
 350         float ra = Float.POSITIVE_INFINITY;
 351 
 352         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 353             ra = Float.POSITIVE_INFINITY;
 354             for (int i = 0; i < a.length; i += SPECIES.length()) {
 355                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 356                 ra = (float)Math.min(ra, av.minLanes());
 357             }
 358         }
 359         bh.consume(ra);
 360     }
 361 
 362     @Benchmark
 363     public void maxLanes(Blackhole bh) {
 364         float[] a = fa.apply(SPECIES.length());
 365         float ra = Float.NEGATIVE_INFINITY;
 366 
 367         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 368             ra = Float.NEGATIVE_INFINITY;
 369             for (int i = 0; i < a.length; i += SPECIES.length()) {
 370                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 371                 ra = (float)Math.max(ra, av.maxLanes());
 372             }
 373         }
 374         bh.consume(ra);
 375     }
 376 
 377 
 378 
 379     @Benchmark
 380     public void with(Blackhole bh) {
 381         float[] a = fa.apply(SPECIES.length());
 382         float[] r = fr.apply(SPECIES.length());
 383 
 384         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 385             for (int i = 0; i < a.length; i += SPECIES.length()) {
 386                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 387                 av.with(0, (float)4).intoArray(r, i);
 388             }
 389         }
 390 
 391         bh.consume(r);
 392     }
 393 
 394     @Benchmark
 395     public Object lessThan() {
 396         float[] a = fa.apply(size);
 397         float[] b = fb.apply(size);
 398         boolean[] ms = fm.apply(size);
 399         VectorMask<Float> m = VectorMask.fromArray(SPECIES, ms, 0);
 400 
 401         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 402             for (int i = 0; i < a.length; i += SPECIES.length()) {
 403                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 404                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 405                 VectorMask<Float> mv = av.lessThan(bv);
 406 
 407                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 408             }
 409         }
 410         return m;
 411     }
 412 
 413 
 414     @Benchmark
 415     public Object greaterThan() {
 416         float[] a = fa.apply(size);
 417         float[] b = fb.apply(size);
 418         boolean[] ms = fm.apply(size);
 419         VectorMask<Float> m = VectorMask.fromArray(SPECIES, ms, 0);
 420 
 421         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 422             for (int i = 0; i < a.length; i += SPECIES.length()) {
 423                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 424                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 425                 VectorMask<Float> mv = av.greaterThan(bv);
 426 
 427                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 428             }
 429         }
 430         return m;
 431     }
 432 
 433 
 434     @Benchmark
 435     public Object equal() {
 436         float[] a = fa.apply(size);
 437         float[] b = fb.apply(size);
 438         boolean[] ms = fm.apply(size);
 439         VectorMask<Float> m = VectorMask.fromArray(SPECIES, ms, 0);
 440 
 441         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 442             for (int i = 0; i < a.length; i += SPECIES.length()) {
 443                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 444                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 445                 VectorMask<Float> mv = av.equal(bv);
 446 
 447                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 448             }
 449         }
 450         return m;
 451     }
 452 
 453 
 454     @Benchmark
 455     public Object notEqual() {
 456         float[] a = fa.apply(size);
 457         float[] b = fb.apply(size);
 458         boolean[] ms = fm.apply(size);
 459         VectorMask<Float> m = VectorMask.fromArray(SPECIES, ms, 0);
 460 
 461         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 462             for (int i = 0; i < a.length; i += SPECIES.length()) {
 463                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 464                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 465                 VectorMask<Float> mv = av.notEqual(bv);
 466 
 467                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 468             }
 469         }
 470         return m;
 471     }
 472 
 473 
 474     @Benchmark
 475     public Object lessThanEq() {
 476         float[] a = fa.apply(size);
 477         float[] b = fb.apply(size);
 478         boolean[] ms = fm.apply(size);
 479         VectorMask<Float> m = VectorMask.fromArray(SPECIES, ms, 0);
 480 
 481         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 482             for (int i = 0; i < a.length; i += SPECIES.length()) {
 483                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 484                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 485                 VectorMask<Float> mv = av.lessThanEq(bv);
 486 
 487                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 488             }
 489         }
 490         return m;
 491     }
 492 
 493 
 494     @Benchmark
 495     public Object greaterThanEq() {
 496         float[] a = fa.apply(size);
 497         float[] b = fb.apply(size);
 498         boolean[] ms = fm.apply(size);
 499         VectorMask<Float> m = VectorMask.fromArray(SPECIES, ms, 0);
 500 
 501         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 502             for (int i = 0; i < a.length; i += SPECIES.length()) {
 503                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 504                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 505                 VectorMask<Float> mv = av.greaterThanEq(bv);
 506 
 507                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 508             }
 509         }
 510         return m;
 511     }
 512 
 513 
 514     @Benchmark
 515     public void blend(Blackhole bh) {
 516         float[] a = fa.apply(SPECIES.length());
 517         float[] b = fb.apply(SPECIES.length());
 518         float[] r = fr.apply(SPECIES.length());
 519         boolean[] mask = fm.apply(SPECIES.length());
 520         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
 521 
 522         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 523             for (int i = 0; i < a.length; i += SPECIES.length()) {
 524                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 525                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 526                 av.blend(bv, vmask).intoArray(r, i);
 527             }
 528         }
 529 
 530         bh.consume(r);
 531     }
 532 
 533     @Benchmark
 534     public void rearrange(Blackhole bh) {
 535         float[] a = fa.apply(SPECIES.length());
 536         int[] order = fs.apply(a.length, SPECIES.length());
 537         float[] r = fr.apply(SPECIES.length());
 538 
 539         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 540             for (int i = 0; i < a.length; i += SPECIES.length()) {
 541                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 542                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 543             }
 544         }
 545 
 546         bh.consume(r);
 547     }
 548 
 549     @Benchmark
 550     public void extract(Blackhole bh) {
 551         float[] a = fa.apply(SPECIES.length());
 552         float[] r = fr.apply(SPECIES.length());
 553 
 554         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 555             for (int i = 0; i < a.length; i += SPECIES.length()) {
 556                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 557                 int num_lanes = SPECIES.length();
 558                 // Manually unroll because full unroll happens after intrinsification.
 559                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 560                 if (num_lanes == 1) {
 561                     r[i]=av.lane(0);
 562                 } else if (num_lanes == 2) {
 563                     r[i]=av.lane(0);
 564                     r[i+1]=av.lane(1);
 565                 } else if (num_lanes == 4) {
 566                     r[i]=av.lane(0);
 567                     r[i+1]=av.lane(1);
 568                     r[i+2]=av.lane(2);
 569                     r[i+3]=av.lane(3);
 570                 } else if (num_lanes == 8) {
 571                     r[i]=av.lane(0);
 572                     r[i+1]=av.lane(1);
 573                     r[i+2]=av.lane(2);
 574                     r[i+3]=av.lane(3);
 575                     r[i+4]=av.lane(4);
 576                     r[i+5]=av.lane(5);
 577                     r[i+6]=av.lane(6);
 578                     r[i+7]=av.lane(7);
 579                 } else if (num_lanes == 16) {
 580                     r[i]=av.lane(0);
 581                     r[i+1]=av.lane(1);
 582                     r[i+2]=av.lane(2);
 583                     r[i+3]=av.lane(3);
 584                     r[i+4]=av.lane(4);
 585                     r[i+5]=av.lane(5);
 586                     r[i+6]=av.lane(6);
 587                     r[i+7]=av.lane(7);
 588                     r[i+8]=av.lane(8);
 589                     r[i+9]=av.lane(9);
 590                     r[i+10]=av.lane(10);
 591                     r[i+11]=av.lane(11);
 592                     r[i+12]=av.lane(12);
 593                     r[i+13]=av.lane(13);
 594                     r[i+14]=av.lane(14);
 595                     r[i+15]=av.lane(15);
 596                 } else if (num_lanes == 32) {
 597                     r[i]=av.lane(0);
 598                     r[i+1]=av.lane(1);
 599                     r[i+2]=av.lane(2);
 600                     r[i+3]=av.lane(3);
 601                     r[i+4]=av.lane(4);
 602                     r[i+5]=av.lane(5);
 603                     r[i+6]=av.lane(6);
 604                     r[i+7]=av.lane(7);
 605                     r[i+8]=av.lane(8);
 606                     r[i+9]=av.lane(9);
 607                     r[i+10]=av.lane(10);
 608                     r[i+11]=av.lane(11);
 609                     r[i+12]=av.lane(12);
 610                     r[i+13]=av.lane(13);
 611                     r[i+14]=av.lane(14);
 612                     r[i+15]=av.lane(15);
 613                     r[i+16]=av.lane(16);
 614                     r[i+17]=av.lane(17);
 615                     r[i+18]=av.lane(18);
 616                     r[i+19]=av.lane(19);
 617                     r[i+20]=av.lane(20);
 618                     r[i+21]=av.lane(21);
 619                     r[i+22]=av.lane(22);
 620                     r[i+23]=av.lane(23);
 621                     r[i+24]=av.lane(24);
 622                     r[i+25]=av.lane(25);
 623                     r[i+26]=av.lane(26);
 624                     r[i+27]=av.lane(27);
 625                     r[i+28]=av.lane(28);
 626                     r[i+29]=av.lane(29);
 627                     r[i+30]=av.lane(30);
 628                     r[i+31]=av.lane(31);
 629                 } else if (num_lanes == 64) {
 630                     r[i]=av.lane(0);
 631                     r[i+1]=av.lane(1);
 632                     r[i+2]=av.lane(2);
 633                     r[i+3]=av.lane(3);
 634                     r[i+4]=av.lane(4);
 635                     r[i+5]=av.lane(5);
 636                     r[i+6]=av.lane(6);
 637                     r[i+7]=av.lane(7);
 638                     r[i+8]=av.lane(8);
 639                     r[i+9]=av.lane(9);
 640                     r[i+10]=av.lane(10);
 641                     r[i+11]=av.lane(11);
 642                     r[i+12]=av.lane(12);
 643                     r[i+13]=av.lane(13);
 644                     r[i+14]=av.lane(14);
 645                     r[i+15]=av.lane(15);
 646                     r[i+16]=av.lane(16);
 647                     r[i+17]=av.lane(17);
 648                     r[i+18]=av.lane(18);
 649                     r[i+19]=av.lane(19);
 650                     r[i+20]=av.lane(20);
 651                     r[i+21]=av.lane(21);
 652                     r[i+22]=av.lane(22);
 653                     r[i+23]=av.lane(23);
 654                     r[i+24]=av.lane(24);
 655                     r[i+25]=av.lane(25);
 656                     r[i+26]=av.lane(26);
 657                     r[i+27]=av.lane(27);
 658                     r[i+28]=av.lane(28);
 659                     r[i+29]=av.lane(29);
 660                     r[i+30]=av.lane(30);
 661                     r[i+31]=av.lane(31);
 662                     r[i+32]=av.lane(32);
 663                     r[i+33]=av.lane(33);
 664                     r[i+34]=av.lane(34);
 665                     r[i+35]=av.lane(35);
 666                     r[i+36]=av.lane(36);
 667                     r[i+37]=av.lane(37);
 668                     r[i+38]=av.lane(38);
 669                     r[i+39]=av.lane(39);
 670                     r[i+40]=av.lane(40);
 671                     r[i+41]=av.lane(41);
 672                     r[i+42]=av.lane(42);
 673                     r[i+43]=av.lane(43);
 674                     r[i+44]=av.lane(44);
 675                     r[i+45]=av.lane(45);
 676                     r[i+46]=av.lane(46);
 677                     r[i+47]=av.lane(47);
 678                     r[i+48]=av.lane(48);
 679                     r[i+49]=av.lane(49);
 680                     r[i+50]=av.lane(50);
 681                     r[i+51]=av.lane(51);
 682                     r[i+52]=av.lane(52);
 683                     r[i+53]=av.lane(53);
 684                     r[i+54]=av.lane(54);
 685                     r[i+55]=av.lane(55);
 686                     r[i+56]=av.lane(56);
 687                     r[i+57]=av.lane(57);
 688                     r[i+58]=av.lane(58);
 689                     r[i+59]=av.lane(59);
 690                     r[i+60]=av.lane(60);
 691                     r[i+61]=av.lane(61);
 692                     r[i+62]=av.lane(62);
 693                     r[i+63]=av.lane(63);
 694                 } else {
 695                     for (int j = 0; j < SPECIES.length(); j++) {
 696                         r[i+j]=av.lane(j);
 697                     }
 698                 }
 699             }
 700         }
 701 
 702         bh.consume(r);
 703     }
 704 
 705 
 706     @Benchmark
 707     public void sin(Blackhole bh) {
 708         float[] a = fa.apply(SPECIES.length());
 709         float[] r = fr.apply(SPECIES.length());
 710 
 711         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 712             for (int i = 0; i < a.length; i += SPECIES.length()) {
 713                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 714                 av.sin().intoArray(r, i);
 715             }
 716         }
 717 
 718         bh.consume(r);
 719     }
 720 
 721 
 722 
 723     @Benchmark
 724     public void exp(Blackhole bh) {
 725         float[] a = fa.apply(SPECIES.length());
 726         float[] r = fr.apply(SPECIES.length());
 727 
 728         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 729             for (int i = 0; i < a.length; i += SPECIES.length()) {
 730                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 731                 av.exp().intoArray(r, i);
 732             }
 733         }
 734 
 735         bh.consume(r);
 736     }
 737 
 738 
 739 
 740     @Benchmark
 741     public void log1p(Blackhole bh) {
 742         float[] a = fa.apply(SPECIES.length());
 743         float[] r = fr.apply(SPECIES.length());
 744 
 745         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 746             for (int i = 0; i < a.length; i += SPECIES.length()) {
 747                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 748                 av.log1p().intoArray(r, i);
 749             }
 750         }
 751 
 752         bh.consume(r);
 753     }
 754 
 755 
 756 
 757     @Benchmark
 758     public void log(Blackhole bh) {
 759         float[] a = fa.apply(SPECIES.length());
 760         float[] r = fr.apply(SPECIES.length());
 761 
 762         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 763             for (int i = 0; i < a.length; i += SPECIES.length()) {
 764                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 765                 av.log().intoArray(r, i);
 766             }
 767         }
 768 
 769         bh.consume(r);
 770     }
 771 
 772 
 773 
 774     @Benchmark
 775     public void log10(Blackhole bh) {
 776         float[] a = fa.apply(SPECIES.length());
 777         float[] r = fr.apply(SPECIES.length());
 778 
 779         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 780             for (int i = 0; i < a.length; i += SPECIES.length()) {
 781                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 782                 av.log10().intoArray(r, i);
 783             }
 784         }
 785 
 786         bh.consume(r);
 787     }
 788 
 789 
 790 
 791     @Benchmark
 792     public void expm1(Blackhole bh) {
 793         float[] a = fa.apply(SPECIES.length());
 794         float[] r = fr.apply(SPECIES.length());
 795 
 796         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 797             for (int i = 0; i < a.length; i += SPECIES.length()) {
 798                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 799                 av.expm1().intoArray(r, i);
 800             }
 801         }
 802 
 803         bh.consume(r);
 804     }
 805 
 806 
 807 
 808     @Benchmark
 809     public void cos(Blackhole bh) {
 810         float[] a = fa.apply(SPECIES.length());
 811         float[] r = fr.apply(SPECIES.length());
 812 
 813         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 814             for (int i = 0; i < a.length; i += SPECIES.length()) {
 815                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 816                 av.cos().intoArray(r, i);
 817             }
 818         }
 819 
 820         bh.consume(r);
 821     }
 822 
 823 
 824 
 825     @Benchmark
 826     public void tan(Blackhole bh) {
 827         float[] a = fa.apply(SPECIES.length());
 828         float[] r = fr.apply(SPECIES.length());
 829 
 830         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 831             for (int i = 0; i < a.length; i += SPECIES.length()) {
 832                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 833                 av.tan().intoArray(r, i);
 834             }
 835         }
 836 
 837         bh.consume(r);
 838     }
 839 
 840 
 841 
 842     @Benchmark
 843     public void sinh(Blackhole bh) {
 844         float[] a = fa.apply(SPECIES.length());
 845         float[] r = fr.apply(SPECIES.length());
 846 
 847         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 848             for (int i = 0; i < a.length; i += SPECIES.length()) {
 849                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 850                 av.sinh().intoArray(r, i);
 851             }
 852         }
 853 
 854         bh.consume(r);
 855     }
 856 
 857 
 858 
 859     @Benchmark
 860     public void cosh(Blackhole bh) {
 861         float[] a = fa.apply(SPECIES.length());
 862         float[] r = fr.apply(SPECIES.length());
 863 
 864         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 865             for (int i = 0; i < a.length; i += SPECIES.length()) {
 866                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 867                 av.cosh().intoArray(r, i);
 868             }
 869         }
 870 
 871         bh.consume(r);
 872     }
 873 
 874 
 875 
 876     @Benchmark
 877     public void tanh(Blackhole bh) {
 878         float[] a = fa.apply(SPECIES.length());
 879         float[] r = fr.apply(SPECIES.length());
 880 
 881         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 882             for (int i = 0; i < a.length; i += SPECIES.length()) {
 883                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 884                 av.tanh().intoArray(r, i);
 885             }
 886         }
 887 
 888         bh.consume(r);
 889     }
 890 
 891 
 892 
 893     @Benchmark
 894     public void asin(Blackhole bh) {
 895         float[] a = fa.apply(SPECIES.length());
 896         float[] r = fr.apply(SPECIES.length());
 897 
 898         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 899             for (int i = 0; i < a.length; i += SPECIES.length()) {
 900                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 901                 av.asin().intoArray(r, i);
 902             }
 903         }
 904 
 905         bh.consume(r);
 906     }
 907 
 908 
 909 
 910     @Benchmark
 911     public void acos(Blackhole bh) {
 912         float[] a = fa.apply(SPECIES.length());
 913         float[] r = fr.apply(SPECIES.length());
 914 
 915         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 916             for (int i = 0; i < a.length; i += SPECIES.length()) {
 917                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 918                 av.acos().intoArray(r, i);
 919             }
 920         }
 921 
 922         bh.consume(r);
 923     }
 924 
 925 
 926 
 927     @Benchmark
 928     public void atan(Blackhole bh) {
 929         float[] a = fa.apply(SPECIES.length());
 930         float[] r = fr.apply(SPECIES.length());
 931 
 932         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 933             for (int i = 0; i < a.length; i += SPECIES.length()) {
 934                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 935                 av.atan().intoArray(r, i);
 936             }
 937         }
 938 
 939         bh.consume(r);
 940     }
 941 
 942 
 943 
 944     @Benchmark
 945     public void cbrt(Blackhole bh) {
 946         float[] a = fa.apply(SPECIES.length());
 947         float[] r = fr.apply(SPECIES.length());
 948 
 949         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 950             for (int i = 0; i < a.length; i += SPECIES.length()) {
 951                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 952                 av.cbrt().intoArray(r, i);
 953             }
 954         }
 955 
 956         bh.consume(r);
 957     }
 958 
 959 
 960 
 961     @Benchmark
 962     public void hypot(Blackhole bh) {
 963         float[] a = fa.apply(SPECIES.length());
 964         float[] b = fb.apply(SPECIES.length());
 965         float[] r = fr.apply(SPECIES.length());
 966 
 967         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 968             for (int i = 0; i < a.length; i += SPECIES.length()) {
 969                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 970                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 971                 av.hypot(bv).intoArray(r, i);
 972             }
 973         }
 974 
 975         bh.consume(r);
 976     }
 977 
 978 
 979 
 980     @Benchmark
 981     public void pow(Blackhole bh) {
 982         float[] a = fa.apply(SPECIES.length());
 983         float[] b = fb.apply(SPECIES.length());
 984         float[] r = fr.apply(SPECIES.length());
 985 
 986         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 987             for (int i = 0; i < a.length; i += SPECIES.length()) {
 988                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
 989                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
 990                 av.pow(bv).intoArray(r, i);
 991             }
 992         }
 993 
 994         bh.consume(r);
 995     }
 996 
 997 
 998 
 999     @Benchmark
1000     public void atan2(Blackhole bh) {
1001         float[] a = fa.apply(SPECIES.length());
1002         float[] b = fb.apply(SPECIES.length());
1003         float[] r = fr.apply(SPECIES.length());
1004 
1005         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1006             for (int i = 0; i < a.length; i += SPECIES.length()) {
1007                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1008                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1009                 av.atan2(bv).intoArray(r, i);
1010             }
1011         }
1012 
1013         bh.consume(r);
1014     }
1015 
1016 
1017 
1018     @Benchmark
1019     public void fma(Blackhole bh) {
1020         float[] a = fa.apply(SPECIES.length());
1021         float[] b = fb.apply(SPECIES.length());
1022         float[] c = fc.apply(SPECIES.length());
1023         float[] r = fr.apply(SPECIES.length());
1024 
1025         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1026             for (int i = 0; i < a.length; i += SPECIES.length()) {
1027                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1028                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1029                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1030                 av.fma(bv, cv).intoArray(r, i);
1031             }
1032         }
1033 
1034         bh.consume(r);
1035     }
1036 
1037 
1038 
1039     @Benchmark
1040     public void fmaMasked(Blackhole bh) {
1041         float[] a = fa.apply(SPECIES.length());
1042         float[] b = fb.apply(SPECIES.length());
1043         float[] c = fc.apply(SPECIES.length());
1044         float[] r = fr.apply(SPECIES.length());
1045         boolean[] mask = fm.apply(SPECIES.length());
1046         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1047 
1048         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1049             for (int i = 0; i < a.length; i += SPECIES.length()) {
1050                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1051                 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1052                 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1053                 av.fma(bv, cv, vmask).intoArray(r, i);
1054             }
1055         }
1056 
1057         bh.consume(r);
1058     }
1059 
1060 
1061     @Benchmark
1062     public void neg(Blackhole bh) {
1063         float[] a = fa.apply(SPECIES.length());
1064         float[] r = fr.apply(SPECIES.length());
1065 
1066         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1067             for (int i = 0; i < a.length; i += SPECIES.length()) {
1068                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1069                 av.neg().intoArray(r, i);
1070             }
1071         }
1072 
1073         bh.consume(r);
1074     }
1075 
1076     @Benchmark
1077     public void negMasked(Blackhole bh) {
1078         float[] a = fa.apply(SPECIES.length());
1079         float[] r = fr.apply(SPECIES.length());
1080         boolean[] mask = fm.apply(SPECIES.length());
1081         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1082 
1083         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1084             for (int i = 0; i < a.length; i += SPECIES.length()) {
1085                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1086                 av.neg(vmask).intoArray(r, i);
1087             }
1088         }
1089 
1090         bh.consume(r);
1091     }
1092 
1093     @Benchmark
1094     public void abs(Blackhole bh) {
1095         float[] a = fa.apply(SPECIES.length());
1096         float[] r = fr.apply(SPECIES.length());
1097 
1098         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1099             for (int i = 0; i < a.length; i += SPECIES.length()) {
1100                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1101                 av.abs().intoArray(r, i);
1102             }
1103         }
1104 
1105         bh.consume(r);
1106     }
1107 
1108     @Benchmark
1109     public void absMasked(Blackhole bh) {
1110         float[] a = fa.apply(SPECIES.length());
1111         float[] r = fr.apply(SPECIES.length());
1112         boolean[] mask = fm.apply(SPECIES.length());
1113         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1114 
1115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1116             for (int i = 0; i < a.length; i += SPECIES.length()) {
1117                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1118                 av.abs(vmask).intoArray(r, i);
1119             }
1120         }
1121 
1122         bh.consume(r);
1123     }
1124 
1125 
1126 
1127 
1128     @Benchmark
1129     public void sqrt(Blackhole bh) {
1130         float[] a = fa.apply(SPECIES.length());
1131         float[] r = fr.apply(SPECIES.length());
1132 
1133         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1134             for (int i = 0; i < a.length; i += SPECIES.length()) {
1135                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1136                 av.sqrt().intoArray(r, i);
1137             }
1138         }
1139 
1140         bh.consume(r);
1141     }
1142 
1143 
1144 
1145     @Benchmark
1146     public void sqrtMasked(Blackhole bh) {
1147         float[] a = fa.apply(SPECIES.length());
1148         float[] r = fr.apply(SPECIES.length());
1149         boolean[] mask = fm.apply(SPECIES.length());
1150         VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1151 
1152         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1153             for (int i = 0; i < a.length; i += SPECIES.length()) {
1154                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1155                 av.sqrt(vmask).intoArray(r, i);
1156             }
1157         }
1158 
1159         bh.consume(r);
1160     }
1161 
1162 
1163 
1164     @Benchmark
1165     public void gather(Blackhole bh) {
1166         float[] a = fa.apply(SPECIES.length());
1167         int[] b    = fs.apply(a.length, SPECIES.length());
1168         float[] r = new float[a.length];
1169 
1170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1171             for (int i = 0; i < a.length; i += SPECIES.length()) {
1172                 FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i);
1173                 av.intoArray(r, i);
1174             }
1175         }
1176 
1177         bh.consume(r);
1178     }
1179 
1180 
1181 
1182     @Benchmark
1183     public void scatter(Blackhole bh) {
1184         float[] a = fa.apply(SPECIES.length());
1185         int[] b = fs.apply(a.length, SPECIES.length());
1186         float[] r = new float[a.length];
1187 
1188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1189             for (int i = 0; i < a.length; i += SPECIES.length()) {
1190                 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1191                 av.intoArray(r, i, b, i);
1192             }
1193         }
1194 
1195         bh.consume(r);
1196     }
1197 
1198 }
1199