1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;
  30 import org.openjdk.jmh.infra.Blackhole;
  31 
  32 @BenchmarkMode(Mode.Throughput)
  33 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  34 @State(Scope.Benchmark)
  35 @Warmup(iterations = 3, time = 1)
  36 @Measurement(iterations = 5, time = 1)
  37 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  38 public class FloatScalar extends AbstractVectorBenchmark {
  39     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  40 
  41     @Param("1024")
  42     int size;
  43 
  44     float[] fill(IntFunction<Float> f) {
  45         float[] array = new float[size];
  46         for (int i = 0; i < array.length; i++) {
  47             array[i] = f.apply(i);
  48         }
  49         return array;
  50     }
  51 
  52     float[] as, bs, cs, rs;
  53     boolean[] ms, rms;
  54     int[] ss;
  55 
  56     @Setup
  57     public void init() {
  58         as = fill(i -> (float)(2*i));
  59         bs = fill(i -> (float)(i+1));
  60         cs = fill(i -> (float)(i+5));
  61         rs = fill(i -> (float)0);
  62         ms = fillMask(size, i -> (i % 2) == 0);
  63         rms = fillMask(size, i -> false);
  64 
  65         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  66     }
  67 
  68     final IntFunction<float[]> fa = vl -> as;
  69     final IntFunction<float[]> fb = vl -> bs;
  70     final IntFunction<float[]> fc = vl -> cs;
  71     final IntFunction<float[]> fr = vl -> rs;
  72     final IntFunction<boolean[]> fm = vl -> ms;
  73     final IntFunction<boolean[]> fmr = vl -> rms;
  74     final IntFunction<int[]> fs = vl -> ss;
  75 
  76 
  77     @Benchmark
  78     public void add(Blackhole bh) {
  79         float[] as = fa.apply(size);
  80         float[] bs = fb.apply(size);
  81         float[] rs = fr.apply(size);
  82 
  83         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  84             for (int i = 0; i < as.length; i++) {
  85                 float a = as[i];
  86                 float b = bs[i];
  87                 rs[i] = (float)(a + b);
  88             }
  89         }
  90 
  91         bh.consume(rs);
  92     }
  93 
  94     @Benchmark
  95     public void addMasked(Blackhole bh) {
  96         float[] as = fa.apply(size);
  97         float[] bs = fb.apply(size);
  98         float[] rs = fr.apply(size);
  99         boolean[] ms = fm.apply(size);
 100 
 101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 102             for (int i = 0; i < as.length; i++) {
 103                 float a = as[i];
 104                 float b = bs[i];
 105                 if (ms[i % ms.length]) {
 106                     rs[i] = (float)(a + b);
 107                 } else {
 108                     rs[i] = a;
 109                 }
 110             }
 111         }
 112         bh.consume(rs);
 113     }
 114 
 115     @Benchmark
 116     public void sub(Blackhole bh) {
 117         float[] as = fa.apply(size);
 118         float[] bs = fb.apply(size);
 119         float[] rs = fr.apply(size);
 120 
 121         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 122             for (int i = 0; i < as.length; i++) {
 123                 float a = as[i];
 124                 float b = bs[i];
 125                 rs[i] = (float)(a - b);
 126             }
 127         }
 128 
 129         bh.consume(rs);
 130     }
 131 
 132     @Benchmark
 133     public void subMasked(Blackhole bh) {
 134         float[] as = fa.apply(size);
 135         float[] bs = fb.apply(size);
 136         float[] rs = fr.apply(size);
 137         boolean[] ms = fm.apply(size);
 138 
 139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 140             for (int i = 0; i < as.length; i++) {
 141                 float a = as[i];
 142                 float b = bs[i];
 143                 if (ms[i % ms.length]) {
 144                     rs[i] = (float)(a - b);
 145                 } else {
 146                     rs[i] = a;
 147                 }
 148             }
 149         }
 150         bh.consume(rs);
 151     }
 152 
 153 
 154     @Benchmark
 155     public void div(Blackhole bh) {
 156         float[] as = fa.apply(size);
 157         float[] bs = fb.apply(size);
 158         float[] rs = fr.apply(size);
 159 
 160         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 161             for (int i = 0; i < as.length; i++) {
 162                 float a = as[i];
 163                 float b = bs[i];
 164                 rs[i] = (float)(a / b);
 165             }
 166         }
 167 
 168         bh.consume(rs);
 169     }
 170 
 171 
 172 
 173     @Benchmark
 174     public void divMasked(Blackhole bh) {
 175         float[] as = fa.apply(size);
 176         float[] bs = fb.apply(size);
 177         float[] rs = fr.apply(size);
 178         boolean[] ms = fm.apply(size);
 179 
 180         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 181             for (int i = 0; i < as.length; i++) {
 182                 float a = as[i];
 183                 float b = bs[i];
 184                 if (ms[i % ms.length]) {
 185                     rs[i] = (float)(a / b);
 186                 } else {
 187                     rs[i] = a;
 188                 }
 189             }
 190         }
 191         bh.consume(rs);
 192     }
 193 
 194 
 195     @Benchmark
 196     public void mul(Blackhole bh) {
 197         float[] as = fa.apply(size);
 198         float[] bs = fb.apply(size);
 199         float[] rs = fr.apply(size);
 200 
 201         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 202             for (int i = 0; i < as.length; i++) {
 203                 float a = as[i];
 204                 float b = bs[i];
 205                 rs[i] = (float)(a * b);
 206             }
 207         }
 208 
 209         bh.consume(rs);
 210     }
 211 
 212     @Benchmark
 213     public void mulMasked(Blackhole bh) {
 214         float[] as = fa.apply(size);
 215         float[] bs = fb.apply(size);
 216         float[] rs = fr.apply(size);
 217         boolean[] ms = fm.apply(size);
 218 
 219         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 220             for (int i = 0; i < as.length; i++) {
 221                 float a = as[i];
 222                 float b = bs[i];
 223                 if (ms[i % ms.length]) {
 224                     rs[i] = (float)(a * b);
 225                 } else {
 226                     rs[i] = a;
 227                 }
 228             }
 229         }
 230         bh.consume(rs);
 231     }
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263     @Benchmark
 264     public void max(Blackhole bh) {
 265         float[] as = fa.apply(size);
 266         float[] bs = fb.apply(size);
 267         float[] rs = fr.apply(size);
 268 
 269         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 270             for (int i = 0; i < as.length; i++) {
 271                 float a = as[i];
 272                 float b = bs[i];
 273                 rs[i] = (float)(Math.max(a, b));
 274             }
 275         }
 276 
 277         bh.consume(rs);
 278     }
 279 
 280     @Benchmark
 281     public void min(Blackhole bh) {
 282         float[] as = fa.apply(size);
 283         float[] bs = fb.apply(size);
 284         float[] rs = fr.apply(size);
 285 
 286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 287             for (int i = 0; i < as.length; i++) {
 288                 float a = as[i];
 289                 float b = bs[i];
 290                 rs[i] = (float)(Math.min(a, b));
 291             }
 292         }
 293 
 294         bh.consume(rs);
 295     }
 296 
 297 
 298 
 299 
 300     @Benchmark
 301     public void addAll(Blackhole bh) {
 302         float[] as = fa.apply(size);
 303         float r = 0;
 304         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 305             r = 0;
 306             for (int i = 0; i < as.length; i++) {
 307                 r += as[i];
 308             }
 309         }
 310         bh.consume(r);
 311     }
 312 
 313     @Benchmark
 314     public void mulAll(Blackhole bh) {
 315         float[] as = fa.apply(size);
 316         float r = 1;
 317         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 318             r = 1;
 319             for (int i = 0; i < as.length; i++) {
 320                 r *= as[i];
 321             }
 322         }
 323         bh.consume(r);
 324     }
 325 
 326     @Benchmark
 327     public void minAll(Blackhole bh) {
 328         float[] as = fa.apply(size);
 329         float r = Float.POSITIVE_INFINITY;
 330         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 331             r = Float.POSITIVE_INFINITY;
 332             for (int i = 0; i < as.length; i++) {
 333                 r = (float)Math.min(r, as[i]);
 334             }
 335         }
 336         bh.consume(r);
 337     }
 338 
 339     @Benchmark
 340     public void maxAll(Blackhole bh) {
 341         float[] as = fa.apply(size);
 342         float r = Float.NEGATIVE_INFINITY;
 343         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 344             r = Float.NEGATIVE_INFINITY;
 345             for (int i = 0; i < as.length; i++) {
 346                 r = (float)Math.max(r, as[i]);
 347             }
 348         }
 349         bh.consume(r);
 350     }
 351 
 352 
 353 
 354     @Benchmark
 355     public void lessThan(Blackhole bh) {
 356         float[] as = fa.apply(size);
 357         float[] bs = fb.apply(size);
 358 
 359         boolean r = false;
 360         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 361             r = false;
 362             for (int i = 0; i < as.length; i++) {
 363                 boolean m = (as[i] < bs[i]);
 364                 r |= m; // accumulate so JIT can't eliminate the computation
 365             }
 366         }
 367 
 368         bh.consume(r);
 369     }
 370 
 371     @Benchmark
 372     public void greaterThan(Blackhole bh) {
 373         float[] as = fa.apply(size);
 374         float[] bs = fb.apply(size);
 375 
 376         boolean r = false;
 377         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 378             r = false;
 379             for (int i = 0; i < as.length; i++) {
 380                 boolean m = (as[i] > bs[i]);
 381                 r |= m; // accumulate so JIT can't eliminate the computation
 382             }
 383         }
 384 
 385         bh.consume(r);
 386     }
 387 
 388     @Benchmark
 389     public void equal(Blackhole bh) {
 390         float[] as = fa.apply(size);
 391         float[] bs = fb.apply(size);
 392 
 393         boolean r = false;
 394         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 395             r = false;
 396             for (int i = 0; i < as.length; i++) {
 397                 boolean m = (as[i] == bs[i]);
 398                 r |= m; // accumulate so JIT can't eliminate the computation
 399             }
 400         }
 401 
 402         bh.consume(r);
 403     }
 404 
 405     @Benchmark
 406     public void notEqual(Blackhole bh) {
 407         float[] as = fa.apply(size);
 408         float[] bs = fb.apply(size);
 409 
 410         boolean r = false;
 411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 412             r = false;
 413             for (int i = 0; i < as.length; i++) {
 414                 boolean m = (as[i] != bs[i]);
 415                 r |= m; // accumulate so JIT can't eliminate the computation
 416             }
 417         }
 418 
 419         bh.consume(r);
 420     }
 421 
 422     @Benchmark
 423     public void lessThanEq(Blackhole bh) {
 424         float[] as = fa.apply(size);
 425         float[] bs = fb.apply(size);
 426 
 427         boolean r = false;
 428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 429             r = false;
 430             for (int i = 0; i < as.length; i++) {
 431                 boolean m = (as[i] <= bs[i]);
 432                 r |= m; // accumulate so JIT can't eliminate the computation
 433             }
 434         }
 435 
 436         bh.consume(r);
 437     }
 438 
 439     @Benchmark
 440     public void greaterThanEq(Blackhole bh) {
 441         float[] as = fa.apply(size);
 442         float[] bs = fb.apply(size);
 443 
 444         boolean r = false;
 445         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 446             r = false;
 447             for (int i = 0; i < as.length; i++) {
 448                 boolean m = (as[i] >= bs[i]);
 449                 r |= m; // accumulate so JIT can't eliminate the computation
 450             }
 451         }
 452 
 453         bh.consume(r);
 454     }
 455 
 456     @Benchmark
 457     public void blend(Blackhole bh) {
 458         float[] as = fa.apply(size);
 459         float[] bs = fb.apply(size);
 460         float[] rs = fr.apply(size);
 461         boolean[] ms = fm.apply(size);
 462 
 463         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 464             for (int i = 0; i < as.length; i++) {
 465                 float a = as[i];
 466                 float b = bs[i];
 467                 boolean m = ms[i % ms.length];
 468                 rs[i] = (m ? b : a);
 469             }
 470         }
 471 
 472         bh.consume(rs);
 473     }
 474     void rearrangeShared(int window, Blackhole bh) {
 475         float[] as = fa.apply(size);
 476         int[] order = fs.apply(size);
 477         float[] rs = fr.apply(size);
 478 
 479         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 480             for (int i = 0; i < as.length; i += window) {
 481                 for (int j = 0; j < window; j++) {
 482                     float a = as[i+j];
 483                     int pos = order[j];
 484                     rs[i + pos] = a;
 485                 }
 486             }
 487         }
 488 
 489         bh.consume(rs);
 490     }
 491 
 492     @Benchmark
 493     public void rearrange064(Blackhole bh) {
 494         int window = 64 / Float.SIZE;
 495         rearrangeShared(window, bh);
 496     }
 497 
 498     @Benchmark
 499     public void rearrange128(Blackhole bh) {
 500         int window = 128 / Float.SIZE;
 501         rearrangeShared(window, bh);
 502     }
 503 
 504     @Benchmark
 505     public void rearrange256(Blackhole bh) {
 506         int window = 256 / Float.SIZE;
 507         rearrangeShared(window, bh);
 508     }
 509 
 510     @Benchmark
 511     public void rearrange512(Blackhole bh) {
 512         int window = 512 / Float.SIZE;
 513         rearrangeShared(window, bh);
 514     }
 515 
 516 
 517     @Benchmark
 518     public void sin(Blackhole bh) {
 519         float[] as = fa.apply(size);
 520         float[] rs = fr.apply(size);
 521 
 522         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 523             for (int i = 0; i < as.length; i++) {
 524                 float a = as[i];
 525                 rs[i] = (float)(Math.sin((double)a));
 526             }
 527         }
 528 
 529         bh.consume(rs);
 530     }
 531 
 532 
 533 
 534     @Benchmark
 535     public void exp(Blackhole bh) {
 536         float[] as = fa.apply(size);
 537         float[] rs = fr.apply(size);
 538 
 539         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 540             for (int i = 0; i < as.length; i++) {
 541                 float a = as[i];
 542                 rs[i] = (float)(Math.exp((double)a));
 543             }
 544         }
 545 
 546         bh.consume(rs);
 547     }
 548 
 549 
 550 
 551     @Benchmark
 552     public void log1p(Blackhole bh) {
 553         float[] as = fa.apply(size);
 554         float[] rs = fr.apply(size);
 555 
 556         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 557             for (int i = 0; i < as.length; i++) {
 558                 float a = as[i];
 559                 rs[i] = (float)(Math.log1p((double)a));
 560             }
 561         }
 562 
 563         bh.consume(rs);
 564     }
 565 
 566 
 567 
 568     @Benchmark
 569     public void log(Blackhole bh) {
 570         float[] as = fa.apply(size);
 571         float[] rs = fr.apply(size);
 572 
 573         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 574             for (int i = 0; i < as.length; i++) {
 575                 float a = as[i];
 576                 rs[i] = (float)(Math.log((double)a));
 577             }
 578         }
 579 
 580         bh.consume(rs);
 581     }
 582 
 583 
 584 
 585     @Benchmark
 586     public void log10(Blackhole bh) {
 587         float[] as = fa.apply(size);
 588         float[] rs = fr.apply(size);
 589 
 590         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 591             for (int i = 0; i < as.length; i++) {
 592                 float a = as[i];
 593                 rs[i] = (float)(Math.log10((double)a));
 594             }
 595         }
 596 
 597         bh.consume(rs);
 598     }
 599 
 600 
 601 
 602     @Benchmark
 603     public void expm1(Blackhole bh) {
 604         float[] as = fa.apply(size);
 605         float[] rs = fr.apply(size);
 606 
 607         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 608             for (int i = 0; i < as.length; i++) {
 609                 float a = as[i];
 610                 rs[i] = (float)(Math.expm1((double)a));
 611             }
 612         }
 613 
 614         bh.consume(rs);
 615     }
 616 
 617 
 618 
 619     @Benchmark
 620     public void cos(Blackhole bh) {
 621         float[] as = fa.apply(size);
 622         float[] rs = fr.apply(size);
 623 
 624         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 625             for (int i = 0; i < as.length; i++) {
 626                 float a = as[i];
 627                 rs[i] = (float)(Math.cos((double)a));
 628             }
 629         }
 630 
 631         bh.consume(rs);
 632     }
 633 
 634 
 635 
 636     @Benchmark
 637     public void tan(Blackhole bh) {
 638         float[] as = fa.apply(size);
 639         float[] rs = fr.apply(size);
 640 
 641         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 642             for (int i = 0; i < as.length; i++) {
 643                 float a = as[i];
 644                 rs[i] = (float)(Math.tan((double)a));
 645             }
 646         }
 647 
 648         bh.consume(rs);
 649     }
 650 
 651 
 652 
 653     @Benchmark
 654     public void sinh(Blackhole bh) {
 655         float[] as = fa.apply(size);
 656         float[] rs = fr.apply(size);
 657 
 658         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 659             for (int i = 0; i < as.length; i++) {
 660                 float a = as[i];
 661                 rs[i] = (float)(Math.sinh((double)a));
 662             }
 663         }
 664 
 665         bh.consume(rs);
 666     }
 667 
 668 
 669 
 670     @Benchmark
 671     public void cosh(Blackhole bh) {
 672         float[] as = fa.apply(size);
 673         float[] rs = fr.apply(size);
 674 
 675         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 676             for (int i = 0; i < as.length; i++) {
 677                 float a = as[i];
 678                 rs[i] = (float)(Math.cosh((double)a));
 679             }
 680         }
 681 
 682         bh.consume(rs);
 683     }
 684 
 685 
 686 
 687     @Benchmark
 688     public void tanh(Blackhole bh) {
 689         float[] as = fa.apply(size);
 690         float[] rs = fr.apply(size);
 691 
 692         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 693             for (int i = 0; i < as.length; i++) {
 694                 float a = as[i];
 695                 rs[i] = (float)(Math.tanh((double)a));
 696             }
 697         }
 698 
 699         bh.consume(rs);
 700     }
 701 
 702 
 703 
 704     @Benchmark
 705     public void asin(Blackhole bh) {
 706         float[] as = fa.apply(size);
 707         float[] rs = fr.apply(size);
 708 
 709         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 710             for (int i = 0; i < as.length; i++) {
 711                 float a = as[i];
 712                 rs[i] = (float)(Math.asin((double)a));
 713             }
 714         }
 715 
 716         bh.consume(rs);
 717     }
 718 
 719 
 720 
 721     @Benchmark
 722     public void acos(Blackhole bh) {
 723         float[] as = fa.apply(size);
 724         float[] rs = fr.apply(size);
 725 
 726         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 727             for (int i = 0; i < as.length; i++) {
 728                 float a = as[i];
 729                 rs[i] = (float)(Math.acos((double)a));
 730             }
 731         }
 732 
 733         bh.consume(rs);
 734     }
 735 
 736 
 737 
 738     @Benchmark
 739     public void atan(Blackhole bh) {
 740         float[] as = fa.apply(size);
 741         float[] rs = fr.apply(size);
 742 
 743         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 744             for (int i = 0; i < as.length; i++) {
 745                 float a = as[i];
 746                 rs[i] = (float)(Math.atan((double)a));
 747             }
 748         }
 749 
 750         bh.consume(rs);
 751     }
 752 
 753 
 754 
 755     @Benchmark
 756     public void cbrt(Blackhole bh) {
 757         float[] as = fa.apply(size);
 758         float[] rs = fr.apply(size);
 759 
 760         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 761             for (int i = 0; i < as.length; i++) {
 762                 float a = as[i];
 763                 rs[i] = (float)(Math.cbrt((double)a));
 764             }
 765         }
 766 
 767         bh.consume(rs);
 768     }
 769 
 770 
 771 
 772     @Benchmark
 773     public void hypot(Blackhole bh) {
 774         float[] as = fa.apply(size);
 775         float[] bs = fb.apply(size);
 776         float[] rs = fr.apply(size);
 777 
 778         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 779             for (int i = 0; i < as.length; i++) {
 780                 float a = as[i];
 781                 float b = bs[i];
 782                 rs[i] = (float)(Math.hypot((double)a, (double)b));
 783             }
 784         }
 785 
 786         bh.consume(rs);
 787     }
 788 
 789 
 790 
 791     @Benchmark
 792     public void pow(Blackhole bh) {
 793         float[] as = fa.apply(size);
 794         float[] bs = fb.apply(size);
 795         float[] rs = fr.apply(size);
 796 
 797         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 798             for (int i = 0; i < as.length; i++) {
 799                 float a = as[i];
 800                 float b = bs[i];
 801                 rs[i] = (float)(Math.pow((double)a, (double)b));
 802             }
 803         }
 804 
 805         bh.consume(rs);
 806     }
 807 
 808 
 809 
 810     @Benchmark
 811     public void atan2(Blackhole bh) {
 812         float[] as = fa.apply(size);
 813         float[] bs = fb.apply(size);
 814         float[] rs = fr.apply(size);
 815 
 816         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 817             for (int i = 0; i < as.length; i++) {
 818                 float a = as[i];
 819                 float b = bs[i];
 820                 rs[i] = (float)(Math.atan2((double)a, (double)b));
 821             }
 822         }
 823 
 824         bh.consume(rs);
 825     }
 826 
 827 
 828 
 829     @Benchmark
 830     public void fma(Blackhole bh) {
 831         float[] as = fa.apply(size);
 832         float[] bs = fb.apply(size);
 833         float[] cs = fc.apply(size);
 834         float[] rs = fr.apply(size);
 835 
 836         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 837             for (int i = 0; i < as.length; i++) {
 838                 float a = as[i];
 839                 float b = bs[i];
 840                 float c = cs[i];
 841                 rs[i] = (float)(Math.fma(a, b, c));
 842             }
 843         }
 844 
 845         bh.consume(rs);
 846     }
 847 
 848 
 849 
 850 
 851     @Benchmark
 852     public void fmaMasked(Blackhole bh) {
 853         float[] as = fa.apply(size);
 854         float[] bs = fb.apply(size);
 855         float[] cs = fc.apply(size);
 856         float[] rs = fr.apply(size);
 857         boolean[] ms = fm.apply(size);
 858 
 859         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 860             for (int i = 0; i < as.length; i++) {
 861                 float a = as[i];
 862                 float b = bs[i];
 863                 float c = cs[i];
 864                 if (ms[i % ms.length]) {
 865                     rs[i] = (float)(Math.fma(a, b, c));
 866                 } else {
 867                     rs[i] = a;
 868                 }
 869             }
 870         }
 871         bh.consume(rs);
 872     }
 873 
 874 
 875     @Benchmark
 876     public void neg(Blackhole bh) {
 877         float[] as = fa.apply(size);
 878         float[] rs = fr.apply(size);
 879 
 880         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 881             for (int i = 0; i < as.length; i++) {
 882                 float a = as[i];
 883                 rs[i] = (float)(-((float)a));
 884             }
 885         }
 886 
 887         bh.consume(rs);
 888     }
 889 
 890     @Benchmark
 891     public void negMasked(Blackhole bh) {
 892         float[] as = fa.apply(size);
 893         float[] rs = fr.apply(size);
 894         boolean[] ms = fm.apply(size);
 895 
 896         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 897             for (int i = 0; i < as.length; i++) {
 898                 float a = as[i];
 899                 boolean m = ms[i % ms.length];
 900                 rs[i] = (m ? (float)(-((float)a)) : a);
 901             }
 902         }
 903 
 904         bh.consume(rs);
 905     }
 906 
 907     @Benchmark
 908     public void abs(Blackhole bh) {
 909         float[] as = fa.apply(size);
 910         float[] rs = fr.apply(size);
 911 
 912         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 913             for (int i = 0; i < as.length; i++) {
 914                 float a = as[i];
 915                 rs[i] = (float)(Math.abs((float)a));
 916             }
 917         }
 918 
 919         bh.consume(rs);
 920     }
 921 
 922     @Benchmark
 923     public void absMasked(Blackhole bh) {
 924         float[] as = fa.apply(size);
 925         float[] rs = fr.apply(size);
 926         boolean[] ms = fm.apply(size);
 927 
 928         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 929             for (int i = 0; i < as.length; i++) {
 930                 float a = as[i];
 931                 boolean m = ms[i % ms.length];
 932                 rs[i] = (m ? (float)(Math.abs((float)a)) : a);
 933             }
 934         }
 935 
 936         bh.consume(rs);
 937     }
 938 
 939 
 940 
 941 
 942     @Benchmark
 943     public void sqrt(Blackhole bh) {
 944         float[] as = fa.apply(size);
 945         float[] rs = fr.apply(size);
 946 
 947         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 948             for (int i = 0; i < as.length; i++) {
 949                 float a = as[i];
 950                 rs[i] = (float)(Math.sqrt((double)a));
 951             }
 952         }
 953 
 954         bh.consume(rs);
 955     }
 956 
 957 
 958 
 959     @Benchmark
 960     public void sqrtMasked(Blackhole bh) {
 961         float[] as = fa.apply(size);
 962         float[] rs = fr.apply(size);
 963         boolean[] ms = fm.apply(size);
 964 
 965         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 966             for (int i = 0; i < as.length; i++) {
 967                 float a = as[i];
 968                 boolean m = ms[i % ms.length];
 969                 rs[i] = (m ? (float)(Math.sqrt((double)a)) : a);
 970             }
 971         }
 972 
 973         bh.consume(rs);
 974     }
 975 
 976 
 977     @Benchmark
 978     public void gatherBase0(Blackhole bh) {
 979         float[] as = fa.apply(size);
 980         int[] is    = fs.apply(size);
 981         float[] rs = fr.apply(size);
 982 
 983         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 984             for (int i = 0; i < as.length; i++) {
 985                 int ix = 0 + is[i];
 986                 rs[i] = as[ix];
 987             }
 988         }
 989 
 990         bh.consume(rs);
 991     }
 992 
 993 
 994     void gather(int window, Blackhole bh) {
 995         float[] as = fa.apply(size);
 996         int[] is    = fs.apply(size);
 997         float[] rs = fr.apply(size);
 998 
 999         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1000             for (int i = 0; i < as.length; i += window) {
1001                 for (int j = 0; j < window; j++) {
1002                     int ix = i + is[i + j];
1003                     rs[i + j] = as[ix];
1004                 }
1005             }
1006         }
1007 
1008         bh.consume(rs);
1009     }
1010 
1011     @Benchmark
1012     public void gather064(Blackhole bh) {
1013         int window = 64 / Float.SIZE;
1014         gather(window, bh);
1015     }
1016 
1017     @Benchmark
1018     public void gather128(Blackhole bh) {
1019         int window = 128 / Float.SIZE;
1020         gather(window, bh);
1021     }
1022 
1023     @Benchmark
1024     public void gather256(Blackhole bh) {
1025         int window = 256 / Float.SIZE;
1026         gather(window, bh);
1027     }
1028 
1029     @Benchmark
1030     public void gather512(Blackhole bh) {
1031         int window = 512 / Float.SIZE;
1032         gather(window, bh);
1033     }
1034 
1035 
1036 
1037     @Benchmark
1038     public void scatterBase0(Blackhole bh) {
1039         float[] as = fa.apply(size);
1040         int[] is    = fs.apply(size);
1041         float[] rs = fr.apply(size);
1042 
1043         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1044             for (int i = 0; i < as.length; i++) {
1045                 int ix = 0 + is[i];
1046                 rs[ix] = as[i];
1047             }
1048         }
1049 
1050         bh.consume(rs);
1051     }
1052 
1053     void scatter(int window, Blackhole bh) {
1054         float[] as = fa.apply(size);
1055         int[] is    = fs.apply(size);
1056         float[] rs = fr.apply(size);
1057 
1058         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1059             for (int i = 0; i < as.length; i += window) {
1060                 for (int j = 0; j < window; j++) {
1061                     int ix = i + is[i + j];
1062                     rs[ix] = as[i + j];
1063                 }
1064             }
1065         }
1066 
1067         bh.consume(rs);
1068     }
1069 
1070     @Benchmark
1071     public void scatter064(Blackhole bh) {
1072         int window = 64 / Float.SIZE;
1073         scatter(window, bh);
1074     }
1075 
1076     @Benchmark
1077     public void scatter128(Blackhole bh) {
1078         int window = 128 / Float.SIZE;
1079         scatter(window, bh);
1080     }
1081 
1082     @Benchmark
1083     public void scatter256(Blackhole bh) {
1084         int window = 256 / Float.SIZE;
1085         scatter(window, bh);
1086     }
1087 
1088     @Benchmark
1089     public void scatter512(Blackhole bh) {
1090         int window = 512 / Float.SIZE;
1091         scatter(window, bh);
1092     }
1093 
1094 }
1095