1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;
  30 import org.openjdk.jmh.infra.Blackhole;
  31 
  32 @BenchmarkMode(Mode.Throughput)
  33 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  34 @State(Scope.Benchmark)
  35 @Warmup(iterations = 3, time = 1)
  36 @Measurement(iterations = 5, time = 1)
  37 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  38 public class FloatScalar extends AbstractVectorBenchmark {
  39     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  40 
  41     @Param("1024")
  42     int size;
  43 
  44     float[] fill(IntFunction<Float> f) {
  45         float[] array = new float[size];
  46         for (int i = 0; i < array.length; i++) {
  47             array[i] = f.apply(i);
  48         }
  49         return array;
  50     }
  51 
  52     float[] as, bs, cs, rs;
  53     boolean[] ms, rms;
  54     int[] ss;
  55 
  56     @Setup
  57     public void init() {
  58         as = fill(i -> (float)(2*i));
  59         bs = fill(i -> (float)(i+1));
  60         cs = fill(i -> (float)(i+5));
  61         rs = fill(i -> (float)0);
  62         ms = fillMask(size, i -> (i % 2) == 0);
  63         rms = fillMask(size, i -> false);
  64 
  65         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  66     }
  67 
  68     final IntFunction<float[]> fa = vl -> as;
  69     final IntFunction<float[]> fb = vl -> bs;
  70     final IntFunction<float[]> fc = vl -> cs;
  71     final IntFunction<float[]> fr = vl -> rs;
  72     final IntFunction<boolean[]> fm = vl -> ms;
  73     final IntFunction<boolean[]> fmr = vl -> rms;
  74     final IntFunction<int[]> fs = vl -> ss;
  75 
  76 
  77     @Benchmark
  78     public void add(Blackhole bh) {
  79         float[] as = fa.apply(size);
  80         float[] bs = fb.apply(size);
  81         float[] rs = fr.apply(size);
  82 
  83         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  84             for (int i = 0; i < as.length; i++) {
  85                 float a = as[i];
  86                 float b = bs[i];
  87                 rs[i] = (float)(a + b);
  88             }
  89         }
  90 
  91         bh.consume(rs);
  92     }
  93 
  94     @Benchmark
  95     public void addMasked(Blackhole bh) {
  96         float[] as = fa.apply(size);
  97         float[] bs = fb.apply(size);
  98         float[] rs = fr.apply(size);
  99         boolean[] ms = fm.apply(size);
 100 
 101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 102             for (int i = 0; i < as.length; i++) {
 103                 float a = as[i];
 104                 float b = bs[i];
 105                 if (ms[i % ms.length]) {
 106                     rs[i] = (float)(a + b);
 107                 } else {
 108                     rs[i] = a;
 109                 }
 110             }
 111         }
 112         bh.consume(rs);
 113     }
 114 
 115     @Benchmark
 116     public void sub(Blackhole bh) {
 117         float[] as = fa.apply(size);
 118         float[] bs = fb.apply(size);
 119         float[] rs = fr.apply(size);
 120 
 121         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 122             for (int i = 0; i < as.length; i++) {
 123                 float a = as[i];
 124                 float b = bs[i];
 125                 rs[i] = (float)(a - b);
 126             }
 127         }
 128 
 129         bh.consume(rs);
 130     }
 131 
 132     @Benchmark
 133     public void subMasked(Blackhole bh) {
 134         float[] as = fa.apply(size);
 135         float[] bs = fb.apply(size);
 136         float[] rs = fr.apply(size);
 137         boolean[] ms = fm.apply(size);
 138 
 139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 140             for (int i = 0; i < as.length; i++) {
 141                 float a = as[i];
 142                 float b = bs[i];
 143                 if (ms[i % ms.length]) {
 144                     rs[i] = (float)(a - b);
 145                 } else {
 146                     rs[i] = a;
 147                 }
 148             }
 149         }
 150         bh.consume(rs);
 151     }
 152 
 153 
 154     @Benchmark
 155     public void div(Blackhole bh) {
 156         float[] as = fa.apply(size);
 157         float[] bs = fb.apply(size);
 158         float[] rs = fr.apply(size);
 159 
 160         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 161             for (int i = 0; i < as.length; i++) {
 162                 float a = as[i];
 163                 float b = bs[i];
 164                 rs[i] = (float)(a / b);
 165             }
 166         }
 167 
 168         bh.consume(rs);
 169     }
 170 
 171 
 172 
 173     @Benchmark
 174     public void divMasked(Blackhole bh) {
 175         float[] as = fa.apply(size);
 176         float[] bs = fb.apply(size);
 177         float[] rs = fr.apply(size);
 178         boolean[] ms = fm.apply(size);
 179 
 180         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 181             for (int i = 0; i < as.length; i++) {
 182                 float a = as[i];
 183                 float b = bs[i];
 184                 if (ms[i % ms.length]) {
 185                     rs[i] = (float)(a / b);
 186                 } else {
 187                     rs[i] = a;
 188                 }
 189             }
 190         }
 191         bh.consume(rs);
 192     }
 193 
 194 
 195     @Benchmark
 196     public void mul(Blackhole bh) {
 197         float[] as = fa.apply(size);
 198         float[] bs = fb.apply(size);
 199         float[] rs = fr.apply(size);
 200 
 201         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 202             for (int i = 0; i < as.length; i++) {
 203                 float a = as[i];
 204                 float b = bs[i];
 205                 rs[i] = (float)(a * b);
 206             }
 207         }
 208 
 209         bh.consume(rs);
 210     }
 211 
 212     @Benchmark
 213     public void mulMasked(Blackhole bh) {
 214         float[] as = fa.apply(size);
 215         float[] bs = fb.apply(size);
 216         float[] rs = fr.apply(size);
 217         boolean[] ms = fm.apply(size);
 218 
 219         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 220             for (int i = 0; i < as.length; i++) {
 221                 float a = as[i];
 222                 float b = bs[i];
 223                 if (ms[i % ms.length]) {
 224                     rs[i] = (float)(a * b);
 225                 } else {
 226                     rs[i] = a;
 227                 }
 228             }
 229         }
 230         bh.consume(rs);
 231     }
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266 
 267 
 268 
 269 
 270 
 271 
 272 
 273 
 274 
 275     @Benchmark
 276     public void max(Blackhole bh) {
 277         float[] as = fa.apply(size);
 278         float[] bs = fb.apply(size);
 279         float[] rs = fr.apply(size);
 280 
 281         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 282             for (int i = 0; i < as.length; i++) {
 283                 float a = as[i];
 284                 float b = bs[i];
 285                 rs[i] = (float)(Math.max(a, b));
 286             }
 287         }
 288 
 289         bh.consume(rs);
 290     }
 291 
 292     @Benchmark
 293     public void min(Blackhole bh) {
 294         float[] as = fa.apply(size);
 295         float[] bs = fb.apply(size);
 296         float[] rs = fr.apply(size);
 297 
 298         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 299             for (int i = 0; i < as.length; i++) {
 300                 float a = as[i];
 301                 float b = bs[i];
 302                 rs[i] = (float)(Math.min(a, b));
 303             }
 304         }
 305 
 306         bh.consume(rs);
 307     }
 308 
 309 
 310 
 311 
 312     @Benchmark
 313     public void addLanes(Blackhole bh) {
 314         float[] as = fa.apply(size);
 315         float r = 0;
 316         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 317             r = 0;
 318             for (int i = 0; i < as.length; i++) {
 319                 r += as[i];
 320             }
 321         }
 322         bh.consume(r);
 323     }
 324 
 325     @Benchmark
 326     public void mulLanes(Blackhole bh) {
 327         float[] as = fa.apply(size);
 328         float r = 1;
 329         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 330             r = 1;
 331             for (int i = 0; i < as.length; i++) {
 332                 r *= as[i];
 333             }
 334         }
 335         bh.consume(r);
 336     }
 337 
 338     @Benchmark
 339     public void minLanes(Blackhole bh) {
 340         float[] as = fa.apply(size);
 341         float r = Float.POSITIVE_INFINITY;
 342         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 343             r = Float.POSITIVE_INFINITY;
 344             for (int i = 0; i < as.length; i++) {
 345                 r = (float)Math.min(r, as[i]);
 346             }
 347         }
 348         bh.consume(r);
 349     }
 350 
 351     @Benchmark
 352     public void maxLanes(Blackhole bh) {
 353         float[] as = fa.apply(size);
 354         float r = Float.NEGATIVE_INFINITY;
 355         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 356             r = Float.NEGATIVE_INFINITY;
 357             for (int i = 0; i < as.length; i++) {
 358                 r = (float)Math.max(r, as[i]);
 359             }
 360         }
 361         bh.consume(r);
 362     }
 363 
 364 
 365 
 366     @Benchmark
 367     public void lessThan(Blackhole bh) {
 368         float[] as = fa.apply(size);
 369         float[] bs = fb.apply(size);
 370 
 371         boolean r = false;
 372         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 373             r = false;
 374             for (int i = 0; i < as.length; i++) {
 375                 boolean m = (as[i] < bs[i]);
 376                 r |= m; // accumulate so JIT can't eliminate the computation
 377             }
 378         }
 379 
 380         bh.consume(r);
 381     }
 382 
 383     @Benchmark
 384     public void greaterThan(Blackhole bh) {
 385         float[] as = fa.apply(size);
 386         float[] bs = fb.apply(size);
 387 
 388         boolean r = false;
 389         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 390             r = false;
 391             for (int i = 0; i < as.length; i++) {
 392                 boolean m = (as[i] > bs[i]);
 393                 r |= m; // accumulate so JIT can't eliminate the computation
 394             }
 395         }
 396 
 397         bh.consume(r);
 398     }
 399 
 400     @Benchmark
 401     public void equal(Blackhole bh) {
 402         float[] as = fa.apply(size);
 403         float[] bs = fb.apply(size);
 404 
 405         boolean r = false;
 406         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 407             r = false;
 408             for (int i = 0; i < as.length; i++) {
 409                 boolean m = (as[i] == bs[i]);
 410                 r |= m; // accumulate so JIT can't eliminate the computation
 411             }
 412         }
 413 
 414         bh.consume(r);
 415     }
 416 
 417     @Benchmark
 418     public void notEqual(Blackhole bh) {
 419         float[] as = fa.apply(size);
 420         float[] bs = fb.apply(size);
 421 
 422         boolean r = false;
 423         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 424             r = false;
 425             for (int i = 0; i < as.length; i++) {
 426                 boolean m = (as[i] != bs[i]);
 427                 r |= m; // accumulate so JIT can't eliminate the computation
 428             }
 429         }
 430 
 431         bh.consume(r);
 432     }
 433 
 434     @Benchmark
 435     public void lessThanEq(Blackhole bh) {
 436         float[] as = fa.apply(size);
 437         float[] bs = fb.apply(size);
 438 
 439         boolean r = false;
 440         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 441             r = false;
 442             for (int i = 0; i < as.length; i++) {
 443                 boolean m = (as[i] <= bs[i]);
 444                 r |= m; // accumulate so JIT can't eliminate the computation
 445             }
 446         }
 447 
 448         bh.consume(r);
 449     }
 450 
 451     @Benchmark
 452     public void greaterThanEq(Blackhole bh) {
 453         float[] as = fa.apply(size);
 454         float[] bs = fb.apply(size);
 455 
 456         boolean r = false;
 457         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 458             r = false;
 459             for (int i = 0; i < as.length; i++) {
 460                 boolean m = (as[i] >= bs[i]);
 461                 r |= m; // accumulate so JIT can't eliminate the computation
 462             }
 463         }
 464 
 465         bh.consume(r);
 466     }
 467 
 468     @Benchmark
 469     public void blend(Blackhole bh) {
 470         float[] as = fa.apply(size);
 471         float[] bs = fb.apply(size);
 472         float[] rs = fr.apply(size);
 473         boolean[] ms = fm.apply(size);
 474 
 475         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 476             for (int i = 0; i < as.length; i++) {
 477                 float a = as[i];
 478                 float b = bs[i];
 479                 boolean m = ms[i % ms.length];
 480                 rs[i] = (m ? b : a);
 481             }
 482         }
 483 
 484         bh.consume(rs);
 485     }
 486     void rearrangeShared(int window, Blackhole bh) {
 487         float[] as = fa.apply(size);
 488         int[] order = fs.apply(size);
 489         float[] rs = fr.apply(size);
 490 
 491         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 492             for (int i = 0; i < as.length; i += window) {
 493                 for (int j = 0; j < window; j++) {
 494                     float a = as[i+j];
 495                     int pos = order[j];
 496                     rs[i + pos] = a;
 497                 }
 498             }
 499         }
 500 
 501         bh.consume(rs);
 502     }
 503 
 504     @Benchmark
 505     public void rearrange064(Blackhole bh) {
 506         int window = 64 / Float.SIZE;
 507         rearrangeShared(window, bh);
 508     }
 509 
 510     @Benchmark
 511     public void rearrange128(Blackhole bh) {
 512         int window = 128 / Float.SIZE;
 513         rearrangeShared(window, bh);
 514     }
 515 
 516     @Benchmark
 517     public void rearrange256(Blackhole bh) {
 518         int window = 256 / Float.SIZE;
 519         rearrangeShared(window, bh);
 520     }
 521 
 522     @Benchmark
 523     public void rearrange512(Blackhole bh) {
 524         int window = 512 / Float.SIZE;
 525         rearrangeShared(window, bh);
 526     }
 527 
 528 
 529     @Benchmark
 530     public void sin(Blackhole bh) {
 531         float[] as = fa.apply(size);
 532         float[] rs = fr.apply(size);
 533 
 534         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 535             for (int i = 0; i < as.length; i++) {
 536                 float a = as[i];
 537                 rs[i] = (float)(Math.sin((double)a));
 538             }
 539         }
 540 
 541         bh.consume(rs);
 542     }
 543 
 544 
 545 
 546     @Benchmark
 547     public void exp(Blackhole bh) {
 548         float[] as = fa.apply(size);
 549         float[] rs = fr.apply(size);
 550 
 551         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 552             for (int i = 0; i < as.length; i++) {
 553                 float a = as[i];
 554                 rs[i] = (float)(Math.exp((double)a));
 555             }
 556         }
 557 
 558         bh.consume(rs);
 559     }
 560 
 561 
 562 
 563     @Benchmark
 564     public void log1p(Blackhole bh) {
 565         float[] as = fa.apply(size);
 566         float[] rs = fr.apply(size);
 567 
 568         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 569             for (int i = 0; i < as.length; i++) {
 570                 float a = as[i];
 571                 rs[i] = (float)(Math.log1p((double)a));
 572             }
 573         }
 574 
 575         bh.consume(rs);
 576     }
 577 
 578 
 579 
 580     @Benchmark
 581     public void log(Blackhole bh) {
 582         float[] as = fa.apply(size);
 583         float[] rs = fr.apply(size);
 584 
 585         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 586             for (int i = 0; i < as.length; i++) {
 587                 float a = as[i];
 588                 rs[i] = (float)(Math.log((double)a));
 589             }
 590         }
 591 
 592         bh.consume(rs);
 593     }
 594 
 595 
 596 
 597     @Benchmark
 598     public void log10(Blackhole bh) {
 599         float[] as = fa.apply(size);
 600         float[] rs = fr.apply(size);
 601 
 602         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 603             for (int i = 0; i < as.length; i++) {
 604                 float a = as[i];
 605                 rs[i] = (float)(Math.log10((double)a));
 606             }
 607         }
 608 
 609         bh.consume(rs);
 610     }
 611 
 612 
 613 
 614     @Benchmark
 615     public void expm1(Blackhole bh) {
 616         float[] as = fa.apply(size);
 617         float[] rs = fr.apply(size);
 618 
 619         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 620             for (int i = 0; i < as.length; i++) {
 621                 float a = as[i];
 622                 rs[i] = (float)(Math.expm1((double)a));
 623             }
 624         }
 625 
 626         bh.consume(rs);
 627     }
 628 
 629 
 630 
 631     @Benchmark
 632     public void cos(Blackhole bh) {
 633         float[] as = fa.apply(size);
 634         float[] rs = fr.apply(size);
 635 
 636         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 637             for (int i = 0; i < as.length; i++) {
 638                 float a = as[i];
 639                 rs[i] = (float)(Math.cos((double)a));
 640             }
 641         }
 642 
 643         bh.consume(rs);
 644     }
 645 
 646 
 647 
 648     @Benchmark
 649     public void tan(Blackhole bh) {
 650         float[] as = fa.apply(size);
 651         float[] rs = fr.apply(size);
 652 
 653         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 654             for (int i = 0; i < as.length; i++) {
 655                 float a = as[i];
 656                 rs[i] = (float)(Math.tan((double)a));
 657             }
 658         }
 659 
 660         bh.consume(rs);
 661     }
 662 
 663 
 664 
 665     @Benchmark
 666     public void sinh(Blackhole bh) {
 667         float[] as = fa.apply(size);
 668         float[] rs = fr.apply(size);
 669 
 670         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 671             for (int i = 0; i < as.length; i++) {
 672                 float a = as[i];
 673                 rs[i] = (float)(Math.sinh((double)a));
 674             }
 675         }
 676 
 677         bh.consume(rs);
 678     }
 679 
 680 
 681 
 682     @Benchmark
 683     public void cosh(Blackhole bh) {
 684         float[] as = fa.apply(size);
 685         float[] rs = fr.apply(size);
 686 
 687         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 688             for (int i = 0; i < as.length; i++) {
 689                 float a = as[i];
 690                 rs[i] = (float)(Math.cosh((double)a));
 691             }
 692         }
 693 
 694         bh.consume(rs);
 695     }
 696 
 697 
 698 
 699     @Benchmark
 700     public void tanh(Blackhole bh) {
 701         float[] as = fa.apply(size);
 702         float[] rs = fr.apply(size);
 703 
 704         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 705             for (int i = 0; i < as.length; i++) {
 706                 float a = as[i];
 707                 rs[i] = (float)(Math.tanh((double)a));
 708             }
 709         }
 710 
 711         bh.consume(rs);
 712     }
 713 
 714 
 715 
 716     @Benchmark
 717     public void asin(Blackhole bh) {
 718         float[] as = fa.apply(size);
 719         float[] rs = fr.apply(size);
 720 
 721         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 722             for (int i = 0; i < as.length; i++) {
 723                 float a = as[i];
 724                 rs[i] = (float)(Math.asin((double)a));
 725             }
 726         }
 727 
 728         bh.consume(rs);
 729     }
 730 
 731 
 732 
 733     @Benchmark
 734     public void acos(Blackhole bh) {
 735         float[] as = fa.apply(size);
 736         float[] rs = fr.apply(size);
 737 
 738         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 739             for (int i = 0; i < as.length; i++) {
 740                 float a = as[i];
 741                 rs[i] = (float)(Math.acos((double)a));
 742             }
 743         }
 744 
 745         bh.consume(rs);
 746     }
 747 
 748 
 749 
 750     @Benchmark
 751     public void atan(Blackhole bh) {
 752         float[] as = fa.apply(size);
 753         float[] rs = fr.apply(size);
 754 
 755         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 756             for (int i = 0; i < as.length; i++) {
 757                 float a = as[i];
 758                 rs[i] = (float)(Math.atan((double)a));
 759             }
 760         }
 761 
 762         bh.consume(rs);
 763     }
 764 
 765 
 766 
 767     @Benchmark
 768     public void cbrt(Blackhole bh) {
 769         float[] as = fa.apply(size);
 770         float[] rs = fr.apply(size);
 771 
 772         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 773             for (int i = 0; i < as.length; i++) {
 774                 float a = as[i];
 775                 rs[i] = (float)(Math.cbrt((double)a));
 776             }
 777         }
 778 
 779         bh.consume(rs);
 780     }
 781 
 782 
 783 
 784     @Benchmark
 785     public void hypot(Blackhole bh) {
 786         float[] as = fa.apply(size);
 787         float[] bs = fb.apply(size);
 788         float[] rs = fr.apply(size);
 789 
 790         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 791             for (int i = 0; i < as.length; i++) {
 792                 float a = as[i];
 793                 float b = bs[i];
 794                 rs[i] = (float)(Math.hypot((double)a, (double)b));
 795             }
 796         }
 797 
 798         bh.consume(rs);
 799     }
 800 
 801 
 802 
 803     @Benchmark
 804     public void pow(Blackhole bh) {
 805         float[] as = fa.apply(size);
 806         float[] bs = fb.apply(size);
 807         float[] rs = fr.apply(size);
 808 
 809         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 810             for (int i = 0; i < as.length; i++) {
 811                 float a = as[i];
 812                 float b = bs[i];
 813                 rs[i] = (float)(Math.pow((double)a, (double)b));
 814             }
 815         }
 816 
 817         bh.consume(rs);
 818     }
 819 
 820 
 821 
 822     @Benchmark
 823     public void atan2(Blackhole bh) {
 824         float[] as = fa.apply(size);
 825         float[] bs = fb.apply(size);
 826         float[] rs = fr.apply(size);
 827 
 828         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 829             for (int i = 0; i < as.length; i++) {
 830                 float a = as[i];
 831                 float b = bs[i];
 832                 rs[i] = (float)(Math.atan2((double)a, (double)b));
 833             }
 834         }
 835 
 836         bh.consume(rs);
 837     }
 838 
 839 
 840 
 841     @Benchmark
 842     public void fma(Blackhole bh) {
 843         float[] as = fa.apply(size);
 844         float[] bs = fb.apply(size);
 845         float[] cs = fc.apply(size);
 846         float[] rs = fr.apply(size);
 847 
 848         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 849             for (int i = 0; i < as.length; i++) {
 850                 float a = as[i];
 851                 float b = bs[i];
 852                 float c = cs[i];
 853                 rs[i] = (float)(Math.fma(a, b, c));
 854             }
 855         }
 856 
 857         bh.consume(rs);
 858     }
 859 
 860 
 861 
 862 
 863     @Benchmark
 864     public void fmaMasked(Blackhole bh) {
 865         float[] as = fa.apply(size);
 866         float[] bs = fb.apply(size);
 867         float[] cs = fc.apply(size);
 868         float[] rs = fr.apply(size);
 869         boolean[] ms = fm.apply(size);
 870 
 871         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 872             for (int i = 0; i < as.length; i++) {
 873                 float a = as[i];
 874                 float b = bs[i];
 875                 float c = cs[i];
 876                 if (ms[i % ms.length]) {
 877                     rs[i] = (float)(Math.fma(a, b, c));
 878                 } else {
 879                     rs[i] = a;
 880                 }
 881             }
 882         }
 883         bh.consume(rs);
 884     }
 885 
 886 
 887     @Benchmark
 888     public void neg(Blackhole bh) {
 889         float[] as = fa.apply(size);
 890         float[] rs = fr.apply(size);
 891 
 892         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 893             for (int i = 0; i < as.length; i++) {
 894                 float a = as[i];
 895                 rs[i] = (float)(-((float)a));
 896             }
 897         }
 898 
 899         bh.consume(rs);
 900     }
 901 
 902     @Benchmark
 903     public void negMasked(Blackhole bh) {
 904         float[] as = fa.apply(size);
 905         float[] rs = fr.apply(size);
 906         boolean[] ms = fm.apply(size);
 907 
 908         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 909             for (int i = 0; i < as.length; i++) {
 910                 float a = as[i];
 911                 boolean m = ms[i % ms.length];
 912                 rs[i] = (m ? (float)(-((float)a)) : a);
 913             }
 914         }
 915 
 916         bh.consume(rs);
 917     }
 918 
 919     @Benchmark
 920     public void abs(Blackhole bh) {
 921         float[] as = fa.apply(size);
 922         float[] rs = fr.apply(size);
 923 
 924         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 925             for (int i = 0; i < as.length; i++) {
 926                 float a = as[i];
 927                 rs[i] = (float)(Math.abs((float)a));
 928             }
 929         }
 930 
 931         bh.consume(rs);
 932     }
 933 
 934     @Benchmark
 935     public void absMasked(Blackhole bh) {
 936         float[] as = fa.apply(size);
 937         float[] rs = fr.apply(size);
 938         boolean[] ms = fm.apply(size);
 939 
 940         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 941             for (int i = 0; i < as.length; i++) {
 942                 float a = as[i];
 943                 boolean m = ms[i % ms.length];
 944                 rs[i] = (m ? (float)(Math.abs((float)a)) : a);
 945             }
 946         }
 947 
 948         bh.consume(rs);
 949     }
 950 
 951 
 952 
 953 
 954     @Benchmark
 955     public void sqrt(Blackhole bh) {
 956         float[] as = fa.apply(size);
 957         float[] rs = fr.apply(size);
 958 
 959         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 960             for (int i = 0; i < as.length; i++) {
 961                 float a = as[i];
 962                 rs[i] = (float)(Math.sqrt((double)a));
 963             }
 964         }
 965 
 966         bh.consume(rs);
 967     }
 968 
 969 
 970 
 971     @Benchmark
 972     public void sqrtMasked(Blackhole bh) {
 973         float[] as = fa.apply(size);
 974         float[] rs = fr.apply(size);
 975         boolean[] ms = fm.apply(size);
 976 
 977         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 978             for (int i = 0; i < as.length; i++) {
 979                 float a = as[i];
 980                 boolean m = ms[i % ms.length];
 981                 rs[i] = (m ? (float)(Math.sqrt((double)a)) : a);
 982             }
 983         }
 984 
 985         bh.consume(rs);
 986     }
 987 
 988 
 989     @Benchmark
 990     public void gatherBase0(Blackhole bh) {
 991         float[] as = fa.apply(size);
 992         int[] is    = fs.apply(size);
 993         float[] rs = fr.apply(size);
 994 
 995         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 996             for (int i = 0; i < as.length; i++) {
 997                 int ix = 0 + is[i];
 998                 rs[i] = as[ix];
 999             }
1000         }
1001 
1002         bh.consume(rs);
1003     }
1004 
1005 
1006     void gather(int window, Blackhole bh) {
1007         float[] as = fa.apply(size);
1008         int[] is    = fs.apply(size);
1009         float[] rs = fr.apply(size);
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < as.length; i += window) {
1013                 for (int j = 0; j < window; j++) {
1014                     int ix = i + is[i + j];
1015                     rs[i + j] = as[ix];
1016                 }
1017             }
1018         }
1019 
1020         bh.consume(rs);
1021     }
1022 
1023     @Benchmark
1024     public void gather064(Blackhole bh) {
1025         int window = 64 / Float.SIZE;
1026         gather(window, bh);
1027     }
1028 
1029     @Benchmark
1030     public void gather128(Blackhole bh) {
1031         int window = 128 / Float.SIZE;
1032         gather(window, bh);
1033     }
1034 
1035     @Benchmark
1036     public void gather256(Blackhole bh) {
1037         int window = 256 / Float.SIZE;
1038         gather(window, bh);
1039     }
1040 
1041     @Benchmark
1042     public void gather512(Blackhole bh) {
1043         int window = 512 / Float.SIZE;
1044         gather(window, bh);
1045     }
1046 
1047 
1048 
1049     @Benchmark
1050     public void scatterBase0(Blackhole bh) {
1051         float[] as = fa.apply(size);
1052         int[] is    = fs.apply(size);
1053         float[] rs = fr.apply(size);
1054 
1055         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1056             for (int i = 0; i < as.length; i++) {
1057                 int ix = 0 + is[i];
1058                 rs[ix] = as[i];
1059             }
1060         }
1061 
1062         bh.consume(rs);
1063     }
1064 
1065     void scatter(int window, Blackhole bh) {
1066         float[] as = fa.apply(size);
1067         int[] is    = fs.apply(size);
1068         float[] rs = fr.apply(size);
1069 
1070         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1071             for (int i = 0; i < as.length; i += window) {
1072                 for (int j = 0; j < window; j++) {
1073                     int ix = i + is[i + j];
1074                     rs[ix] = as[i + j];
1075                 }
1076             }
1077         }
1078 
1079         bh.consume(rs);
1080     }
1081 
1082     @Benchmark
1083     public void scatter064(Blackhole bh) {
1084         int window = 64 / Float.SIZE;
1085         scatter(window, bh);
1086     }
1087 
1088     @Benchmark
1089     public void scatter128(Blackhole bh) {
1090         int window = 128 / Float.SIZE;
1091         scatter(window, bh);
1092     }
1093 
1094     @Benchmark
1095     public void scatter256(Blackhole bh) {
1096         int window = 256 / Float.SIZE;
1097         scatter(window, bh);
1098     }
1099 
1100     @Benchmark
1101     public void scatter512(Blackhole bh) {
1102         int window = 512 / Float.SIZE;
1103         scatter(window, bh);
1104     }
1105 
1106 }
1107