< prev index next >

test/jdk/jdk/incubator/vector/benchmark/src/main/java/benchmark/jdk/incubator/vector/FloatScalar.java

Print this page
rev 55606 : 8221812: Fine-tune jmh test for vector api
Summary: To compare the performance of the Vector API with auto-vectorization, the
Vector API and scalar test cases are updated so that they stay aligned.
Reviewed-by: duke
   1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;

  30 
  31 @BenchmarkMode(Mode.Throughput)
  32 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  33 @State(Scope.Benchmark)
  34 @Warmup(iterations = 3, time = 1)
  35 @Measurement(iterations = 5, time = 1)
  36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  37 public class FloatScalar extends AbstractVectorBenchmark {


  38     @Param("1024") // JMH benchmark parameter; "1024" is the value listed for it here
  39     int size; // length of the arrays allocated by fill() and passed to fillMask()/fillInt() in init()
  40 
  41     float[] fill(IntFunction<Float> f) {
  42         float[] array = new float[size];
  43         for (int i = 0; i < array.length; i++) {
  44             array[i] = f.apply(i);
  45         }
  46         return array;
  47     }
  48 
  49     float[] as, bs, cs, rs; // three input arrays and the shared result array, all assigned in init()
  50     boolean[] ms, rms; // masks assigned in init(): ms from the predicate (i % 2) == 0, rms from a constant-false predicate
  51     int[] ss; // int array assigned in init() from RANDOM; read as index/offset data by the rearrange, gather and scatter kernels
  52 
  53     @Setup
  54     public void init() {
  55         as = fill(i -> (float)(2*i));
  56         bs = fill(i -> (float)(i+1));
  57         cs = fill(i -> (float)(i+5));
  58         rs = fill(i -> (float)0);
  59         ms = fillMask(size, i -> (i % 2) == 0);
  60         rms = fillMask(size, i -> false);
  61 
  62         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  63     }
  64 
  65     final IntFunction<float[]> fa = vl -> as; // accessors: each ignores its int argument and returns the current value of one field
  66     final IntFunction<float[]> fb = vl -> bs; // second float input
  67     final IntFunction<float[]> fc = vl -> cs; // third float input
  68     final IntFunction<float[]> fr = vl -> rs; // shared result array
  69     final IntFunction<boolean[]> fm = vl -> ms; // mask used by the *Masked and blend kernels
  70     final IntFunction<boolean[]> fmr = vl -> rms; // all-false-predicate mask
  71     final IntFunction<int[]> fs = vl -> ss; // index array
  72 
  73 
  74     @Benchmark
  75     public Object add() {
  76         float[] as = fa.apply(size);
  77         float[] bs = fb.apply(size);
  78         float[] rs = fr.apply(size);
  79 

  80         for (int i = 0; i < as.length; i++) {
  81             float a = as[i];
  82             float b = bs[i];
  83             rs[i] = (float)(a + b);
  84         }

  85 
  86         return rs;
  87     }
  88 
  89     @Benchmark
  90     public Object addMasked() {
  91         float[] as = fa.apply(size);
  92         float[] bs = fb.apply(size);
  93         float[] rs = fr.apply(size);
  94         boolean[] ms = fm.apply(size);
  95 

  96         for (int i = 0; i < as.length; i++) {
  97             float a = as[i];
  98             float b = bs[i];
  99             if (ms[i % ms.length]) {
 100                 rs[i] = (float)(a + b);
 101             } else {
 102                 rs[i] = a;
 103             }
 104         }
 105         return rs;

 106     }
 107 
 108     @Benchmark
 109     public Object sub() {
 110         float[] as = fa.apply(size);
 111         float[] bs = fb.apply(size);
 112         float[] rs = fr.apply(size);
 113 

 114         for (int i = 0; i < as.length; i++) {
 115             float a = as[i];
 116             float b = bs[i];
 117             rs[i] = (float)(a - b);
 118         }

 119 
 120         return rs;
 121     }
 122 
 123     @Benchmark
 124     public Object subMasked() {
 125         float[] as = fa.apply(size);
 126         float[] bs = fb.apply(size);
 127         float[] rs = fr.apply(size);
 128         boolean[] ms = fm.apply(size);
 129 

 130         for (int i = 0; i < as.length; i++) {
 131             float a = as[i];
 132             float b = bs[i];
 133             if (ms[i % ms.length]) {
 134                 rs[i] = (float)(a - b);
 135             } else {
 136                 rs[i] = a;
 137             }
 138         }
 139         return rs;

 140     }
 141 
 142 
 143     @Benchmark
 144     public Object div() {
 145         float[] as = fa.apply(size);
 146         float[] bs = fb.apply(size);
 147         float[] rs = fr.apply(size);
 148 

 149         for (int i = 0; i < as.length; i++) {
 150             float a = as[i];
 151             float b = bs[i];
 152             rs[i] = (float)(a / b);
 153         }

 154 
 155         return rs;
 156     }
 157 
 158 
 159 
 160     @Benchmark
 161     public Object divMasked() {
 162         float[] as = fa.apply(size);
 163         float[] bs = fb.apply(size);
 164         float[] rs = fr.apply(size);
 165         boolean[] ms = fm.apply(size);
 166 

 167         for (int i = 0; i < as.length; i++) {
 168             float a = as[i];
 169             float b = bs[i];
 170             if (ms[i % ms.length]) {
 171                 rs[i] = (float)(a / b);
 172             } else {
 173                 rs[i] = a;
 174             }
 175         }
 176         return rs;

 177     }
 178 
 179 
 180     @Benchmark
 181     public Object mul() {
 182         float[] as = fa.apply(size);
 183         float[] bs = fb.apply(size);
 184         float[] rs = fr.apply(size);
 185 

 186         for (int i = 0; i < as.length; i++) {
 187             float a = as[i];
 188             float b = bs[i];
 189             rs[i] = (float)(a * b);
 190         }

 191 
 192         return rs;
 193     }
 194 
 195     @Benchmark
 196     public Object mulMasked() {
 197         float[] as = fa.apply(size);
 198         float[] bs = fb.apply(size);
 199         float[] rs = fr.apply(size);
 200         boolean[] ms = fm.apply(size);
 201 

 202         for (int i = 0; i < as.length; i++) {
 203             float a = as[i];
 204             float b = bs[i];
 205             if (ms[i % ms.length]) {
 206                 rs[i] = (float)(a * b);
 207             } else {
 208                 rs[i] = a;
 209             }
 210         }
 211         return rs;

 212     }
 213 
 214 
 215 
 216 
 217 
 218 
 219 
 220 
 221 
 222 
 223 
 224 
 225 
 226 
 227 
 228 
 229 
 230 
 231 
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244     @Benchmark
 245     public Object max() {
 246         float[] as = fa.apply(size);
 247         float[] bs = fb.apply(size);
 248         float[] rs = fr.apply(size);
 249 

 250         for (int i = 0; i < as.length; i++) {
 251             float a = as[i];
 252             float b = bs[i];
 253             rs[i] = (float)(Math.max(a, b));
 254         }

 255 
 256         return rs;
 257     }
 258 
 259     @Benchmark
 260     public Object min() {
 261         float[] as = fa.apply(size);
 262         float[] bs = fb.apply(size);
 263         float[] rs = fr.apply(size);
 264 

 265         for (int i = 0; i < as.length; i++) {
 266             float a = as[i];
 267             float b = bs[i];
 268             rs[i] = (float)(Math.min(a, b));
 269         }

 270 
 271         return rs;
 272     }
 273 
 274 
 275 
 276 
 277     @Benchmark
 278     public float addAll() {
 279         float[] as = fa.apply(size);
 280         float r = 0;


 281         for (int i = 0; i < as.length; i++) {
 282             r += as[i];
 283         }
 284         return r;

 285     }
 286 
 287     @Benchmark
 288     public float mulAll() {
 289         float[] as = fa.apply(size);
 290         float r = 1;


 291         for (int i = 0; i < as.length; i++) {
 292             r *= as[i];
 293         }
 294         return r;

 295     }
 296 
 297     @Benchmark
 298     public float minAll() {
 299         float[] as = fa.apply(size);
 300         float r = Float.POSITIVE_INFINITY;


 301         for (int i = 0; i < as.length; i++) {
 302             r = (float)Math.min(r, as[i]);
 303         }
 304         return r;

 305     }
 306 
 307     @Benchmark
 308     public float maxAll() {
 309         float[] as = fa.apply(size);
 310         float r = Float.NEGATIVE_INFINITY;


 311         for (int i = 0; i < as.length; i++) {
 312             r = (float)Math.max(r, as[i]);
 313         }
 314         return r;

 315     }
 316 
 317 
 318 
 319     @Benchmark
 320     public boolean lessThan() {
 321         float[] as = fa.apply(size);
 322         float[] bs = fb.apply(size);
 323 
 324         boolean r = false;


 325         for (int i = 0; i < as.length; i++) {
 326             boolean m = (as[i] < bs[i]);
 327             r |= m; // accumulate so JIT can't eliminate the computation
 328         }

 329 
 330         return r;
 331     }
 332 
 333     @Benchmark
 334     public boolean greaterThan() {
 335         float[] as = fa.apply(size);
 336         float[] bs = fb.apply(size);
 337 
 338         boolean r = false;


 339         for (int i = 0; i < as.length; i++) {
 340             boolean m = (as[i] > bs[i]);
 341             r |= m; // accumulate so JIT can't eliminate the computation
 342         }

 343 
 344         return r;
 345     }
 346 
 347     @Benchmark
 348     public boolean equal() {
 349         float[] as = fa.apply(size);
 350         float[] bs = fb.apply(size);
 351 
 352         boolean r = false;


 353         for (int i = 0; i < as.length; i++) {
 354             boolean m = (as[i] == bs[i]);
 355             r |= m; // accumulate so JIT can't eliminate the computation
 356         }

 357 
 358         return r;
 359     }
 360 
 361     @Benchmark
 362     public boolean notEqual() {
 363         float[] as = fa.apply(size);
 364         float[] bs = fb.apply(size);
 365 
 366         boolean r = false;


 367         for (int i = 0; i < as.length; i++) {
 368             boolean m = (as[i] != bs[i]);
 369             r |= m; // accumulate so JIT can't eliminate the computation
 370         }

 371 
 372         return r;
 373     }
 374 
 375     @Benchmark
 376     public boolean lessThanEq() {
 377         float[] as = fa.apply(size);
 378         float[] bs = fb.apply(size);
 379 
 380         boolean r = false;


 381         for (int i = 0; i < as.length; i++) {
 382             boolean m = (as[i] <= bs[i]);
 383             r |= m; // accumulate so JIT can't eliminate the computation
 384         }

 385 
 386         return r;
 387     }
 388 
 389     @Benchmark
 390     public boolean greaterThanEq() {
 391         float[] as = fa.apply(size);
 392         float[] bs = fb.apply(size);
 393 
 394         boolean r = false;


 395         for (int i = 0; i < as.length; i++) {
 396             boolean m = (as[i] >= bs[i]);
 397             r |= m; // accumulate so JIT can't eliminate the computation
 398         }

 399 
 400         return r;
 401     }
 402 
 403     @Benchmark
 404     public Object blend() {
 405         float[] as = fa.apply(size);
 406         float[] bs = fb.apply(size);
 407         float[] rs = fr.apply(size);
 408         boolean[] ms = fm.apply(size);
 409 

 410         for (int i = 0; i < as.length; i++) {
 411             float a = as[i];
 412             float b = bs[i];
 413             boolean m = ms[i % ms.length];
 414             rs[i] = (m ? b : a);
 415         }

 416 
 417         return rs;
 418     }
 419     Object rearrangeShared(int window) {
 420         float[] as = fa.apply(size);
 421         int[] order = fs.apply(size);
 422         float[] rs = fr.apply(size);
 423 

 424         for (int i = 0; i < as.length; i += window) {
 425             for (int j = 0; j < window; j++) {
 426                 float a = as[i+j];
 427                 int pos = order[j];
 428                 rs[i + pos] = a;
 429             }
 430         }

 431 
 432         return rs;
 433     }
 434 
 435     @Benchmark
 436     public Object rearrange064() {
 437         int window = 64 / Float.SIZE;
 438         return rearrangeShared(window);
 439     }
 440 
 441     @Benchmark
 442     public Object rearrange128() {
 443         int window = 128 / Float.SIZE;
 444         return rearrangeShared(window);
 445     }
 446 
 447     @Benchmark
 448     public Object rearrange256() {
 449         int window = 256 / Float.SIZE;
 450         return rearrangeShared(window);
 451     }
 452 
 453     @Benchmark
 454     public Object rearrange512() {
 455         int window = 512 / Float.SIZE;
 456         return rearrangeShared(window);
 457     }
 458 
 459 
 460     @Benchmark
 461     public Object sin() {
 462         float[] as = fa.apply(size);
 463         float[] rs = fr.apply(size);
 464 

 465         for (int i = 0; i < as.length; i++) {
 466             float a = as[i];
 467             rs[i] = (float)(Math.sin((double)a));
 468         }

 469 
 470         return rs;
 471     }
 472 
 473 
 474 
 475     @Benchmark
 476     public Object exp() {
 477         float[] as = fa.apply(size);
 478         float[] rs = fr.apply(size);
 479 

 480         for (int i = 0; i < as.length; i++) {
 481             float a = as[i];
 482             rs[i] = (float)(Math.exp((double)a));
 483         }

 484 
 485         return rs;
 486     }
 487 
 488 
 489 
 490     @Benchmark
 491     public Object log1p() {
 492         float[] as = fa.apply(size);
 493         float[] rs = fr.apply(size);
 494 

 495         for (int i = 0; i < as.length; i++) {
 496             float a = as[i];
 497             rs[i] = (float)(Math.log1p((double)a));
 498         }

 499 
 500         return rs;
 501     }
 502 
 503 
 504 
 505     @Benchmark
 506     public Object log() {
 507         float[] as = fa.apply(size);
 508         float[] rs = fr.apply(size);
 509 

 510         for (int i = 0; i < as.length; i++) {
 511             float a = as[i];
 512             rs[i] = (float)(Math.log((double)a));
 513         }

 514 
 515         return rs;
 516     }
 517 
 518 
 519 
 520     @Benchmark
 521     public Object log10() {
 522         float[] as = fa.apply(size);
 523         float[] rs = fr.apply(size);
 524 

 525         for (int i = 0; i < as.length; i++) {
 526             float a = as[i];
 527             rs[i] = (float)(Math.log10((double)a));
 528         }

 529 
 530         return rs;
 531     }
 532 
 533 
 534 
 535     @Benchmark
 536     public Object expm1() {
 537         float[] as = fa.apply(size);
 538         float[] rs = fr.apply(size);
 539 

 540         for (int i = 0; i < as.length; i++) {
 541             float a = as[i];
 542             rs[i] = (float)(Math.expm1((double)a));
 543         }

 544 
 545         return rs;
 546     }
 547 
 548 
 549 
 550     @Benchmark
 551     public Object cos() {
 552         float[] as = fa.apply(size);
 553         float[] rs = fr.apply(size);
 554 

 555         for (int i = 0; i < as.length; i++) {
 556             float a = as[i];
 557             rs[i] = (float)(Math.cos((double)a));
 558         }

 559 
 560         return rs;
 561     }
 562 
 563 
 564 
 565     @Benchmark
 566     public Object tan() {
 567         float[] as = fa.apply(size);
 568         float[] rs = fr.apply(size);
 569 

 570         for (int i = 0; i < as.length; i++) {
 571             float a = as[i];
 572             rs[i] = (float)(Math.tan((double)a));
 573         }

 574 
 575         return rs;
 576     }
 577 
 578 
 579 
 580     @Benchmark
 581     public Object sinh() {
 582         float[] as = fa.apply(size);
 583         float[] rs = fr.apply(size);
 584 

 585         for (int i = 0; i < as.length; i++) {
 586             float a = as[i];
 587             rs[i] = (float)(Math.sinh((double)a));
 588         }

 589 
 590         return rs;
 591     }
 592 
 593 
 594 
 595     @Benchmark
 596     public Object cosh() {
 597         float[] as = fa.apply(size);
 598         float[] rs = fr.apply(size);
 599 

 600         for (int i = 0; i < as.length; i++) {
 601             float a = as[i];
 602             rs[i] = (float)(Math.cosh((double)a));
 603         }

 604 
 605         return rs;
 606     }
 607 
 608 
 609 
 610     @Benchmark
 611     public Object tanh() {
 612         float[] as = fa.apply(size);
 613         float[] rs = fr.apply(size);
 614 

 615         for (int i = 0; i < as.length; i++) {
 616             float a = as[i];
 617             rs[i] = (float)(Math.tanh((double)a));
 618         }

 619 
 620         return rs;
 621     }
 622 
 623 
 624 
 625     @Benchmark
 626     public Object asin() {
 627         float[] as = fa.apply(size);
 628         float[] rs = fr.apply(size);
 629 

 630         for (int i = 0; i < as.length; i++) {
 631             float a = as[i];
 632             rs[i] = (float)(Math.asin((double)a));
 633         }

 634 
 635         return rs;
 636     }
 637 
 638 
 639 
 640     @Benchmark
 641     public Object acos() {
 642         float[] as = fa.apply(size);
 643         float[] rs = fr.apply(size);
 644 

 645         for (int i = 0; i < as.length; i++) {
 646             float a = as[i];
 647             rs[i] = (float)(Math.acos((double)a));
 648         }

 649 
 650         return rs;
 651     }
 652 
 653 
 654 
 655     @Benchmark
 656     public Object atan() {
 657         float[] as = fa.apply(size);
 658         float[] rs = fr.apply(size);
 659 

 660         for (int i = 0; i < as.length; i++) {
 661             float a = as[i];
 662             rs[i] = (float)(Math.atan((double)a));
 663         }

 664 
 665         return rs;
 666     }
 667 
 668 
 669 
 670     @Benchmark
 671     public Object cbrt() {
 672         float[] as = fa.apply(size);
 673         float[] rs = fr.apply(size);
 674 

 675         for (int i = 0; i < as.length; i++) {
 676             float a = as[i];
 677             rs[i] = (float)(Math.cbrt((double)a));
 678         }

 679 
 680         return rs;
 681     }
 682 
 683 
 684 
 685     @Benchmark
 686     public Object hypot() {
 687         float[] as = fa.apply(size);
 688         float[] bs = fb.apply(size);
 689         float[] rs = fr.apply(size);
 690 

 691         for (int i = 0; i < as.length; i++) {
 692             float a = as[i];
 693             float b = bs[i];
 694             rs[i] = (float)(Math.hypot((double)a, (double)b));
 695         }

 696 
 697         return rs;
 698     }
 699 
 700 
 701 
 702     @Benchmark
 703     public Object pow() {
 704         float[] as = fa.apply(size);
 705         float[] bs = fb.apply(size);
 706         float[] rs = fr.apply(size);
 707 

 708         for (int i = 0; i < as.length; i++) {
 709             float a = as[i];
 710             float b = bs[i];
 711             rs[i] = (float)(Math.pow((double)a, (double)b));
 712         }

 713 
 714         return rs;
 715     }
 716 
 717 
 718 
 719     @Benchmark
 720     public Object atan2() {
 721         float[] as = fa.apply(size);
 722         float[] bs = fb.apply(size);
 723         float[] rs = fr.apply(size);
 724 

 725         for (int i = 0; i < as.length; i++) {
 726             float a = as[i];
 727             float b = bs[i];
 728             rs[i] = (float)(Math.atan2((double)a, (double)b));
 729         }

 730 
 731         return rs;
 732     }
 733 
 734 
 735 
 736     @Benchmark
 737     public Object fma() {
 738         float[] as = fa.apply(size);
 739         float[] bs = fb.apply(size);
 740         float[] cs = fc.apply(size);
 741         float[] rs = fr.apply(size);
 742 

 743         for (int i = 0; i < as.length; i++) {
 744             float a = as[i];
 745             float b = bs[i];
 746             float c = cs[i];
 747             rs[i] = (float)(Math.fma(a, b, c));
 748         }

 749 
 750         return rs;
 751     }
 752 
 753 
 754 
 755 
 756     @Benchmark
 757     public Object fmaMasked() {
 758         float[] as = fa.apply(size);
 759         float[] bs = fb.apply(size);
 760         float[] cs = fc.apply(size);
 761         float[] rs = fr.apply(size);
 762         boolean[] ms = fm.apply(size);
 763 

 764         for (int i = 0; i < as.length; i++) {
 765             float a = as[i];
 766             float b = bs[i];
 767             float c = cs[i];
 768             if (ms[i % ms.length]) {
 769                 rs[i] = (float)(Math.fma(a, b, c));
 770             } else {
 771                 rs[i] = a;
 772             }
 773         }
 774         return rs;

 775     }
 776 
 777 
 778     @Benchmark
 779     public Object neg() {
 780         float[] as = fa.apply(size);
 781         float[] rs = fr.apply(size);
 782 

 783         for (int i = 0; i < as.length; i++) {
 784             float a = as[i];
 785             rs[i] = (float)(-((float)a));
 786         }

 787 
 788         return rs;
 789     }
 790 
 791     @Benchmark
 792     public Object negMasked() {
 793         float[] as = fa.apply(size);
 794         float[] rs = fr.apply(size);
 795         boolean[] ms = fm.apply(size);
 796 

 797         for (int i = 0; i < as.length; i++) {
 798             float a = as[i];
 799             boolean m = ms[i % ms.length];
 800             rs[i] = (m ? (float)(-((float)a)) : a);
 801         }

 802 
 803         return rs;
 804     }
 805 
 806     @Benchmark
 807     public Object abs() {
 808         float[] as = fa.apply(size);
 809         float[] rs = fr.apply(size);
 810 

 811         for (int i = 0; i < as.length; i++) {
 812             float a = as[i];
 813             rs[i] = (float)(Math.abs((float)a));
 814         }

 815 
 816         return rs;
 817     }
 818 
 819     @Benchmark
 820     public Object absMasked() {
 821         float[] as = fa.apply(size);
 822         float[] rs = fr.apply(size);
 823         boolean[] ms = fm.apply(size);
 824 

 825         for (int i = 0; i < as.length; i++) {
 826             float a = as[i];
 827             boolean m = ms[i % ms.length];
 828             rs[i] = (m ? (float)(Math.abs((float)a)) : a);
 829         }

 830 
 831         return rs;
 832     }
 833 
 834 
 835 
 836 
 837     @Benchmark
 838     public Object sqrt() {
 839         float[] as = fa.apply(size);
 840         float[] rs = fr.apply(size);
 841 

 842         for (int i = 0; i < as.length; i++) {
 843             float a = as[i];
 844             rs[i] = (float)(Math.sqrt((double)a));
 845         }

 846 
 847         return rs;
 848     }
 849 
 850 
 851 
 852     @Benchmark
 853     public Object sqrtMasked() {
 854         float[] as = fa.apply(size);
 855         float[] rs = fr.apply(size);
 856         boolean[] ms = fm.apply(size);
 857 

 858         for (int i = 0; i < as.length; i++) {
 859             float a = as[i];
 860             boolean m = ms[i % ms.length];
 861             rs[i] = (m ? (float)(Math.sqrt((double)a)) : a);
 862         }

 863 
 864         return rs;
 865     }
 866 
 867 
 868     @Benchmark
 869     public Object gatherBase0() {
 870         float[] as = fa.apply(size);
 871         int[] is    = fs.apply(size);
 872         float[] rs = fr.apply(size);
 873 

 874         for (int i = 0; i < as.length; i++) {
 875             int ix = 0 + is[i];
 876             rs[i] = as[ix];
 877         }

 878 
 879         return rs;
 880     }
 881 
 882 
 883     Object gather(int window) {
 884         float[] as = fa.apply(size);
 885         int[] is    = fs.apply(size);
 886         float[] rs = fr.apply(size);
 887 

 888         for (int i = 0; i < as.length; i += window) {
 889             for (int j = 0; j < window; j++) {
 890                 int ix = i + is[i + j];
 891                 rs[i + j] = as[ix];
 892             }
 893         }

 894 
 895         return rs;
 896     }
 897 
 898     @Benchmark
 899     public Object gather064() {
 900         int window = 64 / Float.SIZE;
 901         return gather(window);
 902     }
 903 
 904     @Benchmark
 905     public Object gather128() {
 906         int window = 128 / Float.SIZE;
 907         return gather(window);
 908     }
 909 
 910     @Benchmark
 911     public Object gather256() {
 912         int window = 256 / Float.SIZE;
 913         return gather(window);
 914     }
 915 
 916     @Benchmark
 917     public Object gather512() {
 918         int window = 512 / Float.SIZE;
 919         return gather(window);
 920     }
 921 
 922 
 923 
 924     @Benchmark
 925     public Object scatterBase0() {
 926         float[] as = fa.apply(size);
 927         int[] is    = fs.apply(size);
 928         float[] rs = fr.apply(size);
 929 

 930         for (int i = 0; i < as.length; i++) {
 931             int ix = 0 + is[i];
 932             rs[ix] = as[i];
 933         }

 934 
 935         return rs;
 936     }
 937 
 938     Object scatter(int window) {
 939         float[] as = fa.apply(size);
 940         int[] is    = fs.apply(size);
 941         float[] rs = fr.apply(size);
 942 

 943         for (int i = 0; i < as.length; i += window) {
 944             for (int j = 0; j < window; j++) {
 945                 int ix = i + is[i + j];
 946                 rs[ix] = as[i + j];
 947             }
 948         }

 949 
 950         return rs;
 951     }
 952 
 953     @Benchmark
 954     public Object scatter064() {
 955         int window = 64 / Float.SIZE;
 956         return scatter(window);
 957     }
 958 
 959     @Benchmark
 960     public Object scatter128() {
 961         int window = 128 / Float.SIZE;
 962         return scatter(window);
 963     }
 964 
 965     @Benchmark
 966     public Object scatter256() {
 967         int window = 256 / Float.SIZE;
 968         return scatter(window);
 969     }
 970 
 971     @Benchmark
 972     public Object scatter512() {
 973         int window = 512 / Float.SIZE;
 974         return scatter(window);
 975     }
 976 
 977 }
 978 
   1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;
  30 import org.openjdk.jmh.infra.Blackhole;
  31 
  32 @BenchmarkMode(Mode.Throughput)
  33 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  34 @State(Scope.Benchmark)
  35 @Warmup(iterations = 3, time = 1)
  36 @Measurement(iterations = 5, time = 1)
  37 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  38 public class FloatScalar extends AbstractVectorBenchmark {
  39     static final int INVOC_COUNT = 1; // Number of times each benchmark repeats its kernel loop per call; kept to align with the vector benchmarks.
  40 
  41     @Param("1024") // JMH benchmark parameter; "1024" is the value listed for it here
  42     int size; // length of the arrays allocated by fill() and passed to fillMask()/fillInt() in init()
  43 
  44     float[] fill(IntFunction<Float> f) {
  45         float[] array = new float[size];
  46         for (int i = 0; i < array.length; i++) {
  47             array[i] = f.apply(i);
  48         }
  49         return array;
  50     }
  51 
  52     float[] as, bs, cs, rs; // three input arrays and the shared result array, all assigned in init()
  53     boolean[] ms, rms; // masks assigned in init(): ms from the predicate (i % 2) == 0, rms from a constant-false predicate
  54     int[] ss; // int array assigned in init() from RANDOM and exposed through fs
  55 
  56     @Setup
  57     public void init() {
  58         as = fill(i -> (float)(2*i));
  59         bs = fill(i -> (float)(i+1));
  60         cs = fill(i -> (float)(i+5));
  61         rs = fill(i -> (float)0);
  62         ms = fillMask(size, i -> (i % 2) == 0);
  63         rms = fillMask(size, i -> false);
  64 
  65         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  66     }
  67 
  68     final IntFunction<float[]> fa = vl -> as; // accessors: each ignores its int argument and returns the current value of one field
  69     final IntFunction<float[]> fb = vl -> bs; // second float input
  70     final IntFunction<float[]> fc = vl -> cs; // third float input
  71     final IntFunction<float[]> fr = vl -> rs; // shared result array
  72     final IntFunction<boolean[]> fm = vl -> ms; // mask used by the *Masked kernels
  73     final IntFunction<boolean[]> fmr = vl -> rms; // all-false-predicate mask
  74     final IntFunction<int[]> fs = vl -> ss; // index array
  75 
  76 
  77     @Benchmark
  78     public void add(Blackhole bh) {
  79         float[] as = fa.apply(size);
  80         float[] bs = fb.apply(size);
  81         float[] rs = fr.apply(size);
  82 
  83         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  84             for (int i = 0; i < as.length; i++) {
  85                 float a = as[i];
  86                 float b = bs[i];
  87                 rs[i] = (float)(a + b);
  88             }
  89         }
  90 
  91         bh.consume(rs);
  92     }
  93 
  94     @Benchmark
  95     public void addMasked(Blackhole bh) {
  96         float[] as = fa.apply(size);
  97         float[] bs = fb.apply(size);
  98         float[] rs = fr.apply(size);
  99         boolean[] ms = fm.apply(size);
 100 
 101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 102             for (int i = 0; i < as.length; i++) {
 103                 float a = as[i];
 104                 float b = bs[i];
 105                 if (ms[i % ms.length]) {
 106                     rs[i] = (float)(a + b);
 107                 } else {
 108                     rs[i] = a;
 109                 }
 110             }
 111         }
 112         bh.consume(rs);
 113     }
 114 
 115     @Benchmark
 116     public void sub(Blackhole bh) {
 117         float[] as = fa.apply(size);
 118         float[] bs = fb.apply(size);
 119         float[] rs = fr.apply(size);
 120 
 121         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 122             for (int i = 0; i < as.length; i++) {
 123                 float a = as[i];
 124                 float b = bs[i];
 125                 rs[i] = (float)(a - b);
 126             }
 127         }
 128 
 129         bh.consume(rs);
 130     }
 131 
 132     @Benchmark
 133     public void subMasked(Blackhole bh) {
 134         float[] as = fa.apply(size);
 135         float[] bs = fb.apply(size);
 136         float[] rs = fr.apply(size);
 137         boolean[] ms = fm.apply(size);
 138 
 139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 140             for (int i = 0; i < as.length; i++) {
 141                 float a = as[i];
 142                 float b = bs[i];
 143                 if (ms[i % ms.length]) {
 144                     rs[i] = (float)(a - b);
 145                 } else {
 146                     rs[i] = a;
 147                 }
 148             }
 149         }
 150         bh.consume(rs);
 151     }
 152 
 153 
 154     @Benchmark
 155     public void div(Blackhole bh) {
 156         float[] as = fa.apply(size);
 157         float[] bs = fb.apply(size);
 158         float[] rs = fr.apply(size);
 159 
 160         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 161             for (int i = 0; i < as.length; i++) {
 162                 float a = as[i];
 163                 float b = bs[i];
 164                 rs[i] = (float)(a / b);
 165             }
 166         }
 167 
 168         bh.consume(rs);
 169     }
 170 
 171 
 172 
 173     @Benchmark
 174     public void divMasked(Blackhole bh) {
 175         float[] as = fa.apply(size);
 176         float[] bs = fb.apply(size);
 177         float[] rs = fr.apply(size);
 178         boolean[] ms = fm.apply(size);
 179 
 180         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 181             for (int i = 0; i < as.length; i++) {
 182                 float a = as[i];
 183                 float b = bs[i];
 184                 if (ms[i % ms.length]) {
 185                     rs[i] = (float)(a / b);
 186                 } else {
 187                     rs[i] = a;
 188                 }
 189             }
 190         }
 191         bh.consume(rs);
 192     }
 193 
 194 
 195     @Benchmark
 196     public void mul(Blackhole bh) {
 197         float[] as = fa.apply(size);
 198         float[] bs = fb.apply(size);
 199         float[] rs = fr.apply(size);
 200 
 201         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 202             for (int i = 0; i < as.length; i++) {
 203                 float a = as[i];
 204                 float b = bs[i];
 205                 rs[i] = (float)(a * b);
 206             }
 207         }
 208 
 209         bh.consume(rs);
 210     }
 211 
 212     @Benchmark
 213     public void mulMasked(Blackhole bh) {
 214         float[] as = fa.apply(size);
 215         float[] bs = fb.apply(size);
 216         float[] rs = fr.apply(size);
 217         boolean[] ms = fm.apply(size);
 218 
 219         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 220             for (int i = 0; i < as.length; i++) {
 221                 float a = as[i];
 222                 float b = bs[i];
 223                 if (ms[i % ms.length]) {
 224                     rs[i] = (float)(a * b);
 225                 } else {
 226                     rs[i] = a;
 227                 }
 228             }
 229         }
 230         bh.consume(rs);
 231     }
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263     @Benchmark
 264     public void max(Blackhole bh) {
 265         float[] as = fa.apply(size);
 266         float[] bs = fb.apply(size);
 267         float[] rs = fr.apply(size);
 268 
 269         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 270             for (int i = 0; i < as.length; i++) {
 271                 float a = as[i];
 272                 float b = bs[i];
 273                 rs[i] = (float)(Math.max(a, b));
 274             }
 275         }
 276 
 277         bh.consume(rs);
 278     }
 279 
 280     @Benchmark
 281     public void min(Blackhole bh) {
 282         float[] as = fa.apply(size);
 283         float[] bs = fb.apply(size);
 284         float[] rs = fr.apply(size);
 285 
 286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 287             for (int i = 0; i < as.length; i++) {
 288                 float a = as[i];
 289                 float b = bs[i];
 290                 rs[i] = (float)(Math.min(a, b));
 291             }
 292         }
 293 
 294         bh.consume(rs);
 295     }
 296 
 297 
 298 
 299 
 300     @Benchmark
 301     public void addAll(Blackhole bh) {
 302         float[] as = fa.apply(size);
 303         float r = 0;
 304         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 305             r = 0;
 306             for (int i = 0; i < as.length; i++) {
 307                 r += as[i];
 308             }
 309         }
 310         bh.consume(r);
 311     }
 312 
 313     @Benchmark
 314     public void mulAll(Blackhole bh) {
 315         float[] as = fa.apply(size);
 316         float r = 1;
 317         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 318             r = 1;
 319             for (int i = 0; i < as.length; i++) {
 320                 r *= as[i];
 321             }
 322         }
 323         bh.consume(r);
 324     }
 325 
 326     @Benchmark
 327     public void minAll(Blackhole bh) {
 328         float[] as = fa.apply(size);
 329         float r = Float.POSITIVE_INFINITY;
 330         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 331             r = Float.POSITIVE_INFINITY;
 332             for (int i = 0; i < as.length; i++) {
 333                 r = (float)Math.min(r, as[i]);
 334             }
 335         }
 336         bh.consume(r);
 337     }
 338 
 339     @Benchmark
 340     public void maxAll(Blackhole bh) {
 341         float[] as = fa.apply(size);
 342         float r = Float.NEGATIVE_INFINITY;
 343         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 344             r = Float.NEGATIVE_INFINITY;
 345             for (int i = 0; i < as.length; i++) {
 346                 r = (float)Math.max(r, as[i]);
 347             }
 348         }
 349         bh.consume(r);
 350     }
 351 
 352 
 353 
 354     @Benchmark
 355     public void lessThan(Blackhole bh) {
 356         float[] as = fa.apply(size);
 357         float[] bs = fb.apply(size);
 358 
 359         boolean r = false;
 360         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 361             r = false;
 362             for (int i = 0; i < as.length; i++) {
 363                 boolean m = (as[i] < bs[i]);
 364                 r |= m; // accumulate so JIT can't eliminate the computation
 365             }
 366         }
 367 
 368         bh.consume(r);
 369     }
 370 
 371     @Benchmark
 372     public void greaterThan(Blackhole bh) {
 373         float[] as = fa.apply(size);
 374         float[] bs = fb.apply(size);
 375 
 376         boolean r = false;
 377         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 378             r = false;
 379             for (int i = 0; i < as.length; i++) {
 380                 boolean m = (as[i] > bs[i]);
 381                 r |= m; // accumulate so JIT can't eliminate the computation
 382             }
 383         }
 384 
 385         bh.consume(r);
 386     }
 387 
 388     @Benchmark
 389     public void equal(Blackhole bh) {
 390         float[] as = fa.apply(size);
 391         float[] bs = fb.apply(size);
 392 
 393         boolean r = false;
 394         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 395             r = false;
 396             for (int i = 0; i < as.length; i++) {
 397                 boolean m = (as[i] == bs[i]);
 398                 r |= m; // accumulate so JIT can't eliminate the computation
 399             }
 400         }
 401 
 402         bh.consume(r);
 403     }
 404 
 405     @Benchmark
 406     public void notEqual(Blackhole bh) {
 407         float[] as = fa.apply(size);
 408         float[] bs = fb.apply(size);
 409 
 410         boolean r = false;
 411         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 412             r = false;
 413             for (int i = 0; i < as.length; i++) {
 414                 boolean m = (as[i] != bs[i]);
 415                 r |= m; // accumulate so JIT can't eliminate the computation
 416             }
 417         }
 418 
 419         bh.consume(r);
 420     }
 421 
 422     @Benchmark
 423     public void lessThanEq(Blackhole bh) {
 424         float[] as = fa.apply(size);
 425         float[] bs = fb.apply(size);
 426 
 427         boolean r = false;
 428         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 429             r = false;
 430             for (int i = 0; i < as.length; i++) {
 431                 boolean m = (as[i] <= bs[i]);
 432                 r |= m; // accumulate so JIT can't eliminate the computation
 433             }
 434         }
 435 
 436         bh.consume(r);
 437     }
 438 
 439     @Benchmark
 440     public void greaterThanEq(Blackhole bh) {
 441         float[] as = fa.apply(size);
 442         float[] bs = fb.apply(size);
 443 
 444         boolean r = false;
 445         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 446             r = false;
 447             for (int i = 0; i < as.length; i++) {
 448                 boolean m = (as[i] >= bs[i]);
 449                 r |= m; // accumulate so JIT can't eliminate the computation
 450             }
 451         }
 452 
 453         bh.consume(r);
 454     }
 455 
 456     @Benchmark
 457     public void blend(Blackhole bh) {
 458         float[] as = fa.apply(size);
 459         float[] bs = fb.apply(size);
 460         float[] rs = fr.apply(size);
 461         boolean[] ms = fm.apply(size);
 462 
 463         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 464             for (int i = 0; i < as.length; i++) {
 465                 float a = as[i];
 466                 float b = bs[i];
 467                 boolean m = ms[i % ms.length];
 468                 rs[i] = (m ? b : a);
 469             }
 470         }
 471 
 472         bh.consume(rs);
 473     }
 474     void rearrangeShared(int window, Blackhole bh) {
 475         float[] as = fa.apply(size);
 476         int[] order = fs.apply(size);
 477         float[] rs = fr.apply(size);
 478 
 479         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 480             for (int i = 0; i < as.length; i += window) {
 481                 for (int j = 0; j < window; j++) {
 482                     float a = as[i+j];
 483                     int pos = order[j];
 484                     rs[i + pos] = a;
 485                 }
 486             }
 487         }
 488 
 489         bh.consume(rs);
 490     }
 491 
 492     @Benchmark
 493     public void rearrange064(Blackhole bh) {
 494         int window = 64 / Float.SIZE;
 495         rearrangeShared(window, bh);
 496     }
 497 
 498     @Benchmark
 499     public void rearrange128(Blackhole bh) {
 500         int window = 128 / Float.SIZE;
 501         rearrangeShared(window, bh);
 502     }
 503 
 504     @Benchmark
 505     public void rearrange256(Blackhole bh) {
 506         int window = 256 / Float.SIZE;
 507         rearrangeShared(window, bh);
 508     }
 509 
 510     @Benchmark
 511     public void rearrange512(Blackhole bh) {
 512         int window = 512 / Float.SIZE;
 513         rearrangeShared(window, bh);
 514     }
 515 
 516 
 517     @Benchmark
 518     public void sin(Blackhole bh) {
 519         float[] as = fa.apply(size);
 520         float[] rs = fr.apply(size);
 521 
 522         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 523             for (int i = 0; i < as.length; i++) {
 524                 float a = as[i];
 525                 rs[i] = (float)(Math.sin((double)a));
 526             }
 527         }
 528 
 529         bh.consume(rs);
 530     }
 531 
 532 
 533 
 534     @Benchmark
 535     public void exp(Blackhole bh) {
 536         float[] as = fa.apply(size);
 537         float[] rs = fr.apply(size);
 538 
 539         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 540             for (int i = 0; i < as.length; i++) {
 541                 float a = as[i];
 542                 rs[i] = (float)(Math.exp((double)a));
 543             }
 544         }
 545 
 546         bh.consume(rs);
 547     }
 548 
 549 
 550 
 551     @Benchmark
 552     public void log1p(Blackhole bh) {
 553         float[] as = fa.apply(size);
 554         float[] rs = fr.apply(size);
 555 
 556         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 557             for (int i = 0; i < as.length; i++) {
 558                 float a = as[i];
 559                 rs[i] = (float)(Math.log1p((double)a));
 560             }
 561         }
 562 
 563         bh.consume(rs);
 564     }
 565 
 566 
 567 
 568     @Benchmark
 569     public void log(Blackhole bh) {
 570         float[] as = fa.apply(size);
 571         float[] rs = fr.apply(size);
 572 
 573         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 574             for (int i = 0; i < as.length; i++) {
 575                 float a = as[i];
 576                 rs[i] = (float)(Math.log((double)a));
 577             }
 578         }
 579 
 580         bh.consume(rs);
 581     }
 582 
 583 
 584 
 585     @Benchmark
 586     public void log10(Blackhole bh) {
 587         float[] as = fa.apply(size);
 588         float[] rs = fr.apply(size);
 589 
 590         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 591             for (int i = 0; i < as.length; i++) {
 592                 float a = as[i];
 593                 rs[i] = (float)(Math.log10((double)a));
 594             }
 595         }
 596 
 597         bh.consume(rs);
 598     }
 599 
 600 
 601 
 602     @Benchmark
 603     public void expm1(Blackhole bh) {
 604         float[] as = fa.apply(size);
 605         float[] rs = fr.apply(size);
 606 
 607         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 608             for (int i = 0; i < as.length; i++) {
 609                 float a = as[i];
 610                 rs[i] = (float)(Math.expm1((double)a));
 611             }
 612         }
 613 
 614         bh.consume(rs);
 615     }
 616 
 617 
 618 
 619     @Benchmark
 620     public void cos(Blackhole bh) {
 621         float[] as = fa.apply(size);
 622         float[] rs = fr.apply(size);
 623 
 624         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 625             for (int i = 0; i < as.length; i++) {
 626                 float a = as[i];
 627                 rs[i] = (float)(Math.cos((double)a));
 628             }
 629         }
 630 
 631         bh.consume(rs);
 632     }
 633 
 634 
 635 
 636     @Benchmark
 637     public void tan(Blackhole bh) {
 638         float[] as = fa.apply(size);
 639         float[] rs = fr.apply(size);
 640 
 641         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 642             for (int i = 0; i < as.length; i++) {
 643                 float a = as[i];
 644                 rs[i] = (float)(Math.tan((double)a));
 645             }
 646         }
 647 
 648         bh.consume(rs);
 649     }
 650 
 651 
 652 
 653     @Benchmark
 654     public void sinh(Blackhole bh) {
 655         float[] as = fa.apply(size);
 656         float[] rs = fr.apply(size);
 657 
 658         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 659             for (int i = 0; i < as.length; i++) {
 660                 float a = as[i];
 661                 rs[i] = (float)(Math.sinh((double)a));
 662             }
 663         }
 664 
 665         bh.consume(rs);
 666     }
 667 
 668 
 669 
 670     @Benchmark
 671     public void cosh(Blackhole bh) {
 672         float[] as = fa.apply(size);
 673         float[] rs = fr.apply(size);
 674 
 675         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 676             for (int i = 0; i < as.length; i++) {
 677                 float a = as[i];
 678                 rs[i] = (float)(Math.cosh((double)a));
 679             }
 680         }
 681 
 682         bh.consume(rs);
 683     }
 684 
 685 
 686 
 687     @Benchmark
 688     public void tanh(Blackhole bh) {
 689         float[] as = fa.apply(size);
 690         float[] rs = fr.apply(size);
 691 
 692         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 693             for (int i = 0; i < as.length; i++) {
 694                 float a = as[i];
 695                 rs[i] = (float)(Math.tanh((double)a));
 696             }
 697         }
 698 
 699         bh.consume(rs);
 700     }
 701 
 702 
 703 
 704     @Benchmark
 705     public void asin(Blackhole bh) {
 706         float[] as = fa.apply(size);
 707         float[] rs = fr.apply(size);
 708 
 709         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 710             for (int i = 0; i < as.length; i++) {
 711                 float a = as[i];
 712                 rs[i] = (float)(Math.asin((double)a));
 713             }
 714         }
 715 
 716         bh.consume(rs);
 717     }
 718 
 719 
 720 
 721     @Benchmark
 722     public void acos(Blackhole bh) {
 723         float[] as = fa.apply(size);
 724         float[] rs = fr.apply(size);
 725 
 726         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 727             for (int i = 0; i < as.length; i++) {
 728                 float a = as[i];
 729                 rs[i] = (float)(Math.acos((double)a));
 730             }
 731         }
 732 
 733         bh.consume(rs);
 734     }
 735 
 736 
 737 
 738     @Benchmark
 739     public void atan(Blackhole bh) {
 740         float[] as = fa.apply(size);
 741         float[] rs = fr.apply(size);
 742 
 743         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 744             for (int i = 0; i < as.length; i++) {
 745                 float a = as[i];
 746                 rs[i] = (float)(Math.atan((double)a));
 747             }
 748         }
 749 
 750         bh.consume(rs);
 751     }
 752 
 753 
 754 
 755     @Benchmark
 756     public void cbrt(Blackhole bh) {
 757         float[] as = fa.apply(size);
 758         float[] rs = fr.apply(size);
 759 
 760         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 761             for (int i = 0; i < as.length; i++) {
 762                 float a = as[i];
 763                 rs[i] = (float)(Math.cbrt((double)a));
 764             }
 765         }
 766 
 767         bh.consume(rs);
 768     }
 769 
 770 
 771 
 772     @Benchmark
 773     public void hypot(Blackhole bh) {
 774         float[] as = fa.apply(size);
 775         float[] bs = fb.apply(size);
 776         float[] rs = fr.apply(size);
 777 
 778         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 779             for (int i = 0; i < as.length; i++) {
 780                 float a = as[i];
 781                 float b = bs[i];
 782                 rs[i] = (float)(Math.hypot((double)a, (double)b));
 783             }
 784         }
 785 
 786         bh.consume(rs);
 787     }
 788 
 789 
 790 
 791     @Benchmark
 792     public void pow(Blackhole bh) {
 793         float[] as = fa.apply(size);
 794         float[] bs = fb.apply(size);
 795         float[] rs = fr.apply(size);
 796 
 797         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 798             for (int i = 0; i < as.length; i++) {
 799                 float a = as[i];
 800                 float b = bs[i];
 801                 rs[i] = (float)(Math.pow((double)a, (double)b));
 802             }
 803         }
 804 
 805         bh.consume(rs);
 806     }
 807 
 808 
 809 
 810     @Benchmark
 811     public void atan2(Blackhole bh) {
 812         float[] as = fa.apply(size);
 813         float[] bs = fb.apply(size);
 814         float[] rs = fr.apply(size);
 815 
 816         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 817             for (int i = 0; i < as.length; i++) {
 818                 float a = as[i];
 819                 float b = bs[i];
 820                 rs[i] = (float)(Math.atan2((double)a, (double)b));
 821             }
 822         }
 823 
 824         bh.consume(rs);
 825     }
 826 
 827 
 828 
 829     @Benchmark
 830     public void fma(Blackhole bh) {
 831         float[] as = fa.apply(size);
 832         float[] bs = fb.apply(size);
 833         float[] cs = fc.apply(size);
 834         float[] rs = fr.apply(size);
 835 
 836         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 837             for (int i = 0; i < as.length; i++) {
 838                 float a = as[i];
 839                 float b = bs[i];
 840                 float c = cs[i];
 841                 rs[i] = (float)(Math.fma(a, b, c));
 842             }
 843         }
 844 
 845         bh.consume(rs);
 846     }
 847 
 848 
 849 
 850 
 851     @Benchmark
 852     public void fmaMasked(Blackhole bh) {
 853         float[] as = fa.apply(size);
 854         float[] bs = fb.apply(size);
 855         float[] cs = fc.apply(size);
 856         float[] rs = fr.apply(size);
 857         boolean[] ms = fm.apply(size);
 858 
 859         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 860             for (int i = 0; i < as.length; i++) {
 861                 float a = as[i];
 862                 float b = bs[i];
 863                 float c = cs[i];
 864                 if (ms[i % ms.length]) {
 865                     rs[i] = (float)(Math.fma(a, b, c));
 866                 } else {
 867                     rs[i] = a;
 868                 }
 869             }
 870         }
 871         bh.consume(rs);
 872     }
 873 
 874 
 875     @Benchmark
 876     public void neg(Blackhole bh) {
 877         float[] as = fa.apply(size);
 878         float[] rs = fr.apply(size);
 879 
 880         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 881             for (int i = 0; i < as.length; i++) {
 882                 float a = as[i];
 883                 rs[i] = (float)(-((float)a));
 884             }
 885         }
 886 
 887         bh.consume(rs);
 888     }
 889 
 890     @Benchmark
 891     public void negMasked(Blackhole bh) {
 892         float[] as = fa.apply(size);
 893         float[] rs = fr.apply(size);
 894         boolean[] ms = fm.apply(size);
 895 
 896         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 897             for (int i = 0; i < as.length; i++) {
 898                 float a = as[i];
 899                 boolean m = ms[i % ms.length];
 900                 rs[i] = (m ? (float)(-((float)a)) : a);
 901             }
 902         }
 903 
 904         bh.consume(rs);
 905     }
 906 
 907     @Benchmark
 908     public void abs(Blackhole bh) {
 909         float[] as = fa.apply(size);
 910         float[] rs = fr.apply(size);
 911 
 912         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 913             for (int i = 0; i < as.length; i++) {
 914                 float a = as[i];
 915                 rs[i] = (float)(Math.abs((float)a));
 916             }
 917         }
 918 
 919         bh.consume(rs);
 920     }
 921 
 922     @Benchmark
 923     public void absMasked(Blackhole bh) {
 924         float[] as = fa.apply(size);
 925         float[] rs = fr.apply(size);
 926         boolean[] ms = fm.apply(size);
 927 
 928         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 929             for (int i = 0; i < as.length; i++) {
 930                 float a = as[i];
 931                 boolean m = ms[i % ms.length];
 932                 rs[i] = (m ? (float)(Math.abs((float)a)) : a);
 933             }
 934         }
 935 
 936         bh.consume(rs);
 937     }
 938 
 939 
 940 
 941 
 942     @Benchmark
 943     public void sqrt(Blackhole bh) {
 944         float[] as = fa.apply(size);
 945         float[] rs = fr.apply(size);
 946 
 947         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 948             for (int i = 0; i < as.length; i++) {
 949                 float a = as[i];
 950                 rs[i] = (float)(Math.sqrt((double)a));
 951             }
 952         }
 953 
 954         bh.consume(rs);
 955     }
 956 
 957 
 958 
 959     @Benchmark
 960     public void sqrtMasked(Blackhole bh) {
 961         float[] as = fa.apply(size);
 962         float[] rs = fr.apply(size);
 963         boolean[] ms = fm.apply(size);
 964 
 965         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 966             for (int i = 0; i < as.length; i++) {
 967                 float a = as[i];
 968                 boolean m = ms[i % ms.length];
 969                 rs[i] = (m ? (float)(Math.sqrt((double)a)) : a);
 970             }
 971         }
 972 
 973         bh.consume(rs);
 974     }
 975 
 976 
 977     @Benchmark
 978     public void gatherBase0(Blackhole bh) {
 979         float[] as = fa.apply(size);
 980         int[] is    = fs.apply(size);
 981         float[] rs = fr.apply(size);
 982 
 983         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 984             for (int i = 0; i < as.length; i++) {
 985                 int ix = 0 + is[i];
 986                 rs[i] = as[ix];
 987             }
 988         }
 989 
 990         bh.consume(rs);
 991     }
 992 
 993 
 994     void gather(int window, Blackhole bh) {
 995         float[] as = fa.apply(size);
 996         int[] is    = fs.apply(size);
 997         float[] rs = fr.apply(size);
 998 
 999         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1000             for (int i = 0; i < as.length; i += window) {
1001                 for (int j = 0; j < window; j++) {
1002                     int ix = i + is[i + j];
1003                     rs[i + j] = as[ix];
1004                 }
1005             }
1006         }
1007 
1008         bh.consume(rs);
1009     }
1010 
1011     @Benchmark
1012     public void gather064(Blackhole bh) {
1013         int window = 64 / Float.SIZE;
1014         gather(window, bh);
1015     }
1016 
1017     @Benchmark
1018     public void gather128(Blackhole bh) {
1019         int window = 128 / Float.SIZE;
1020         gather(window, bh);
1021     }
1022 
1023     @Benchmark
1024     public void gather256(Blackhole bh) {
1025         int window = 256 / Float.SIZE;
1026         gather(window, bh);
1027     }
1028 
1029     @Benchmark
1030     public void gather512(Blackhole bh) {
1031         int window = 512 / Float.SIZE;
1032         gather(window, bh);
1033     }
1034 
1035 
1036 
1037     @Benchmark
1038     public void scatterBase0(Blackhole bh) {
1039         float[] as = fa.apply(size);
1040         int[] is    = fs.apply(size);
1041         float[] rs = fr.apply(size);
1042 
1043         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1044             for (int i = 0; i < as.length; i++) {
1045                 int ix = 0 + is[i];
1046                 rs[ix] = as[i];
1047             }
1048         }
1049 
1050         bh.consume(rs);
1051     }
1052 
1053     void scatter(int window, Blackhole bh) {
1054         float[] as = fa.apply(size);
1055         int[] is    = fs.apply(size);
1056         float[] rs = fr.apply(size);
1057 
1058         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1059             for (int i = 0; i < as.length; i += window) {
1060                 for (int j = 0; j < window; j++) {
1061                     int ix = i + is[i + j];
1062                     rs[ix] = as[i + j];
1063                 }
1064             }
1065         }
1066 
1067         bh.consume(rs);
1068     }
1069 
1070     @Benchmark
1071     public void scatter064(Blackhole bh) {
1072         int window = 64 / Float.SIZE;
1073         scatter(window, bh);
1074     }
1075 
1076     @Benchmark
1077     public void scatter128(Blackhole bh) {
1078         int window = 128 / Float.SIZE;
1079         scatter(window, bh);
1080     }
1081 
1082     @Benchmark
1083     public void scatter256(Blackhole bh) {
1084         int window = 256 / Float.SIZE;
1085         scatter(window, bh);
1086     }
1087 
1088     @Benchmark
1089     public void scatter512(Blackhole bh) {
1090         int window = 512 / Float.SIZE;
1091         scatter(window, bh);
1092     }
1093 
1094 }
1095 
< prev index next >