1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;
  30 import org.openjdk.jmh.infra.Blackhole;
  31 
  32 @BenchmarkMode(Mode.Throughput)
  33 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  34 @State(Scope.Benchmark)
  35 @Warmup(iterations = 3, time = 1)
  36 @Measurement(iterations = 5, time = 1)
  37 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  38 public class IntScalar extends AbstractVectorBenchmark {
  39     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  40 
  41     @Param("1024")
  42     int size;
  43 
  44     int[] fill(IntFunction<Integer> f) {
  45         int[] array = new int[size];
  46         for (int i = 0; i < array.length; i++) {
  47             array[i] = f.apply(i);
  48         }
  49         return array;
  50     }
  51 
  52     int[] as, bs, cs, rs;
  53     boolean[] ms, rms;
  54     int[] ss;
  55 
  56     @Setup
  57     public void init() {
  58         as = fill(i -> (int)(2*i));
  59         bs = fill(i -> (int)(i+1));
  60         cs = fill(i -> (int)(i+5));
  61         rs = fill(i -> (int)0);
  62         ms = fillMask(size, i -> (i % 2) == 0);
  63         rms = fillMask(size, i -> false);
  64 
  65         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  66     }
  67 
  68     final IntFunction<int[]> fa = vl -> as;
  69     final IntFunction<int[]> fb = vl -> bs;
  70     final IntFunction<int[]> fc = vl -> cs;
  71     final IntFunction<int[]> fr = vl -> rs;
  72     final IntFunction<boolean[]> fm = vl -> ms;
  73     final IntFunction<boolean[]> fmr = vl -> rms;
  74     final IntFunction<int[]> fs = vl -> ss;
  75 
  76 
  77     @Benchmark
  78     public void add(Blackhole bh) {
  79         int[] as = fa.apply(size);
  80         int[] bs = fb.apply(size);
  81         int[] rs = fr.apply(size);
  82 
  83         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  84             for (int i = 0; i < as.length; i++) {
  85                 int a = as[i];
  86                 int b = bs[i];
  87                 rs[i] = (int)(a + b);
  88             }
  89         }
  90 
  91         bh.consume(rs);
  92     }
  93 
  94     @Benchmark
  95     public void addMasked(Blackhole bh) {
  96         int[] as = fa.apply(size);
  97         int[] bs = fb.apply(size);
  98         int[] rs = fr.apply(size);
  99         boolean[] ms = fm.apply(size);
 100 
 101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 102             for (int i = 0; i < as.length; i++) {
 103                 int a = as[i];
 104                 int b = bs[i];
 105                 if (ms[i % ms.length]) {
 106                     rs[i] = (int)(a + b);
 107                 } else {
 108                     rs[i] = a;
 109                 }
 110             }
 111         }
 112         bh.consume(rs);
 113     }
 114 
 115     @Benchmark
 116     public void sub(Blackhole bh) {
 117         int[] as = fa.apply(size);
 118         int[] bs = fb.apply(size);
 119         int[] rs = fr.apply(size);
 120 
 121         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 122             for (int i = 0; i < as.length; i++) {
 123                 int a = as[i];
 124                 int b = bs[i];
 125                 rs[i] = (int)(a - b);
 126             }
 127         }
 128 
 129         bh.consume(rs);
 130     }
 131 
 132     @Benchmark
 133     public void subMasked(Blackhole bh) {
 134         int[] as = fa.apply(size);
 135         int[] bs = fb.apply(size);
 136         int[] rs = fr.apply(size);
 137         boolean[] ms = fm.apply(size);
 138 
 139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 140             for (int i = 0; i < as.length; i++) {
 141                 int a = as[i];
 142                 int b = bs[i];
 143                 if (ms[i % ms.length]) {
 144                     rs[i] = (int)(a - b);
 145                 } else {
 146                     rs[i] = a;
 147                 }
 148             }
 149         }
 150         bh.consume(rs);
 151     }
 152 
 153 
 154 
 155     @Benchmark
 156     public void mul(Blackhole bh) {
 157         int[] as = fa.apply(size);
 158         int[] bs = fb.apply(size);
 159         int[] rs = fr.apply(size);
 160 
 161         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 162             for (int i = 0; i < as.length; i++) {
 163                 int a = as[i];
 164                 int b = bs[i];
 165                 rs[i] = (int)(a * b);
 166             }
 167         }
 168 
 169         bh.consume(rs);
 170     }
 171 
 172     @Benchmark
 173     public void mulMasked(Blackhole bh) {
 174         int[] as = fa.apply(size);
 175         int[] bs = fb.apply(size);
 176         int[] rs = fr.apply(size);
 177         boolean[] ms = fm.apply(size);
 178 
 179         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 180             for (int i = 0; i < as.length; i++) {
 181                 int a = as[i];
 182                 int b = bs[i];
 183                 if (ms[i % ms.length]) {
 184                     rs[i] = (int)(a * b);
 185                 } else {
 186                     rs[i] = a;
 187                 }
 188             }
 189         }
 190         bh.consume(rs);
 191     }
 192 
 193 
 194     @Benchmark
 195     public void and(Blackhole bh) {
 196         int[] as = fa.apply(size);
 197         int[] bs = fb.apply(size);
 198         int[] rs = fr.apply(size);
 199 
 200         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 201             for (int i = 0; i < as.length; i++) {
 202                 int a = as[i];
 203                 int b = bs[i];
 204                 rs[i] = (int)(a & b);
 205             }
 206         }
 207 
 208         bh.consume(rs);
 209     }
 210 
 211 
 212 
 213     @Benchmark
 214     public void andMasked(Blackhole bh) {
 215         int[] as = fa.apply(size);
 216         int[] bs = fb.apply(size);
 217         int[] rs = fr.apply(size);
 218         boolean[] ms = fm.apply(size);
 219 
 220         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 221             for (int i = 0; i < as.length; i++) {
 222                 int a = as[i];
 223                 int b = bs[i];
 224                 if (ms[i % ms.length]) {
 225                     rs[i] = (int)(a & b);
 226                 } else {
 227                     rs[i] = a;
 228                 }
 229             }
 230         }
 231         bh.consume(rs);
 232     }
 233 
 234 
 235 
 236     @Benchmark
 237     public void or(Blackhole bh) {
 238         int[] as = fa.apply(size);
 239         int[] bs = fb.apply(size);
 240         int[] rs = fr.apply(size);
 241 
 242         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 243             for (int i = 0; i < as.length; i++) {
 244                 int a = as[i];
 245                 int b = bs[i];
 246                 rs[i] = (int)(a | b);
 247             }
 248         }
 249 
 250         bh.consume(rs);
 251     }
 252 
 253 
 254 
 255     @Benchmark
 256     public void orMasked(Blackhole bh) {
 257         int[] as = fa.apply(size);
 258         int[] bs = fb.apply(size);
 259         int[] rs = fr.apply(size);
 260         boolean[] ms = fm.apply(size);
 261 
 262         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 263             for (int i = 0; i < as.length; i++) {
 264                 int a = as[i];
 265                 int b = bs[i];
 266                 if (ms[i % ms.length]) {
 267                     rs[i] = (int)(a | b);
 268                 } else {
 269                     rs[i] = a;
 270                 }
 271             }
 272         }
 273         bh.consume(rs);
 274     }
 275 
 276 
 277 
 278     @Benchmark
 279     public void xor(Blackhole bh) {
 280         int[] as = fa.apply(size);
 281         int[] bs = fb.apply(size);
 282         int[] rs = fr.apply(size);
 283 
 284         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 285             for (int i = 0; i < as.length; i++) {
 286                 int a = as[i];
 287                 int b = bs[i];
 288                 rs[i] = (int)(a ^ b);
 289             }
 290         }
 291 
 292         bh.consume(rs);
 293     }
 294 
 295 
 296 
 297     @Benchmark
 298     public void xorMasked(Blackhole bh) {
 299         int[] as = fa.apply(size);
 300         int[] bs = fb.apply(size);
 301         int[] rs = fr.apply(size);
 302         boolean[] ms = fm.apply(size);
 303 
 304         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 305             for (int i = 0; i < as.length; i++) {
 306                 int a = as[i];
 307                 int b = bs[i];
 308                 if (ms[i % ms.length]) {
 309                     rs[i] = (int)(a ^ b);
 310                 } else {
 311                     rs[i] = a;
 312                 }
 313             }
 314         }
 315         bh.consume(rs);
 316     }
 317 
 318 
 319 
 320     @Benchmark
 321     public void shiftLeft(Blackhole bh) {
 322         int[] as = fa.apply(size);
 323         int[] bs = fb.apply(size);
 324         int[] rs = fr.apply(size);
 325 
 326         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 327             for (int i = 0; i < as.length; i++) {
 328                 int a = as[i];
 329                 int b = bs[i];
 330                 rs[i] = (int)((a << b));
 331             }
 332         }
 333 
 334         bh.consume(rs);
 335     }
 336 
 337 
 338 
 339     @Benchmark
 340     public void shiftLeftMasked(Blackhole bh) {
 341         int[] as = fa.apply(size);
 342         int[] bs = fb.apply(size);
 343         int[] rs = fr.apply(size);
 344         boolean[] ms = fm.apply(size);
 345 
 346         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 347             for (int i = 0; i < as.length; i++) {
 348                 int a = as[i];
 349                 int b = bs[i];
 350                 if (ms[i % ms.length]) {
 351                     rs[i] = (int)((a << b));
 352                 } else {
 353                     rs[i] = a;
 354                 }
 355             }
 356         }
 357         bh.consume(rs);
 358     }
 359 
 360 
 361 
 362 
 363 
 364 
 365 
 366     @Benchmark
 367     public void shiftRight(Blackhole bh) {
 368         int[] as = fa.apply(size);
 369         int[] bs = fb.apply(size);
 370         int[] rs = fr.apply(size);
 371 
 372         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 373             for (int i = 0; i < as.length; i++) {
 374                 int a = as[i];
 375                 int b = bs[i];
 376                 rs[i] = (int)((a >>> b));
 377             }
 378         }
 379 
 380         bh.consume(rs);
 381     }
 382 
 383 
 384 
 385     @Benchmark
 386     public void shiftRightMasked(Blackhole bh) {
 387         int[] as = fa.apply(size);
 388         int[] bs = fb.apply(size);
 389         int[] rs = fr.apply(size);
 390         boolean[] ms = fm.apply(size);
 391 
 392         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 393             for (int i = 0; i < as.length; i++) {
 394                 int a = as[i];
 395                 int b = bs[i];
 396                 if (ms[i % ms.length]) {
 397                     rs[i] = (int)((a >>> b));
 398                 } else {
 399                     rs[i] = a;
 400                 }
 401             }
 402         }
 403         bh.consume(rs);
 404     }
 405 
 406 
 407 
 408 
 409 
 410 
 411 
 412     @Benchmark
 413     public void shiftArithmeticRight(Blackhole bh) {
 414         int[] as = fa.apply(size);
 415         int[] bs = fb.apply(size);
 416         int[] rs = fr.apply(size);
 417 
 418         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 419             for (int i = 0; i < as.length; i++) {
 420                 int a = as[i];
 421                 int b = bs[i];
 422                 rs[i] = (int)((a >> b));
 423             }
 424         }
 425 
 426         bh.consume(rs);
 427     }
 428 
 429 
 430 
 431     @Benchmark
 432     public void shiftArithmeticRightMasked(Blackhole bh) {
 433         int[] as = fa.apply(size);
 434         int[] bs = fb.apply(size);
 435         int[] rs = fr.apply(size);
 436         boolean[] ms = fm.apply(size);
 437 
 438         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 439             for (int i = 0; i < as.length; i++) {
 440                 int a = as[i];
 441                 int b = bs[i];
 442                 if (ms[i % ms.length]) {
 443                     rs[i] = (int)((a >> b));
 444                 } else {
 445                     rs[i] = a;
 446                 }
 447             }
 448         }
 449         bh.consume(rs);
 450     }
 451 
 452 
 453 
 454 
 455 
 456 
 457 
 458     @Benchmark
 459     public void shiftLeftShift(Blackhole bh) {
 460         int[] as = fa.apply(size);
 461         int[] bs = fb.apply(size);
 462         int[] rs = fr.apply(size);
 463 
 464         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 465             for (int i = 0; i < as.length; i++) {
 466                 int a = as[i];
 467                 int b = bs[i];
 468                 rs[i] = (int)((a << b));
 469             }
 470         }
 471 
 472         bh.consume(rs);
 473     }
 474 
 475 
 476 
 477     @Benchmark
 478     public void shiftLeftMaskedShift(Blackhole bh) {
 479         int[] as = fa.apply(size);
 480         int[] bs = fb.apply(size);
 481         int[] rs = fr.apply(size);
 482         boolean[] ms = fm.apply(size);
 483 
 484         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 485             for (int i = 0; i < as.length; i++) {
 486                 int a = as[i];
 487                 int b = bs[i];
 488                 boolean m = ms[i % ms.length];
 489                 rs[i] = (m ? (int)((a << b)) : a);
 490             }
 491         }
 492 
 493         bh.consume(rs);
 494     }
 495 
 496 
 497 
 498 
 499 
 500 
 501 
 502     @Benchmark
 503     public void shiftRightShift(Blackhole bh) {
 504         int[] as = fa.apply(size);
 505         int[] bs = fb.apply(size);
 506         int[] rs = fr.apply(size);
 507 
 508         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 509             for (int i = 0; i < as.length; i++) {
 510                 int a = as[i];
 511                 int b = bs[i];
 512                 rs[i] = (int)((a >>> b));
 513             }
 514         }
 515 
 516         bh.consume(rs);
 517     }
 518 
 519 
 520 
 521     @Benchmark
 522     public void shiftRightMaskedShift(Blackhole bh) {
 523         int[] as = fa.apply(size);
 524         int[] bs = fb.apply(size);
 525         int[] rs = fr.apply(size);
 526         boolean[] ms = fm.apply(size);
 527 
 528         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 529             for (int i = 0; i < as.length; i++) {
 530                 int a = as[i];
 531                 int b = bs[i];
 532                 boolean m = ms[i % ms.length];
 533                 rs[i] = (m ? (int)((a >>> b)) : a);
 534             }
 535         }
 536 
 537         bh.consume(rs);
 538     }
 539 
 540 
 541 
 542 
 543 
 544 
 545 
 546     @Benchmark
 547     public void shiftArithmeticRightShift(Blackhole bh) {
 548         int[] as = fa.apply(size);
 549         int[] bs = fb.apply(size);
 550         int[] rs = fr.apply(size);
 551 
 552         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 553             for (int i = 0; i < as.length; i++) {
 554                 int a = as[i];
 555                 int b = bs[i];
 556                 rs[i] = (int)((a >> b));
 557             }
 558         }
 559 
 560         bh.consume(rs);
 561     }
 562 
 563 
 564 
 565     @Benchmark
 566     public void shiftArithmeticRightMaskedShift(Blackhole bh) {
 567         int[] as = fa.apply(size);
 568         int[] bs = fb.apply(size);
 569         int[] rs = fr.apply(size);
 570         boolean[] ms = fm.apply(size);
 571 
 572         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 573             for (int i = 0; i < as.length; i++) {
 574                 int a = as[i];
 575                 int b = bs[i];
 576                 boolean m = ms[i % ms.length];
 577                 rs[i] = (m ? (int)((a >> b)) : a);
 578             }
 579         }
 580 
 581         bh.consume(rs);
 582     }
 583 
 584 
 585 
 586 
 587 
 588 
 589     @Benchmark
 590     public void max(Blackhole bh) {
 591         int[] as = fa.apply(size);
 592         int[] bs = fb.apply(size);
 593         int[] rs = fr.apply(size);
 594 
 595         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 596             for (int i = 0; i < as.length; i++) {
 597                 int a = as[i];
 598                 int b = bs[i];
 599                 rs[i] = (int)(Math.max(a, b));
 600             }
 601         }
 602 
 603         bh.consume(rs);
 604     }
 605 
 606     @Benchmark
 607     public void min(Blackhole bh) {
 608         int[] as = fa.apply(size);
 609         int[] bs = fb.apply(size);
 610         int[] rs = fr.apply(size);
 611 
 612         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 613             for (int i = 0; i < as.length; i++) {
 614                 int a = as[i];
 615                 int b = bs[i];
 616                 rs[i] = (int)(Math.min(a, b));
 617             }
 618         }
 619 
 620         bh.consume(rs);
 621     }
 622 
 623 
 624     @Benchmark
 625     public void andLanes(Blackhole bh) {
 626         int[] as = fa.apply(size);
 627         int r = -1;
 628         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 629             r = -1;
 630             for (int i = 0; i < as.length; i++) {
 631                 r &= as[i];
 632             }
 633         }
 634         bh.consume(r);
 635     }
 636 
 637 
 638 
 639     @Benchmark
 640     public void orLanes(Blackhole bh) {
 641         int[] as = fa.apply(size);
 642         int r = 0;
 643         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 644             r = 0;
 645             for (int i = 0; i < as.length; i++) {
 646                 r |= as[i];
 647             }
 648         }
 649         bh.consume(r);
 650     }
 651 
 652 
 653 
 654     @Benchmark
 655     public void xorLanes(Blackhole bh) {
 656         int[] as = fa.apply(size);
 657         int r = 0;
 658         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 659             r = 0;
 660             for (int i = 0; i < as.length; i++) {
 661                 r ^= as[i];
 662             }
 663         }
 664         bh.consume(r);
 665     }
 666 
 667 
 668     @Benchmark
 669     public void addLanes(Blackhole bh) {
 670         int[] as = fa.apply(size);
 671         int r = 0;
 672         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 673             r = 0;
 674             for (int i = 0; i < as.length; i++) {
 675                 r += as[i];
 676             }
 677         }
 678         bh.consume(r);
 679     }
 680 
 681     @Benchmark
 682     public void mulLanes(Blackhole bh) {
 683         int[] as = fa.apply(size);
 684         int r = 1;
 685         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 686             r = 1;
 687             for (int i = 0; i < as.length; i++) {
 688                 r *= as[i];
 689             }
 690         }
 691         bh.consume(r);
 692     }
 693 
 694     @Benchmark
 695     public void minLanes(Blackhole bh) {
 696         int[] as = fa.apply(size);
 697         int r = Integer.MAX_VALUE;
 698         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 699             r = Integer.MAX_VALUE;
 700             for (int i = 0; i < as.length; i++) {
 701                 r = (int)Math.min(r, as[i]);
 702             }
 703         }
 704         bh.consume(r);
 705     }
 706 
 707     @Benchmark
 708     public void maxLanes(Blackhole bh) {
 709         int[] as = fa.apply(size);
 710         int r = Integer.MIN_VALUE;
 711         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 712             r = Integer.MIN_VALUE;
 713             for (int i = 0; i < as.length; i++) {
 714                 r = (int)Math.max(r, as[i]);
 715             }
 716         }
 717         bh.consume(r);
 718     }
 719 
 720 
 721     @Benchmark
 722     public void anyTrue(Blackhole bh) {
 723         boolean[] ms = fm.apply(size);
 724         boolean r = false;
 725         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 726             r = false;
 727             for (int i = 0; i < ms.length; i++) {
 728                 r |= ms[i];
 729             }
 730         }
 731         bh.consume(r);
 732     }
 733 
 734 
 735 
 736     @Benchmark
 737     public void allTrue(Blackhole bh) {
 738         boolean[] ms = fm.apply(size);
 739         boolean r = true;
 740         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 741             r = true;
 742             for (int i = 0; i < ms.length; i++) {
 743                 r &= ms[i];
 744             }
 745         }
 746         bh.consume(r);
 747     }
 748 
 749 
 750     @Benchmark
 751     public void lessThan(Blackhole bh) {
 752         int[] as = fa.apply(size);
 753         int[] bs = fb.apply(size);
 754 
 755         boolean r = false;
 756         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 757             r = false;
 758             for (int i = 0; i < as.length; i++) {
 759                 boolean m = (as[i] < bs[i]);
 760                 r |= m; // accumulate so JIT can't eliminate the computation
 761             }
 762         }
 763 
 764         bh.consume(r);
 765     }
 766 
 767     @Benchmark
 768     public void greaterThan(Blackhole bh) {
 769         int[] as = fa.apply(size);
 770         int[] bs = fb.apply(size);
 771 
 772         boolean r = false;
 773         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 774             r = false;
 775             for (int i = 0; i < as.length; i++) {
 776                 boolean m = (as[i] > bs[i]);
 777                 r |= m; // accumulate so JIT can't eliminate the computation
 778             }
 779         }
 780 
 781         bh.consume(r);
 782     }
 783 
 784     @Benchmark
 785     public void equal(Blackhole bh) {
 786         int[] as = fa.apply(size);
 787         int[] bs = fb.apply(size);
 788 
 789         boolean r = false;
 790         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 791             r = false;
 792             for (int i = 0; i < as.length; i++) {
 793                 boolean m = (as[i] == bs[i]);
 794                 r |= m; // accumulate so JIT can't eliminate the computation
 795             }
 796         }
 797 
 798         bh.consume(r);
 799     }
 800 
 801     @Benchmark
 802     public void notEqual(Blackhole bh) {
 803         int[] as = fa.apply(size);
 804         int[] bs = fb.apply(size);
 805 
 806         boolean r = false;
 807         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 808             r = false;
 809             for (int i = 0; i < as.length; i++) {
 810                 boolean m = (as[i] != bs[i]);
 811                 r |= m; // accumulate so JIT can't eliminate the computation
 812             }
 813         }
 814 
 815         bh.consume(r);
 816     }
 817 
 818     @Benchmark
 819     public void lessThanEq(Blackhole bh) {
 820         int[] as = fa.apply(size);
 821         int[] bs = fb.apply(size);
 822 
 823         boolean r = false;
 824         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 825             r = false;
 826             for (int i = 0; i < as.length; i++) {
 827                 boolean m = (as[i] <= bs[i]);
 828                 r |= m; // accumulate so JIT can't eliminate the computation
 829             }
 830         }
 831 
 832         bh.consume(r);
 833     }
 834 
 835     @Benchmark
 836     public void greaterThanEq(Blackhole bh) {
 837         int[] as = fa.apply(size);
 838         int[] bs = fb.apply(size);
 839 
 840         boolean r = false;
 841         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 842             r = false;
 843             for (int i = 0; i < as.length; i++) {
 844                 boolean m = (as[i] >= bs[i]);
 845                 r |= m; // accumulate so JIT can't eliminate the computation
 846             }
 847         }
 848 
 849         bh.consume(r);
 850     }
 851 
 852     @Benchmark
 853     public void blend(Blackhole bh) {
 854         int[] as = fa.apply(size);
 855         int[] bs = fb.apply(size);
 856         int[] rs = fr.apply(size);
 857         boolean[] ms = fm.apply(size);
 858 
 859         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 860             for (int i = 0; i < as.length; i++) {
 861                 int a = as[i];
 862                 int b = bs[i];
 863                 boolean m = ms[i % ms.length];
 864                 rs[i] = (m ? b : a);
 865             }
 866         }
 867 
 868         bh.consume(rs);
 869     }
 870     void rearrangeShared(int window, Blackhole bh) {
 871         int[] as = fa.apply(size);
 872         int[] order = fs.apply(size);
 873         int[] rs = fr.apply(size);
 874 
 875         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 876             for (int i = 0; i < as.length; i += window) {
 877                 for (int j = 0; j < window; j++) {
 878                     int a = as[i+j];
 879                     int pos = order[j];
 880                     rs[i + pos] = a;
 881                 }
 882             }
 883         }
 884 
 885         bh.consume(rs);
 886     }
 887 
 888     @Benchmark
 889     public void rearrange064(Blackhole bh) {
 890         int window = 64 / Integer.SIZE;
 891         rearrangeShared(window, bh);
 892     }
 893 
 894     @Benchmark
 895     public void rearrange128(Blackhole bh) {
 896         int window = 128 / Integer.SIZE;
 897         rearrangeShared(window, bh);
 898     }
 899 
 900     @Benchmark
 901     public void rearrange256(Blackhole bh) {
 902         int window = 256 / Integer.SIZE;
 903         rearrangeShared(window, bh);
 904     }
 905 
 906     @Benchmark
 907     public void rearrange512(Blackhole bh) {
 908         int window = 512 / Integer.SIZE;
 909         rearrangeShared(window, bh);
 910     }
 911 
 912 
 913 
 914 
 915 
 916 
 917 
 918 
 919 
 920 
 921 
 922 
 923 
 924 
 925 
 926 
 927 
 928 
 929 
 930 
 931 
 932     @Benchmark
 933     public void neg(Blackhole bh) {
 934         int[] as = fa.apply(size);
 935         int[] rs = fr.apply(size);
 936 
 937         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 938             for (int i = 0; i < as.length; i++) {
 939                 int a = as[i];
 940                 rs[i] = (int)(-((int)a));
 941             }
 942         }
 943 
 944         bh.consume(rs);
 945     }
 946 
 947     @Benchmark
 948     public void negMasked(Blackhole bh) {
 949         int[] as = fa.apply(size);
 950         int[] rs = fr.apply(size);
 951         boolean[] ms = fm.apply(size);
 952 
 953         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 954             for (int i = 0; i < as.length; i++) {
 955                 int a = as[i];
 956                 boolean m = ms[i % ms.length];
 957                 rs[i] = (m ? (int)(-((int)a)) : a);
 958             }
 959         }
 960 
 961         bh.consume(rs);
 962     }
 963 
 964     @Benchmark
 965     public void abs(Blackhole bh) {
 966         int[] as = fa.apply(size);
 967         int[] rs = fr.apply(size);
 968 
 969         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 970             for (int i = 0; i < as.length; i++) {
 971                 int a = as[i];
 972                 rs[i] = (int)(Math.abs((int)a));
 973             }
 974         }
 975 
 976         bh.consume(rs);
 977     }
 978 
 979     @Benchmark
 980     public void absMasked(Blackhole bh) {
 981         int[] as = fa.apply(size);
 982         int[] rs = fr.apply(size);
 983         boolean[] ms = fm.apply(size);
 984 
 985         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 986             for (int i = 0; i < as.length; i++) {
 987                 int a = as[i];
 988                 boolean m = ms[i % ms.length];
 989                 rs[i] = (m ? (int)(Math.abs((int)a)) : a);
 990             }
 991         }
 992 
 993         bh.consume(rs);
 994     }
 995 
 996 
 997     @Benchmark
 998     public void not(Blackhole bh) {
 999         int[] as = fa.apply(size);
1000         int[] rs = fr.apply(size);
1001 
1002         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1003             for (int i = 0; i < as.length; i++) {
1004                 int a = as[i];
1005                 rs[i] = (int)(~((int)a));
1006             }
1007         }
1008 
1009         bh.consume(rs);
1010     }
1011 
1012 
1013 
1014     @Benchmark
1015     public void notMasked(Blackhole bh) {
1016         int[] as = fa.apply(size);
1017         int[] rs = fr.apply(size);
1018         boolean[] ms = fm.apply(size);
1019 
1020         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1021             for (int i = 0; i < as.length; i++) {
1022                 int a = as[i];
1023                 boolean m = ms[i % ms.length];
1024                 rs[i] = (m ? (int)(~((int)a)) : a);
1025             }
1026         }
1027 
1028         bh.consume(rs);
1029     }
1030 
1031 
1032 
1033 
1034     @Benchmark
1035     public void gatherBase0(Blackhole bh) {
1036         int[] as = fa.apply(size);
1037         int[] is    = fs.apply(size);
1038         int[] rs = fr.apply(size);
1039 
1040         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1041             for (int i = 0; i < as.length; i++) {
1042                 int ix = 0 + is[i];
1043                 rs[i] = as[ix];
1044             }
1045         }
1046 
1047         bh.consume(rs);
1048     }
1049 
1050 
1051     void gather(int window, Blackhole bh) {
1052         int[] as = fa.apply(size);
1053         int[] is    = fs.apply(size);
1054         int[] rs = fr.apply(size);
1055 
1056         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1057             for (int i = 0; i < as.length; i += window) {
1058                 for (int j = 0; j < window; j++) {
1059                     int ix = i + is[i + j];
1060                     rs[i + j] = as[ix];
1061                 }
1062             }
1063         }
1064 
1065         bh.consume(rs);
1066     }
1067 
1068     @Benchmark
1069     public void gather064(Blackhole bh) {
1070         int window = 64 / Integer.SIZE;
1071         gather(window, bh);
1072     }
1073 
1074     @Benchmark
1075     public void gather128(Blackhole bh) {
1076         int window = 128 / Integer.SIZE;
1077         gather(window, bh);
1078     }
1079 
1080     @Benchmark
1081     public void gather256(Blackhole bh) {
1082         int window = 256 / Integer.SIZE;
1083         gather(window, bh);
1084     }
1085 
1086     @Benchmark
1087     public void gather512(Blackhole bh) {
1088         int window = 512 / Integer.SIZE;
1089         gather(window, bh);
1090     }
1091 
1092 
1093 
1094     @Benchmark
1095     public void scatterBase0(Blackhole bh) {
1096         int[] as = fa.apply(size);
1097         int[] is    = fs.apply(size);
1098         int[] rs = fr.apply(size);
1099 
1100         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1101             for (int i = 0; i < as.length; i++) {
1102                 int ix = 0 + is[i];
1103                 rs[ix] = as[i];
1104             }
1105         }
1106 
1107         bh.consume(rs);
1108     }
1109 
1110     void scatter(int window, Blackhole bh) {
1111         int[] as = fa.apply(size);
1112         int[] is    = fs.apply(size);
1113         int[] rs = fr.apply(size);
1114 
1115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1116             for (int i = 0; i < as.length; i += window) {
1117                 for (int j = 0; j < window; j++) {
1118                     int ix = i + is[i + j];
1119                     rs[ix] = as[i + j];
1120                 }
1121             }
1122         }
1123 
1124         bh.consume(rs);
1125     }
1126 
1127     @Benchmark
1128     public void scatter064(Blackhole bh) {
1129         int window = 64 / Integer.SIZE;
1130         scatter(window, bh);
1131     }
1132 
1133     @Benchmark
1134     public void scatter128(Blackhole bh) {
1135         int window = 128 / Integer.SIZE;
1136         scatter(window, bh);
1137     }
1138 
1139     @Benchmark
1140     public void scatter256(Blackhole bh) {
1141         int window = 256 / Integer.SIZE;
1142         scatter(window, bh);
1143     }
1144 
1145     @Benchmark
1146     public void scatter512(Blackhole bh) {
1147         int window = 512 / Integer.SIZE;
1148         scatter(window, bh);
1149     }
1150 
1151 }
1152