1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;
  30 import org.openjdk.jmh.infra.Blackhole;
  31 
  32 @BenchmarkMode(Mode.Throughput)
  33 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  34 @State(Scope.Benchmark)
  35 @Warmup(iterations = 3, time = 1)
  36 @Measurement(iterations = 5, time = 1)
  37 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  38 public class ByteScalar extends AbstractVectorBenchmark {
  39     static final int INVOC_COUNT = 1; // To align with vector benchmarks.
  40 
  41     @Param("1024")
  42     int size;
  43 
  44     byte[] fill(IntFunction<Byte> f) {
  45         byte[] array = new byte[size];
  46         for (int i = 0; i < array.length; i++) {
  47             array[i] = f.apply(i);
  48         }
  49         return array;
  50     }
  51 
  52     byte[] as, bs, cs, rs;
  53     boolean[] ms, rms;
  54     int[] ss;
  55 
  56     @Setup
  57     public void init() {
  58         as = fill(i -> (byte)(2*i));
  59         bs = fill(i -> (byte)(i+1));
  60         cs = fill(i -> (byte)(i+5));
  61         rs = fill(i -> (byte)0);
  62         ms = fillMask(size, i -> (i % 2) == 0);
  63         rms = fillMask(size, i -> false);
  64 
  65         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  66     }
  67 
  68     final IntFunction<byte[]> fa = vl -> as;
  69     final IntFunction<byte[]> fb = vl -> bs;
  70     final IntFunction<byte[]> fc = vl -> cs;
  71     final IntFunction<byte[]> fr = vl -> rs;
  72     final IntFunction<boolean[]> fm = vl -> ms;
  73     final IntFunction<boolean[]> fmr = vl -> rms;
  74     final IntFunction<int[]> fs = vl -> ss;
  75 
  76 
  77     @Benchmark
  78     public void add(Blackhole bh) {
  79         byte[] as = fa.apply(size);
  80         byte[] bs = fb.apply(size);
  81         byte[] rs = fr.apply(size);
  82 
  83         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  84             for (int i = 0; i < as.length; i++) {
  85                 byte a = as[i];
  86                 byte b = bs[i];
  87                 rs[i] = (byte)(a + b);
  88             }
  89         }
  90 
  91         bh.consume(rs);
  92     }
  93 
  94     @Benchmark
  95     public void addMasked(Blackhole bh) {
  96         byte[] as = fa.apply(size);
  97         byte[] bs = fb.apply(size);
  98         byte[] rs = fr.apply(size);
  99         boolean[] ms = fm.apply(size);
 100 
 101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 102             for (int i = 0; i < as.length; i++) {
 103                 byte a = as[i];
 104                 byte b = bs[i];
 105                 if (ms[i % ms.length]) {
 106                     rs[i] = (byte)(a + b);
 107                 } else {
 108                     rs[i] = a;
 109                 }
 110             }
 111         }
 112         bh.consume(rs);
 113     }
 114 
 115     @Benchmark
 116     public void sub(Blackhole bh) {
 117         byte[] as = fa.apply(size);
 118         byte[] bs = fb.apply(size);
 119         byte[] rs = fr.apply(size);
 120 
 121         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 122             for (int i = 0; i < as.length; i++) {
 123                 byte a = as[i];
 124                 byte b = bs[i];
 125                 rs[i] = (byte)(a - b);
 126             }
 127         }
 128 
 129         bh.consume(rs);
 130     }
 131 
 132     @Benchmark
 133     public void subMasked(Blackhole bh) {
 134         byte[] as = fa.apply(size);
 135         byte[] bs = fb.apply(size);
 136         byte[] rs = fr.apply(size);
 137         boolean[] ms = fm.apply(size);
 138 
 139         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 140             for (int i = 0; i < as.length; i++) {
 141                 byte a = as[i];
 142                 byte b = bs[i];
 143                 if (ms[i % ms.length]) {
 144                     rs[i] = (byte)(a - b);
 145                 } else {
 146                     rs[i] = a;
 147                 }
 148             }
 149         }
 150         bh.consume(rs);
 151     }
 152 
 153 
 154 
 155     @Benchmark
 156     public void mul(Blackhole bh) {
 157         byte[] as = fa.apply(size);
 158         byte[] bs = fb.apply(size);
 159         byte[] rs = fr.apply(size);
 160 
 161         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 162             for (int i = 0; i < as.length; i++) {
 163                 byte a = as[i];
 164                 byte b = bs[i];
 165                 rs[i] = (byte)(a * b);
 166             }
 167         }
 168 
 169         bh.consume(rs);
 170     }
 171 
 172     @Benchmark
 173     public void mulMasked(Blackhole bh) {
 174         byte[] as = fa.apply(size);
 175         byte[] bs = fb.apply(size);
 176         byte[] rs = fr.apply(size);
 177         boolean[] ms = fm.apply(size);
 178 
 179         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 180             for (int i = 0; i < as.length; i++) {
 181                 byte a = as[i];
 182                 byte b = bs[i];
 183                 if (ms[i % ms.length]) {
 184                     rs[i] = (byte)(a * b);
 185                 } else {
 186                     rs[i] = a;
 187                 }
 188             }
 189         }
 190         bh.consume(rs);
 191     }
 192 
 193 
 194     @Benchmark
 195     public void and(Blackhole bh) {
 196         byte[] as = fa.apply(size);
 197         byte[] bs = fb.apply(size);
 198         byte[] rs = fr.apply(size);
 199 
 200         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 201             for (int i = 0; i < as.length; i++) {
 202                 byte a = as[i];
 203                 byte b = bs[i];
 204                 rs[i] = (byte)(a & b);
 205             }
 206         }
 207 
 208         bh.consume(rs);
 209     }
 210 
 211 
 212 
 213     @Benchmark
 214     public void andMasked(Blackhole bh) {
 215         byte[] as = fa.apply(size);
 216         byte[] bs = fb.apply(size);
 217         byte[] rs = fr.apply(size);
 218         boolean[] ms = fm.apply(size);
 219 
 220         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 221             for (int i = 0; i < as.length; i++) {
 222                 byte a = as[i];
 223                 byte b = bs[i];
 224                 if (ms[i % ms.length]) {
 225                     rs[i] = (byte)(a & b);
 226                 } else {
 227                     rs[i] = a;
 228                 }
 229             }
 230         }
 231         bh.consume(rs);
 232     }
 233 
 234 
 235 
 236     @Benchmark
 237     public void or(Blackhole bh) {
 238         byte[] as = fa.apply(size);
 239         byte[] bs = fb.apply(size);
 240         byte[] rs = fr.apply(size);
 241 
 242         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 243             for (int i = 0; i < as.length; i++) {
 244                 byte a = as[i];
 245                 byte b = bs[i];
 246                 rs[i] = (byte)(a | b);
 247             }
 248         }
 249 
 250         bh.consume(rs);
 251     }
 252 
 253 
 254 
 255     @Benchmark
 256     public void orMasked(Blackhole bh) {
 257         byte[] as = fa.apply(size);
 258         byte[] bs = fb.apply(size);
 259         byte[] rs = fr.apply(size);
 260         boolean[] ms = fm.apply(size);
 261 
 262         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 263             for (int i = 0; i < as.length; i++) {
 264                 byte a = as[i];
 265                 byte b = bs[i];
 266                 if (ms[i % ms.length]) {
 267                     rs[i] = (byte)(a | b);
 268                 } else {
 269                     rs[i] = a;
 270                 }
 271             }
 272         }
 273         bh.consume(rs);
 274     }
 275 
 276 
 277 
 278     @Benchmark
 279     public void xor(Blackhole bh) {
 280         byte[] as = fa.apply(size);
 281         byte[] bs = fb.apply(size);
 282         byte[] rs = fr.apply(size);
 283 
 284         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 285             for (int i = 0; i < as.length; i++) {
 286                 byte a = as[i];
 287                 byte b = bs[i];
 288                 rs[i] = (byte)(a ^ b);
 289             }
 290         }
 291 
 292         bh.consume(rs);
 293     }
 294 
 295 
 296 
 297     @Benchmark
 298     public void xorMasked(Blackhole bh) {
 299         byte[] as = fa.apply(size);
 300         byte[] bs = fb.apply(size);
 301         byte[] rs = fr.apply(size);
 302         boolean[] ms = fm.apply(size);
 303 
 304         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 305             for (int i = 0; i < as.length; i++) {
 306                 byte a = as[i];
 307                 byte b = bs[i];
 308                 if (ms[i % ms.length]) {
 309                     rs[i] = (byte)(a ^ b);
 310                 } else {
 311                     rs[i] = a;
 312                 }
 313             }
 314         }
 315         bh.consume(rs);
 316     }
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332     @Benchmark
 333     public void aShiftRShift(Blackhole bh) {
 334         byte[] as = fa.apply(size);
 335         byte[] bs = fb.apply(size);
 336         byte[] rs = fr.apply(size);
 337 
 338         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 339             for (int i = 0; i < as.length; i++) {
 340                 byte a = as[i];
 341                 byte b = bs[i];
 342                 rs[i] = (byte)((a >> (b & 7)));
 343             }
 344         }
 345 
 346         bh.consume(rs);
 347     }
 348 
 349 
 350 
 351     @Benchmark
 352     public void aShiftRMaskedShift(Blackhole bh) {
 353         byte[] as = fa.apply(size);
 354         byte[] bs = fb.apply(size);
 355         byte[] rs = fr.apply(size);
 356         boolean[] ms = fm.apply(size);
 357 
 358         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 359             for (int i = 0; i < as.length; i++) {
 360                 byte a = as[i];
 361                 byte b = bs[i];
 362                 boolean m = ms[i % ms.length];
 363                 rs[i] = (m ? (byte)((a >> (b & 7))) : a);
 364             }
 365         }
 366 
 367         bh.consume(rs);
 368     }
 369 
 370 
 371 
 372     @Benchmark
 373     public void shiftLShift(Blackhole bh) {
 374         byte[] as = fa.apply(size);
 375         byte[] bs = fb.apply(size);
 376         byte[] rs = fr.apply(size);
 377 
 378         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 379             for (int i = 0; i < as.length; i++) {
 380                 byte a = as[i];
 381                 byte b = bs[i];
 382                 rs[i] = (byte)((a << (b & 7)));
 383             }
 384         }
 385 
 386         bh.consume(rs);
 387     }
 388 
 389 
 390 
 391     @Benchmark
 392     public void shiftLMaskedShift(Blackhole bh) {
 393         byte[] as = fa.apply(size);
 394         byte[] bs = fb.apply(size);
 395         byte[] rs = fr.apply(size);
 396         boolean[] ms = fm.apply(size);
 397 
 398         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 399             for (int i = 0; i < as.length; i++) {
 400                 byte a = as[i];
 401                 byte b = bs[i];
 402                 boolean m = ms[i % ms.length];
 403                 rs[i] = (m ? (byte)((a << (b & 7))) : a);
 404             }
 405         }
 406 
 407         bh.consume(rs);
 408     }
 409 
 410 
 411 
 412     @Benchmark
 413     public void shiftRShift(Blackhole bh) {
 414         byte[] as = fa.apply(size);
 415         byte[] bs = fb.apply(size);
 416         byte[] rs = fr.apply(size);
 417 
 418         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 419             for (int i = 0; i < as.length; i++) {
 420                 byte a = as[i];
 421                 byte b = bs[i];
 422                 rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
 423             }
 424         }
 425 
 426         bh.consume(rs);
 427     }
 428 
 429 
 430 
 431     @Benchmark
 432     public void shiftRMaskedShift(Blackhole bh) {
 433         byte[] as = fa.apply(size);
 434         byte[] bs = fb.apply(size);
 435         byte[] rs = fr.apply(size);
 436         boolean[] ms = fm.apply(size);
 437 
 438         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 439             for (int i = 0; i < as.length; i++) {
 440                 byte a = as[i];
 441                 byte b = bs[i];
 442                 boolean m = ms[i % ms.length];
 443                 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
 444             }
 445         }
 446 
 447         bh.consume(rs);
 448     }
 449 
 450 
 451 
 452 
 453 
 454 
 455 
 456 
 457     @Benchmark
 458     public void max(Blackhole bh) {
 459         byte[] as = fa.apply(size);
 460         byte[] bs = fb.apply(size);
 461         byte[] rs = fr.apply(size);
 462 
 463         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 464             for (int i = 0; i < as.length; i++) {
 465                 byte a = as[i];
 466                 byte b = bs[i];
 467                 rs[i] = (byte)(Math.max(a, b));
 468             }
 469         }
 470 
 471         bh.consume(rs);
 472     }
 473 
 474     @Benchmark
 475     public void min(Blackhole bh) {
 476         byte[] as = fa.apply(size);
 477         byte[] bs = fb.apply(size);
 478         byte[] rs = fr.apply(size);
 479 
 480         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 481             for (int i = 0; i < as.length; i++) {
 482                 byte a = as[i];
 483                 byte b = bs[i];
 484                 rs[i] = (byte)(Math.min(a, b));
 485             }
 486         }
 487 
 488         bh.consume(rs);
 489     }
 490 
 491 
 492     @Benchmark
 493     public void andAll(Blackhole bh) {
 494         byte[] as = fa.apply(size);
 495         byte r = -1;
 496         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 497             r = -1;
 498             for (int i = 0; i < as.length; i++) {
 499                 r &= as[i];
 500             }
 501         }
 502         bh.consume(r);
 503     }
 504 
 505 
 506 
 507     @Benchmark
 508     public void orAll(Blackhole bh) {
 509         byte[] as = fa.apply(size);
 510         byte r = 0;
 511         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 512             r = 0;
 513             for (int i = 0; i < as.length; i++) {
 514                 r |= as[i];
 515             }
 516         }
 517         bh.consume(r);
 518     }
 519 
 520 
 521 
 522     @Benchmark
 523     public void xorAll(Blackhole bh) {
 524         byte[] as = fa.apply(size);
 525         byte r = 0;
 526         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 527             r = 0;
 528             for (int i = 0; i < as.length; i++) {
 529                 r ^= as[i];
 530             }
 531         }
 532         bh.consume(r);
 533     }
 534 
 535 
 536     @Benchmark
 537     public void addAll(Blackhole bh) {
 538         byte[] as = fa.apply(size);
 539         byte r = 0;
 540         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 541             r = 0;
 542             for (int i = 0; i < as.length; i++) {
 543                 r += as[i];
 544             }
 545         }
 546         bh.consume(r);
 547     }
 548 
 549     @Benchmark
 550     public void mulAll(Blackhole bh) {
 551         byte[] as = fa.apply(size);
 552         byte r = 1;
 553         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 554             r = 1;
 555             for (int i = 0; i < as.length; i++) {
 556                 r *= as[i];
 557             }
 558         }
 559         bh.consume(r);
 560     }
 561 
 562     @Benchmark
 563     public void minAll(Blackhole bh) {
 564         byte[] as = fa.apply(size);
 565         byte r = Byte.MAX_VALUE;
 566         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 567             r = Byte.MAX_VALUE;
 568             for (int i = 0; i < as.length; i++) {
 569                 r = (byte)Math.min(r, as[i]);
 570             }
 571         }
 572         bh.consume(r);
 573     }
 574 
 575     @Benchmark
 576     public void maxAll(Blackhole bh) {
 577         byte[] as = fa.apply(size);
 578         byte r = Byte.MIN_VALUE;
 579         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 580             r = Byte.MIN_VALUE;
 581             for (int i = 0; i < as.length; i++) {
 582                 r = (byte)Math.max(r, as[i]);
 583             }
 584         }
 585         bh.consume(r);
 586     }
 587 
 588 
 589     @Benchmark
 590     public void anyTrue(Blackhole bh) {
 591         boolean[] ms = fm.apply(size);
 592         boolean r = false;
 593         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 594             r = false;
 595             for (int i = 0; i < ms.length; i++) {
 596                 r |= ms[i];
 597             }
 598         }
 599         bh.consume(r);
 600     }
 601 
 602 
 603 
 604     @Benchmark
 605     public void allTrue(Blackhole bh) {
 606         boolean[] ms = fm.apply(size);
 607         boolean r = true;
 608         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 609             r = true;
 610             for (int i = 0; i < ms.length; i++) {
 611                 r &= ms[i];
 612             }
 613         }
 614         bh.consume(r);
 615     }
 616 
 617 
 618     @Benchmark
 619     public void lessThan(Blackhole bh) {
 620         byte[] as = fa.apply(size);
 621         byte[] bs = fb.apply(size);
 622 
 623         boolean r = false;
 624         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 625             r = false;
 626             for (int i = 0; i < as.length; i++) {
 627                 boolean m = (as[i] < bs[i]);
 628                 r |= m; // accumulate so JIT can't eliminate the computation
 629             }
 630         }
 631 
 632         bh.consume(r);
 633     }
 634 
 635     @Benchmark
 636     public void greaterThan(Blackhole bh) {
 637         byte[] as = fa.apply(size);
 638         byte[] bs = fb.apply(size);
 639 
 640         boolean r = false;
 641         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 642             r = false;
 643             for (int i = 0; i < as.length; i++) {
 644                 boolean m = (as[i] > bs[i]);
 645                 r |= m; // accumulate so JIT can't eliminate the computation
 646             }
 647         }
 648 
 649         bh.consume(r);
 650     }
 651 
 652     @Benchmark
 653     public void equal(Blackhole bh) {
 654         byte[] as = fa.apply(size);
 655         byte[] bs = fb.apply(size);
 656 
 657         boolean r = false;
 658         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 659             r = false;
 660             for (int i = 0; i < as.length; i++) {
 661                 boolean m = (as[i] == bs[i]);
 662                 r |= m; // accumulate so JIT can't eliminate the computation
 663             }
 664         }
 665 
 666         bh.consume(r);
 667     }
 668 
 669     @Benchmark
 670     public void notEqual(Blackhole bh) {
 671         byte[] as = fa.apply(size);
 672         byte[] bs = fb.apply(size);
 673 
 674         boolean r = false;
 675         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 676             r = false;
 677             for (int i = 0; i < as.length; i++) {
 678                 boolean m = (as[i] != bs[i]);
 679                 r |= m; // accumulate so JIT can't eliminate the computation
 680             }
 681         }
 682 
 683         bh.consume(r);
 684     }
 685 
 686     @Benchmark
 687     public void lessThanEq(Blackhole bh) {
 688         byte[] as = fa.apply(size);
 689         byte[] bs = fb.apply(size);
 690 
 691         boolean r = false;
 692         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 693             r = false;
 694             for (int i = 0; i < as.length; i++) {
 695                 boolean m = (as[i] <= bs[i]);
 696                 r |= m; // accumulate so JIT can't eliminate the computation
 697             }
 698         }
 699 
 700         bh.consume(r);
 701     }
 702 
 703     @Benchmark
 704     public void greaterThanEq(Blackhole bh) {
 705         byte[] as = fa.apply(size);
 706         byte[] bs = fb.apply(size);
 707 
 708         boolean r = false;
 709         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 710             r = false;
 711             for (int i = 0; i < as.length; i++) {
 712                 boolean m = (as[i] >= bs[i]);
 713                 r |= m; // accumulate so JIT can't eliminate the computation
 714             }
 715         }
 716 
 717         bh.consume(r);
 718     }
 719 
 720     @Benchmark
 721     public void blend(Blackhole bh) {
 722         byte[] as = fa.apply(size);
 723         byte[] bs = fb.apply(size);
 724         byte[] rs = fr.apply(size);
 725         boolean[] ms = fm.apply(size);
 726 
 727         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 728             for (int i = 0; i < as.length; i++) {
 729                 byte a = as[i];
 730                 byte b = bs[i];
 731                 boolean m = ms[i % ms.length];
 732                 rs[i] = (m ? b : a);
 733             }
 734         }
 735 
 736         bh.consume(rs);
 737     }
 738     void rearrangeShared(int window, Blackhole bh) {
 739         byte[] as = fa.apply(size);
 740         int[] order = fs.apply(size);
 741         byte[] rs = fr.apply(size);
 742 
 743         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 744             for (int i = 0; i < as.length; i += window) {
 745                 for (int j = 0; j < window; j++) {
 746                     byte a = as[i+j];
 747                     int pos = order[j];
 748                     rs[i + pos] = a;
 749                 }
 750             }
 751         }
 752 
 753         bh.consume(rs);
 754     }
 755 
 756     @Benchmark
 757     public void rearrange064(Blackhole bh) {
 758         int window = 64 / Byte.SIZE;
 759         rearrangeShared(window, bh);
 760     }
 761 
 762     @Benchmark
 763     public void rearrange128(Blackhole bh) {
 764         int window = 128 / Byte.SIZE;
 765         rearrangeShared(window, bh);
 766     }
 767 
 768     @Benchmark
 769     public void rearrange256(Blackhole bh) {
 770         int window = 256 / Byte.SIZE;
 771         rearrangeShared(window, bh);
 772     }
 773 
 774     @Benchmark
 775     public void rearrange512(Blackhole bh) {
 776         int window = 512 / Byte.SIZE;
 777         rearrangeShared(window, bh);
 778     }
 779 
 780 
 781 
 782 
 783 
 784 
 785 
 786 
 787 
 788 
 789 
 790 
 791 
 792 
 793 
 794 
 795 
 796 
 797 
 798 
 799 
 800     @Benchmark
 801     public void neg(Blackhole bh) {
 802         byte[] as = fa.apply(size);
 803         byte[] rs = fr.apply(size);
 804 
 805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 806             for (int i = 0; i < as.length; i++) {
 807                 byte a = as[i];
 808                 rs[i] = (byte)(-((byte)a));
 809             }
 810         }
 811 
 812         bh.consume(rs);
 813     }
 814 
 815     @Benchmark
 816     public void negMasked(Blackhole bh) {
 817         byte[] as = fa.apply(size);
 818         byte[] rs = fr.apply(size);
 819         boolean[] ms = fm.apply(size);
 820 
 821         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 822             for (int i = 0; i < as.length; i++) {
 823                 byte a = as[i];
 824                 boolean m = ms[i % ms.length];
 825                 rs[i] = (m ? (byte)(-((byte)a)) : a);
 826             }
 827         }
 828 
 829         bh.consume(rs);
 830     }
 831 
 832     @Benchmark
 833     public void abs(Blackhole bh) {
 834         byte[] as = fa.apply(size);
 835         byte[] rs = fr.apply(size);
 836 
 837         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 838             for (int i = 0; i < as.length; i++) {
 839                 byte a = as[i];
 840                 rs[i] = (byte)(Math.abs((byte)a));
 841             }
 842         }
 843 
 844         bh.consume(rs);
 845     }
 846 
 847     @Benchmark
 848     public void absMasked(Blackhole bh) {
 849         byte[] as = fa.apply(size);
 850         byte[] rs = fr.apply(size);
 851         boolean[] ms = fm.apply(size);
 852 
 853         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 854             for (int i = 0; i < as.length; i++) {
 855                 byte a = as[i];
 856                 boolean m = ms[i % ms.length];
 857                 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
 858             }
 859         }
 860 
 861         bh.consume(rs);
 862     }
 863 
 864 
 865     @Benchmark
 866     public void not(Blackhole bh) {
 867         byte[] as = fa.apply(size);
 868         byte[] rs = fr.apply(size);
 869 
 870         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 871             for (int i = 0; i < as.length; i++) {
 872                 byte a = as[i];
 873                 rs[i] = (byte)(~((byte)a));
 874             }
 875         }
 876 
 877         bh.consume(rs);
 878     }
 879 
 880 
 881 
 882     @Benchmark
 883     public void notMasked(Blackhole bh) {
 884         byte[] as = fa.apply(size);
 885         byte[] rs = fr.apply(size);
 886         boolean[] ms = fm.apply(size);
 887 
 888         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 889             for (int i = 0; i < as.length; i++) {
 890                 byte a = as[i];
 891                 boolean m = ms[i % ms.length];
 892                 rs[i] = (m ? (byte)(~((byte)a)) : a);
 893             }
 894         }
 895 
 896         bh.consume(rs);
 897     }
 898 
 899 
 900 
 901 
 902 
 903 }
 904