1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 import jdk.incubator.vector.Vector; 27 import jdk.incubator.vector.Vector.Shape; 28 import jdk.incubator.vector.Vector.Species; 29 import jdk.incubator.vector.ByteVector; 30 31 import java.util.concurrent.TimeUnit; 32 import java.util.function.BiFunction; 33 import java.util.function.IntFunction; 34 35 import org.openjdk.jmh.annotations.*; 36 import org.openjdk.jmh.infra.Blackhole; 37 38 @BenchmarkMode(Mode.Throughput) 39 @OutputTimeUnit(TimeUnit.MILLISECONDS) 40 @State(Scope.Benchmark) 41 @Warmup(iterations = 3, time = 1) 42 @Measurement(iterations = 5, time = 1) 43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 44 public class ByteMaxVector extends AbstractVectorBenchmark { 45 static final Species<Byte> SPECIES = ByteVector.SPECIES_MAX; 46 47 static final int INVOC_COUNT = 1; // get rid of outer loop 48 49 @Param("1024") 50 int size; 51 52 byte[] fill(IntFunction<Byte> f) { 53 byte[] array = new byte[size]; 54 for (int i = 0; i < array.length; i++) { 55 array[i] = f.apply(i); 56 } 57 return array; 58 } 59 60 byte[] a, b, c, r; 61 boolean[] m, rm; 62 int[] s; 63 64 @Setup 65 public void init() { 66 size += size % SPECIES.length(); // FIXME: add post-loops 67 68 a = fill(i -> (byte)(2*i)); 69 b = fill(i -> (byte)(i+1)); 70 c = fill(i -> (byte)(i+5)); 71 r = fill(i -> (byte)0); 72 73 m = fillMask(size, i -> (i % 2) == 0); 74 rm = fillMask(size, i -> false); 75 76 s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length())); 77 } 78 79 final IntFunction<byte[]> fa = vl -> a; 80 final IntFunction<byte[]> fb = vl -> b; 81 final IntFunction<byte[]> fc = vl -> c; 82 final IntFunction<byte[]> fr = vl -> r; 83 final IntFunction<boolean[]> fm = vl -> m; 84 final IntFunction<boolean[]> fmr = vl -> rm; 85 final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; 86 87 88 @Benchmark 89 public void add(Blackhole bh) { 90 byte[] a = fa.apply(SPECIES.length()); 91 byte[] b = fb.apply(SPECIES.length()); 92 byte[] r = fr.apply(SPECIES.length()); 93 94 for (int ic = 0; ic < INVOC_COUNT; ic++) { 95 for (int i = 0; i < a.length; i += SPECIES.length()) { 96 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 97 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 98 av.add(bv).intoArray(r, i); 99 } 100 } 101 102 bh.consume(r); 103 } 104 105 @Benchmark 106 public void addMasked(Blackhole bh) { 107 byte[] a = fa.apply(SPECIES.length()); 108 byte[] b = fb.apply(SPECIES.length()); 109 byte[] r = fr.apply(SPECIES.length()); 110 boolean[] mask = fm.apply(SPECIES.length()); 111 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 112 113 for (int ic = 0; ic < INVOC_COUNT; ic++) { 114 for (int i = 0; i < a.length; i += SPECIES.length()) { 115 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 116 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 117 av.add(bv, vmask).intoArray(r, i); 118 } 119 } 120 121 bh.consume(r); 122 } 123 124 @Benchmark 125 public void sub(Blackhole bh) { 126 byte[] a = fa.apply(SPECIES.length()); 127 byte[] b = fb.apply(SPECIES.length()); 128 byte[] r = fr.apply(SPECIES.length()); 129 130 for (int ic = 0; ic < INVOC_COUNT; ic++) { 131 for (int i = 0; i < a.length; i += SPECIES.length()) { 132 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 133 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 134 av.sub(bv).intoArray(r, i); 135 } 136 } 137 138 bh.consume(r); 139 } 140 141 @Benchmark 142 public void subMasked(Blackhole bh) { 143 byte[] a = fa.apply(SPECIES.length()); 144 byte[] b = fb.apply(SPECIES.length()); 145 byte[] r = fr.apply(SPECIES.length()); 146 boolean[] mask = fm.apply(SPECIES.length()); 147 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 148 149 for (int ic = 0; ic < INVOC_COUNT; ic++) { 150 for (int i = 0; i < a.length; i += SPECIES.length()) { 151 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 152 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 153 av.sub(bv, vmask).intoArray(r, i); 154 } 155 } 156 157 bh.consume(r); 158 } 159 160 161 162 @Benchmark 163 public void mul(Blackhole bh) { 164 byte[] a = fa.apply(SPECIES.length()); 165 byte[] b = fb.apply(SPECIES.length()); 166 byte[] r = fr.apply(SPECIES.length()); 167 168 for (int ic = 0; ic < INVOC_COUNT; ic++) { 169 for (int i = 0; i < a.length; i += SPECIES.length()) { 170 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 171 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 172 av.mul(bv).intoArray(r, i); 173 } 174 } 175 176 bh.consume(r); 177 } 178 179 @Benchmark 180 public void mulMasked(Blackhole bh) { 181 byte[] a = fa.apply(SPECIES.length()); 182 byte[] b = fb.apply(SPECIES.length()); 183 byte[] r = fr.apply(SPECIES.length()); 184 boolean[] mask = fm.apply(SPECIES.length()); 185 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 186 187 for (int ic = 0; ic < INVOC_COUNT; ic++) { 188 for (int i = 0; i < a.length; i += SPECIES.length()) { 189 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 190 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 191 av.mul(bv, vmask).intoArray(r, i); 192 } 193 } 194 195 bh.consume(r); 196 } 197 198 199 @Benchmark 200 public void and(Blackhole bh) { 201 byte[] a = fa.apply(SPECIES.length()); 202 byte[] b = fb.apply(SPECIES.length()); 203 byte[] r = fr.apply(SPECIES.length()); 204 205 for (int ic = 0; ic < INVOC_COUNT; ic++) { 206 for (int i = 0; i < a.length; i += SPECIES.length()) { 207 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 208 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 209 av.and(bv).intoArray(r, i); 210 } 211 } 212 213 bh.consume(r); 214 } 215 216 217 218 @Benchmark 219 public void andMasked(Blackhole bh) { 220 byte[] a = fa.apply(SPECIES.length()); 221 byte[] b = fb.apply(SPECIES.length()); 222 byte[] r = fr.apply(SPECIES.length()); 223 boolean[] mask = fm.apply(SPECIES.length()); 224 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 225 226 for (int ic = 0; ic < INVOC_COUNT; ic++) { 227 for (int i = 0; i < a.length; i += SPECIES.length()) { 228 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 229 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 230 av.and(bv, vmask).intoArray(r, i); 231 } 232 } 233 234 bh.consume(r); 235 } 236 237 238 239 @Benchmark 240 public void or(Blackhole bh) { 241 byte[] a = fa.apply(SPECIES.length()); 242 byte[] b = fb.apply(SPECIES.length()); 243 byte[] r = fr.apply(SPECIES.length()); 244 245 for (int ic = 0; ic < INVOC_COUNT; ic++) { 246 for (int i = 0; i < a.length; i += SPECIES.length()) { 247 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 248 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 249 av.or(bv).intoArray(r, i); 250 } 251 } 252 253 bh.consume(r); 254 } 255 256 257 258 @Benchmark 259 public void orMasked(Blackhole bh) { 260 byte[] a = fa.apply(SPECIES.length()); 261 byte[] b = fb.apply(SPECIES.length()); 262 byte[] r = fr.apply(SPECIES.length()); 263 boolean[] mask = fm.apply(SPECIES.length()); 264 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 265 266 for (int ic = 0; ic < INVOC_COUNT; ic++) { 267 for (int i = 0; i < a.length; i += SPECIES.length()) { 268 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 269 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 270 av.or(bv, vmask).intoArray(r, i); 271 } 272 } 273 274 bh.consume(r); 275 } 276 277 278 279 @Benchmark 280 public void xor(Blackhole bh) { 281 byte[] a = fa.apply(SPECIES.length()); 282 byte[] b = fb.apply(SPECIES.length()); 283 byte[] r = fr.apply(SPECIES.length()); 284 285 for (int ic = 0; ic < INVOC_COUNT; ic++) { 286 for (int i = 0; i < a.length; i += SPECIES.length()) { 287 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 288 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 289 av.xor(bv).intoArray(r, i); 290 } 291 } 292 293 bh.consume(r); 294 } 295 296 297 298 @Benchmark 299 public void xorMasked(Blackhole bh) { 300 byte[] a = fa.apply(SPECIES.length()); 301 byte[] b = fb.apply(SPECIES.length()); 302 byte[] r = fr.apply(SPECIES.length()); 303 boolean[] mask = fm.apply(SPECIES.length()); 304 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 305 306 for (int ic = 0; ic < INVOC_COUNT; ic++) { 307 for (int i = 0; i < a.length; i += SPECIES.length()) { 308 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 309 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 310 av.xor(bv, vmask).intoArray(r, i); 311 } 312 } 313 314 bh.consume(r); 315 } 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 @Benchmark 332 public void aShiftRShift(Blackhole bh) { 333 byte[] a = fa.apply(SPECIES.length()); 334 byte[] b = fb.apply(SPECIES.length()); 335 byte[] r = fr.apply(SPECIES.length()); 336 337 for (int ic = 0; ic < INVOC_COUNT; ic++) { 338 for (int i = 0; i < a.length; i += SPECIES.length()) { 339 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 340 av.aShiftR((int)b[i]).intoArray(r, i); 341 } 342 } 343 344 bh.consume(r); 345 } 346 347 348 349 @Benchmark 350 public void aShiftRMaskedShift(Blackhole bh) { 351 byte[] a = fa.apply(SPECIES.length()); 352 byte[] b = fb.apply(SPECIES.length()); 353 byte[] r = fr.apply(SPECIES.length()); 354 boolean[] mask = fm.apply(SPECIES.length()); 355 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 356 357 for (int ic = 0; ic < INVOC_COUNT; ic++) { 358 for (int i = 0; i < a.length; i += SPECIES.length()) { 359 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 360 av.aShiftR((int)b[i], vmask).intoArray(r, i); 361 } 362 } 363 364 bh.consume(r); 365 } 366 367 368 369 @Benchmark 370 public void shiftLShift(Blackhole bh) { 371 byte[] a = fa.apply(SPECIES.length()); 372 byte[] b = fb.apply(SPECIES.length()); 373 byte[] r = fr.apply(SPECIES.length()); 374 375 for (int ic = 0; ic < INVOC_COUNT; ic++) { 376 for (int i = 0; i < a.length; i += SPECIES.length()) { 377 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 378 av.shiftL((int)b[i]).intoArray(r, i); 379 } 380 } 381 382 bh.consume(r); 383 } 384 385 386 387 @Benchmark 388 public void shiftLMaskedShift(Blackhole bh) { 389 byte[] a = fa.apply(SPECIES.length()); 390 byte[] b = fb.apply(SPECIES.length()); 391 byte[] r = fr.apply(SPECIES.length()); 392 boolean[] mask = fm.apply(SPECIES.length()); 393 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 394 395 for (int ic = 0; ic < INVOC_COUNT; ic++) { 396 for (int i = 0; i < a.length; i += SPECIES.length()) { 397 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 398 av.shiftL((int)b[i], vmask).intoArray(r, i); 399 } 400 } 401 402 bh.consume(r); 403 } 404 405 406 407 @Benchmark 408 public void shiftRShift(Blackhole bh) { 409 byte[] a = fa.apply(SPECIES.length()); 410 byte[] b = fb.apply(SPECIES.length()); 411 byte[] r = fr.apply(SPECIES.length()); 412 413 for (int ic = 0; ic < INVOC_COUNT; ic++) { 414 for (int i = 0; i < a.length; i += SPECIES.length()) { 415 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 416 av.shiftR((int)b[i]).intoArray(r, i); 417 } 418 } 419 420 bh.consume(r); 421 } 422 423 424 425 @Benchmark 426 public void shiftRMaskedShift(Blackhole bh) { 427 byte[] a = fa.apply(SPECIES.length()); 428 byte[] b = fb.apply(SPECIES.length()); 429 byte[] r = fr.apply(SPECIES.length()); 430 boolean[] mask = fm.apply(SPECIES.length()); 431 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 432 433 for (int ic = 0; ic < INVOC_COUNT; ic++) { 434 for (int i = 0; i < a.length; i += SPECIES.length()) { 435 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 436 av.shiftR((int)b[i], vmask).intoArray(r, i); 437 } 438 } 439 440 bh.consume(r); 441 } 442 443 444 445 446 447 448 449 450 @Benchmark 451 public void max(Blackhole bh) { 452 byte[] a = fa.apply(SPECIES.length()); 453 byte[] b = fb.apply(SPECIES.length()); 454 byte[] r = fr.apply(SPECIES.length()); 455 456 for (int ic = 0; ic < INVOC_COUNT; ic++) { 457 for (int i = 0; i < a.length; i += SPECIES.length()) { 458 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 459 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 460 av.max(bv).intoArray(r, i); 461 } 462 } 463 464 bh.consume(r); 465 } 466 467 @Benchmark 468 public void min(Blackhole bh) { 469 byte[] a = fa.apply(SPECIES.length()); 470 byte[] b = fb.apply(SPECIES.length()); 471 byte[] r = fr.apply(SPECIES.length()); 472 473 for (int ic = 0; ic < INVOC_COUNT; ic++) { 474 for (int i = 0; i < a.length; i += SPECIES.length()) { 475 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 476 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 477 av.min(bv).intoArray(r, i); 478 } 479 } 480 481 bh.consume(r); 482 } 483 484 485 @Benchmark 486 public void andAll(Blackhole bh) { 487 byte[] a = fa.apply(SPECIES.length()); 488 byte[] r = fr.apply(SPECIES.length()); 489 byte ra = -1; 490 491 for (int ic = 0; ic < INVOC_COUNT; ic++) { 492 for (int i = 0; i < a.length; i += SPECIES.length()) { 493 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 494 r[i] = av.andAll(); 495 } 496 } 497 498 for (int ic = 0; ic < INVOC_COUNT; ic++) { 499 ra = -1; 500 for (int i = 0; i < a.length; i += SPECIES.length()) { 501 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 502 ra &= av.andAll(); 503 } 504 } 505 506 bh.consume(ra); 507 bh.consume(r); 508 } 509 510 511 512 @Benchmark 513 public void orAll(Blackhole bh) { 514 byte[] a = fa.apply(SPECIES.length()); 515 byte[] r = fr.apply(SPECIES.length()); 516 byte ra = 0; 517 518 for (int ic = 0; ic < INVOC_COUNT; ic++) { 519 for (int i = 0; i < a.length; i += SPECIES.length()) { 520 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 521 r[i] = av.orAll(); 522 } 523 } 524 525 for (int ic = 0; ic < INVOC_COUNT; ic++) { 526 ra = 0; 527 for (int i = 0; i < a.length; i += SPECIES.length()) { 528 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 529 ra |= av.orAll(); 530 } 531 } 532 533 bh.consume(ra); 534 bh.consume(r); 535 } 536 537 538 539 @Benchmark 540 public void xorAll(Blackhole bh) { 541 byte[] a = fa.apply(SPECIES.length()); 542 byte[] r = fr.apply(SPECIES.length()); 543 byte ra = 0; 544 545 for (int ic = 0; ic < INVOC_COUNT; ic++) { 546 for (int i = 0; i < a.length; i += SPECIES.length()) { 547 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 548 r[i] = av.xorAll(); 549 } 550 } 551 552 for (int ic = 0; ic < INVOC_COUNT; ic++) { 553 ra = 0; 554 for (int i = 0; i < a.length; i += SPECIES.length()) { 555 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 556 ra ^= av.xorAll(); 557 } 558 } 559 560 bh.consume(ra); 561 bh.consume(r); 562 } 563 564 565 @Benchmark 566 public void addAll(Blackhole bh) { 567 byte[] a = fa.apply(SPECIES.length()); 568 byte[] r = fr.apply(SPECIES.length()); 569 byte ra = 0; 570 571 for (int ic = 0; ic < INVOC_COUNT; ic++) { 572 for (int i = 0; i < a.length; i += SPECIES.length()) { 573 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 574 r[i] = av.addAll(); 575 } 576 } 577 578 for (int ic = 0; ic < INVOC_COUNT; ic++) { 579 ra = 0; 580 for (int i = 0; i < a.length; i += SPECIES.length()) { 581 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 582 ra += av.addAll(); 583 } 584 } 585 586 bh.consume(ra); 587 bh.consume(r); 588 } 589 590 @Benchmark 591 public void mulAll(Blackhole bh) { 592 byte[] a = fa.apply(SPECIES.length()); 593 byte[] r = fr.apply(SPECIES.length()); 594 byte ra = 1; 595 596 for (int ic = 0; ic < INVOC_COUNT; ic++) { 597 for (int i = 0; i < a.length; i += SPECIES.length()) { 598 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 599 r[i] = av.mulAll(); 600 } 601 } 602 603 for (int ic = 0; ic < INVOC_COUNT; ic++) { 604 ra = 1; 605 for (int i = 0; i < a.length; i += SPECIES.length()) { 606 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 607 ra *= av.mulAll(); 608 } 609 } 610 611 bh.consume(ra); 612 bh.consume(r); 613 } 614 615 @Benchmark 616 public void minAll(Blackhole bh) { 617 byte[] a = fa.apply(SPECIES.length()); 618 byte[] r = fr.apply(SPECIES.length()); 619 byte ra = Byte.MAX_VALUE; 620 621 for (int ic = 0; ic < INVOC_COUNT; ic++) { 622 for (int i = 0; i < a.length; i += SPECIES.length()) { 623 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 624 r[i] = av.minAll(); 625 } 626 } 627 628 for (int ic = 0; ic < INVOC_COUNT; ic++) { 629 ra = Byte.MAX_VALUE; 630 for (int i = 0; i < a.length; i += SPECIES.length()) { 631 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 632 ra = (byte)Math.min(ra, av.minAll()); 633 } 634 } 635 636 bh.consume(ra); 637 bh.consume(r); 638 } 639 640 @Benchmark 641 public void maxAll(Blackhole bh) { 642 byte[] a = fa.apply(SPECIES.length()); 643 byte[] r = fr.apply(SPECIES.length()); 644 byte ra = Byte.MIN_VALUE; 645 646 for (int ic = 0; ic < INVOC_COUNT; ic++) { 647 for (int i = 0; i < a.length; i += SPECIES.length()) { 648 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 649 r[i] = av.maxAll(); 650 } 651 } 652 653 for (int ic = 0; ic < INVOC_COUNT; ic++) { 654 ra = Byte.MIN_VALUE; 655 for (int i = 0; i < a.length; i += SPECIES.length()) { 656 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 657 ra = (byte)Math.max(ra, av.maxAll()); 658 } 659 } 660 661 bh.consume(ra); 662 bh.consume(r); 663 } 664 665 666 @Benchmark 667 public void anyTrue(Blackhole bh) { 668 boolean[] mask = fm.apply(SPECIES.length()); 669 boolean[] r = fmr.apply(SPECIES.length()); 670 671 for (int ic = 0; ic < INVOC_COUNT; ic++) { 672 for (int i = 0; i < mask.length; i += SPECIES.length()) { 673 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i); 674 r[i] = vmask.anyTrue(); 675 } 676 } 677 678 bh.consume(r); 679 } 680 681 682 683 @Benchmark 684 public void allTrue(Blackhole bh) { 685 boolean[] mask = fm.apply(SPECIES.length()); 686 boolean[] r = fmr.apply(SPECIES.length()); 687 688 for (int ic = 0; ic < INVOC_COUNT; ic++) { 689 for (int i = 0; i < mask.length; i += SPECIES.length()) { 690 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i); 691 r[i] = vmask.allTrue(); 692 } 693 } 694 695 bh.consume(r); 696 } 697 698 699 @Benchmark 700 public void with(Blackhole bh) { 701 byte[] a = fa.apply(SPECIES.length()); 702 byte[] r = fr.apply(SPECIES.length()); 703 704 for (int ic = 0; ic < INVOC_COUNT; ic++) { 705 for (int i = 0; i < a.length; i += SPECIES.length()) { 706 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 707 av.with(0, (byte)4).intoArray(r, i); 708 } 709 } 710 711 bh.consume(r); 712 } 713 714 @Benchmark 715 public Object lessThan() { 716 byte[] a = fa.apply(size); 717 byte[] b = fb.apply(size); 718 boolean[] ms = fm.apply(size); 719 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0); 720 721 for (int ic = 0; ic < INVOC_COUNT; ic++) { 722 for (int i = 0; i < a.length; i += SPECIES.length()) { 723 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 724 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 725 Vector.Mask<Byte> mv = av.lessThan(bv); 726 727 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 728 } 729 } 730 return m; 731 } 732 733 734 @Benchmark 735 public Object greaterThan() { 736 byte[] a = fa.apply(size); 737 byte[] b = fb.apply(size); 738 boolean[] ms = fm.apply(size); 739 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0); 740 741 for (int ic = 0; ic < INVOC_COUNT; ic++) { 742 for (int i = 0; i < a.length; i += SPECIES.length()) { 743 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 744 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 745 Vector.Mask<Byte> mv = av.greaterThan(bv); 746 747 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 748 } 749 } 750 return m; 751 } 752 753 754 @Benchmark 755 public Object equal() { 756 byte[] a = fa.apply(size); 757 byte[] b = fb.apply(size); 758 boolean[] ms = fm.apply(size); 759 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0); 760 761 for (int ic = 0; ic < INVOC_COUNT; ic++) { 762 for (int i = 0; i < a.length; i += SPECIES.length()) { 763 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 764 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 765 Vector.Mask<Byte> mv = av.equal(bv); 766 767 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 768 } 769 } 770 return m; 771 } 772 773 774 @Benchmark 775 public Object notEqual() { 776 byte[] a = fa.apply(size); 777 byte[] b = fb.apply(size); 778 boolean[] ms = fm.apply(size); 779 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0); 780 781 for (int ic = 0; ic < INVOC_COUNT; ic++) { 782 for (int i = 0; i < a.length; i += SPECIES.length()) { 783 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 784 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 785 Vector.Mask<Byte> mv = av.notEqual(bv); 786 787 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 788 } 789 } 790 return m; 791 } 792 793 794 @Benchmark 795 public Object lessThanEq() { 796 byte[] a = fa.apply(size); 797 byte[] b = fb.apply(size); 798 boolean[] ms = fm.apply(size); 799 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0); 800 801 for (int ic = 0; ic < INVOC_COUNT; ic++) { 802 for (int i = 0; i < a.length; i += SPECIES.length()) { 803 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 804 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 805 Vector.Mask<Byte> mv = av.lessThanEq(bv); 806 807 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 808 } 809 } 810 return m; 811 } 812 813 814 @Benchmark 815 public Object greaterThanEq() { 816 byte[] a = fa.apply(size); 817 byte[] b = fb.apply(size); 818 boolean[] ms = fm.apply(size); 819 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0); 820 821 for (int ic = 0; ic < INVOC_COUNT; ic++) { 822 for (int i = 0; i < a.length; i += SPECIES.length()) { 823 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 824 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 825 Vector.Mask<Byte> mv = av.greaterThanEq(bv); 826 827 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 828 } 829 } 830 return m; 831 } 832 833 834 @Benchmark 835 public void blend(Blackhole bh) { 836 byte[] a = fa.apply(SPECIES.length()); 837 byte[] b = fb.apply(SPECIES.length()); 838 byte[] r = fr.apply(SPECIES.length()); 839 boolean[] mask = fm.apply(SPECIES.length()); 840 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 841 842 for (int ic = 0; ic < INVOC_COUNT; ic++) { 843 for (int i = 0; i < a.length; i += SPECIES.length()) { 844 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 845 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 846 av.blend(bv, vmask).intoArray(r, i); 847 } 848 } 849 850 bh.consume(r); 851 } 852 853 @Benchmark 854 public void rearrange(Blackhole bh) { 855 byte[] a = fa.apply(SPECIES.length()); 856 int[] order = fs.apply(a.length, SPECIES.length()); 857 byte[] r = fr.apply(SPECIES.length()); 858 859 for (int ic = 0; ic < INVOC_COUNT; ic++) { 860 for (int i = 0; i < a.length; i += SPECIES.length()) { 861 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 862 av.rearrange(ByteVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i); 863 } 864 } 865 866 bh.consume(r); 867 } 868 869 @Benchmark 870 public void extract(Blackhole bh) { 871 byte[] a = fa.apply(SPECIES.length()); 872 byte[] r = fr.apply(SPECIES.length()); 873 874 for (int ic = 0; ic < INVOC_COUNT; ic++) { 875 for (int i = 0; i < a.length; i += SPECIES.length()) { 876 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 877 int num_lanes = SPECIES.length(); 878 // Manually unroll because full unroll happens after intrinsification. 879 // Unroll is needed because get intrinsic requires for index to be a known constant. 880 if (num_lanes == 1) { 881 r[i]=av.get(0); 882 } else if (num_lanes == 2) { 883 r[i]=av.get(0); 884 r[i+1]=av.get(1); 885 } else if (num_lanes == 4) { 886 r[i]=av.get(0); 887 r[i+1]=av.get(1); 888 r[i+2]=av.get(2); 889 r[i+3]=av.get(3); 890 } else if (num_lanes == 8) { 891 r[i]=av.get(0); 892 r[i+1]=av.get(1); 893 r[i+2]=av.get(2); 894 r[i+3]=av.get(3); 895 r[i+4]=av.get(4); 896 r[i+5]=av.get(5); 897 r[i+6]=av.get(6); 898 r[i+7]=av.get(7); 899 } else if (num_lanes == 16) { 900 r[i]=av.get(0); 901 r[i+1]=av.get(1); 902 r[i+2]=av.get(2); 903 r[i+3]=av.get(3); 904 r[i+4]=av.get(4); 905 r[i+5]=av.get(5); 906 r[i+6]=av.get(6); 907 r[i+7]=av.get(7); 908 r[i+8]=av.get(8); 909 r[i+9]=av.get(9); 910 r[i+10]=av.get(10); 911 r[i+11]=av.get(11); 912 r[i+12]=av.get(12); 913 r[i+13]=av.get(13); 914 r[i+14]=av.get(14); 915 r[i+15]=av.get(15); 916 } else if (num_lanes == 32) { 917 r[i]=av.get(0); 918 r[i+1]=av.get(1); 919 r[i+2]=av.get(2); 920 r[i+3]=av.get(3); 921 r[i+4]=av.get(4); 922 r[i+5]=av.get(5); 923 r[i+6]=av.get(6); 924 r[i+7]=av.get(7); 925 r[i+8]=av.get(8); 926 r[i+9]=av.get(9); 927 r[i+10]=av.get(10); 928 r[i+11]=av.get(11); 929 r[i+12]=av.get(12); 930 r[i+13]=av.get(13); 931 r[i+14]=av.get(14); 932 r[i+15]=av.get(15); 933 r[i+16]=av.get(16); 934 r[i+17]=av.get(17); 935 r[i+18]=av.get(18); 936 r[i+19]=av.get(19); 937 r[i+20]=av.get(20); 938 r[i+21]=av.get(21); 939 r[i+22]=av.get(22); 940 r[i+23]=av.get(23); 941 r[i+24]=av.get(24); 942 r[i+25]=av.get(25); 943 r[i+26]=av.get(26); 944 r[i+27]=av.get(27); 945 r[i+28]=av.get(28); 946 r[i+29]=av.get(29); 947 r[i+30]=av.get(30); 948 r[i+31]=av.get(31); 949 } else if (num_lanes == 64) { 950 r[i]=av.get(0); 951 r[i+1]=av.get(1); 952 r[i+2]=av.get(2); 953 r[i+3]=av.get(3); 954 r[i+4]=av.get(4); 955 r[i+5]=av.get(5); 956 r[i+6]=av.get(6); 957 r[i+7]=av.get(7); 958 r[i+8]=av.get(8); 959 r[i+9]=av.get(9); 960 r[i+10]=av.get(10); 961 r[i+11]=av.get(11); 962 r[i+12]=av.get(12); 963 r[i+13]=av.get(13); 964 r[i+14]=av.get(14); 965 r[i+15]=av.get(15); 966 r[i+16]=av.get(16); 967 r[i+17]=av.get(17); 968 r[i+18]=av.get(18); 969 r[i+19]=av.get(19); 970 r[i+20]=av.get(20); 971 r[i+21]=av.get(21); 972 r[i+22]=av.get(22); 973 r[i+23]=av.get(23); 974 r[i+24]=av.get(24); 975 r[i+25]=av.get(25); 976 r[i+26]=av.get(26); 977 r[i+27]=av.get(27); 978 r[i+28]=av.get(28); 979 r[i+29]=av.get(29); 980 r[i+30]=av.get(30); 981 r[i+31]=av.get(31); 982 r[i+32]=av.get(32); 983 r[i+33]=av.get(33); 984 r[i+34]=av.get(34); 985 r[i+35]=av.get(35); 986 r[i+36]=av.get(36); 987 r[i+37]=av.get(37); 988 r[i+38]=av.get(38); 989 r[i+39]=av.get(39); 990 r[i+40]=av.get(40); 991 r[i+41]=av.get(41); 992 r[i+42]=av.get(42); 993 r[i+43]=av.get(43); 994 r[i+44]=av.get(44); 995 r[i+45]=av.get(45); 996 r[i+46]=av.get(46); 997 r[i+47]=av.get(47); 998 r[i+48]=av.get(48); 999 r[i+49]=av.get(49); 1000 r[i+50]=av.get(50); 1001 r[i+51]=av.get(51); 1002 r[i+52]=av.get(52); 1003 r[i+53]=av.get(53); 1004 r[i+54]=av.get(54); 1005 r[i+55]=av.get(55); 1006 r[i+56]=av.get(56); 1007 r[i+57]=av.get(57); 1008 r[i+58]=av.get(58); 1009 r[i+59]=av.get(59); 1010 r[i+60]=av.get(60); 1011 r[i+61]=av.get(61); 1012 r[i+62]=av.get(62); 1013 r[i+63]=av.get(63); 1014 } else { 1015 for (int j = 0; j < SPECIES.length(); j++) { 1016 r[i+j]=av.get(j); 1017 } 1018 } 1019 } 1020 } 1021 1022 bh.consume(r); 1023 } 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 @Benchmark 1046 public void neg(Blackhole bh) { 1047 byte[] a = fa.apply(SPECIES.length()); 1048 byte[] r = fr.apply(SPECIES.length()); 1049 1050 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1051 for (int i = 0; i < a.length; i += SPECIES.length()) { 1052 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1053 av.neg().intoArray(r, i); 1054 } 1055 } 1056 1057 bh.consume(r); 1058 } 1059 1060 @Benchmark 1061 public void negMasked(Blackhole bh) { 1062 byte[] a = fa.apply(SPECIES.length()); 1063 byte[] r = fr.apply(SPECIES.length()); 1064 boolean[] mask = fm.apply(SPECIES.length()); 1065 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 1066 1067 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1068 for (int i = 0; i < a.length; i += SPECIES.length()) { 1069 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1070 av.neg(vmask).intoArray(r, i); 1071 } 1072 } 1073 1074 bh.consume(r); 1075 } 1076 1077 @Benchmark 1078 public void abs(Blackhole bh) { 1079 byte[] a = fa.apply(SPECIES.length()); 1080 byte[] r = fr.apply(SPECIES.length()); 1081 1082 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1083 for (int i = 0; i < a.length; i += SPECIES.length()) { 1084 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1085 av.abs().intoArray(r, i); 1086 } 1087 } 1088 1089 bh.consume(r); 1090 } 1091 1092 @Benchmark 1093 public void absMasked(Blackhole bh) { 1094 byte[] a = fa.apply(SPECIES.length()); 1095 byte[] r = fr.apply(SPECIES.length()); 1096 boolean[] mask = fm.apply(SPECIES.length()); 1097 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 1098 1099 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1100 for (int i = 0; i < a.length; i += SPECIES.length()) { 1101 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1102 av.abs(vmask).intoArray(r, i); 1103 } 1104 } 1105 1106 bh.consume(r); 1107 } 1108 1109 1110 @Benchmark 1111 public void not(Blackhole bh) { 1112 byte[] a = fa.apply(SPECIES.length()); 1113 byte[] r = fr.apply(SPECIES.length()); 1114 1115 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1116 for (int i = 0; i < a.length; i += SPECIES.length()) { 1117 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1118 av.not().intoArray(r, i); 1119 } 1120 } 1121 1122 bh.consume(r); 1123 } 1124 1125 1126 1127 @Benchmark 1128 public void notMasked(Blackhole bh) { 1129 byte[] a = fa.apply(SPECIES.length()); 1130 byte[] r = fr.apply(SPECIES.length()); 1131 boolean[] mask = fm.apply(SPECIES.length()); 1132 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask); 1133 1134 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1135 for (int i = 0; i < a.length; i += SPECIES.length()) { 1136 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1137 av.not(vmask).intoArray(r, i); 1138 } 1139 } 1140 1141 bh.consume(r); 1142 } 1143 1144 1145 1146 1147 1148 } 1149