/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have
 * questions.
 */

package benchmark.jdk.incubator.vector;

import jdk.incubator.vector.Vector;
import jdk.incubator.vector.Vector.Shape;
import jdk.incubator.vector.ByteVector;

import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

/**
 * JMH throughput benchmarks for {@link ByteVector} operations using the
 * {@code Shape.S_Max_BIT} species.
 *
 * <p>Every benchmark walks its input array(s) in steps of
 * {@code SPECIES.length()}, loads one vector per step, applies a single
 * operation and either stores the result into an output array or folds it
 * into an accumulator. The output is then handed to the {@link Blackhole}
 * (or returned) so the work stays observable.
 *
 * <p>{@code fillMask}, {@code fillInt} and {@code RANDOM} are not defined in
 * this file; presumably they are inherited from
 * {@code AbstractVectorBenchmark} -- confirm there.
 *
 * <p>NOTE(review): the highly regular method bodies suggest this file is
 * produced from a template; if so, hand edits should be mirrored in the
 * template -- confirm before editing.
 */
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class ByteMaxVector extends AbstractVectorBenchmark {
    /** Species shared by all benchmarks in this class. */
    static final ByteVector.ByteSpecies SPECIES = ByteVector.species(Shape.S_Max_BIT);

    /** Trip count of the outer loop wrapped around every kernel. */
    static final int INVOC_COUNT = 1; // get rid of outer loop

    /** Requested element count; {@link #init()} rounds it up to a whole number of vectors. */
    @Param("1024")
    int size;

    /**
     * Allocates a {@code byte[]} of {@link #size} elements and sets element
     * {@code i} to {@code f.apply(i)}.
     *
     * @param f generator invoked once per index
     * @return the newly filled array
     */
    byte[] fill(IntFunction<Byte> f) {
        byte[] array = new byte[size];
        for (int i = 0; i < array.length; i++) {
            array[i] = f.apply(i);
        }
        return array;
    }

    /** Inputs ({@code a}, {@code b}, {@code c}) and the shared result array ({@code r}). */
    byte[] a, b, c, r;
    /** Input mask values ({@code m}) and the result array of the mask benchmarks ({@code rm}). */
    boolean[] m, rm;
    /** Index source handed to {@code shuffleFromArray} by {@link #rearrange(Blackhole)}. */
    int[] s;

    /**
     * Builds all input and output arrays before the benchmarks run.
     *
     * <p>{@link #size} is first rounded up to the next multiple of
     * {@code SPECIES.length()}, because the kernels have no scalar
     * post-loop and always load/store whole vectors. The previous
     * {@code size += size % lanes} only produced a multiple when the
     * remainder was 0 or {@code lanes / 2}; any other size made the last
     * vector access run past the end of the arrays.
     */
    @Setup
    public void init() {
        final int lanes = SPECIES.length();
        // Pad up to a whole number of vectors (adds 0 when already aligned).
        size += (lanes - size % lanes) % lanes; // FIXME: add post-loops

        a = fill(i -> (byte)(2*i));
        b = fill(i -> (byte)(i+1));
        c = fill(i -> (byte)(i+5));
        r = fill(i -> (byte)0);

        m = fillMask(size, i -> (i % 2) == 0);
        rm = fillMask(size, i -> false);

        s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
    }

    // Accessors through which the benchmarks obtain their arrays. Each one
    // ignores its argument(s) and returns the current value of the field.
    final IntFunction<byte[]> fa = vl -> a;
    final IntFunction<byte[]> fb = vl -> b;
    final IntFunction<byte[]> fc = vl -> c;
    final IntFunction<byte[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;

    /** Benchmarks {@code av.add(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void add(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.add(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.add(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void addMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.add(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.sub(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void sub(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.sub(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.sub(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void subMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.sub(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.mul(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void mul(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.mul(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.mul(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void mulMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.mul(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.and(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void and(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.and(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.and(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void andMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.and(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.or(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void or(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.or(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.or(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void orMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.or(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.xor(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void xor(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.xor(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.xor(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void xorMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.xor(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.aShiftR(int)}; the scalar shift count for each vector is {@code b[i]}. */
    @Benchmark
    public void aShiftRShift(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.aShiftR((int)b[i]).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.aShiftR(int, vmask)}; shift count is {@code b[i]}, mask is built once from {@code m}. */
    @Benchmark
    public void aShiftRMaskedShift(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.aShiftR((int)b[i], vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.shiftL(int)}; the scalar shift count for each vector is {@code b[i]}. */
    @Benchmark
    public void shiftLShift(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.shiftL((int)b[i]).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.shiftL(int, vmask)}; shift count is {@code b[i]}, mask is built once from {@code m}. */
    @Benchmark
    public void shiftLMaskedShift(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.shiftL((int)b[i], vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.shiftR(int)}; the scalar shift count for each vector is {@code b[i]}. */
    @Benchmark
    public void shiftRShift(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.shiftR((int)b[i]).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.shiftR(int, vmask)}; shift count is {@code b[i]}, mask is built once from {@code m}. */
    @Benchmark
    public void shiftRMaskedShift(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.shiftR((int)b[i], vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.max(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void max(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.max(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.min(bv)}, storing each result vector into {@code r}. */
    @Benchmark
    public void min(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.min(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks the {@code andAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass ANDs the per-vector
     * results into the scalar {@code ra}.
     */
    @Benchmark
    public void andAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = -1;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.andAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = -1;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra &= av.andAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks the {@code orAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass ORs the per-vector
     * results into the scalar {@code ra}.
     */
    @Benchmark
    public void orAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = 0;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.orAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = 0;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra |= av.orAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks the {@code xorAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass XORs the per-vector
     * results into the scalar {@code ra}.
     */
    @Benchmark
    public void xorAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = 0;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.xorAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = 0;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra ^= av.xorAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks the {@code addAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass sums the per-vector
     * results into the scalar {@code ra}.
     */
    @Benchmark
    public void addAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = 0;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.addAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = 0;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra += av.addAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks the {@code mulAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass multiplies the
     * per-vector results into the scalar {@code ra}.
     */
    @Benchmark
    public void mulAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = 1;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.mulAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = 1;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra *= av.mulAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks the {@code minAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass keeps the smallest
     * per-vector result in the scalar {@code ra}.
     */
    @Benchmark
    public void minAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = Byte.MAX_VALUE;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.minAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = Byte.MAX_VALUE;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra = (byte)Math.min(ra, av.minAll());
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks the {@code maxAll()} reduction. The first pass stores each
     * per-vector result in {@code r[i]}; the second pass keeps the largest
     * per-vector result in the scalar {@code ra}.
     */
    @Benchmark
    public void maxAll(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        byte ra = Byte.MIN_VALUE;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                r[i] = av.maxAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = Byte.MIN_VALUE;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ra = (byte)Math.max(ra, av.maxAll());
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /** Benchmarks {@code Mask.anyTrue()} on masks loaded from {@code m} at each vector offset; results go to {@code rm}. */
    @Benchmark
    public void anyTrue(Blackhole bh) {
        boolean[] mask = fm.apply(SPECIES.length());
        boolean[] r = fmr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < mask.length; i += SPECIES.length()) {
                Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
                r[i] = vmask.anyTrue();
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code Mask.allTrue()} on masks loaded from {@code m} at each vector offset; results go to {@code rm}. */
    @Benchmark
    public void allTrue(Blackhole bh) {
        boolean[] mask = fm.apply(SPECIES.length());
        boolean[] r = fmr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < mask.length; i += SPECIES.length()) {
                Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
                r[i] = vmask.allTrue();
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.with(0, (byte)4)}, storing each result vector into {@code r}. */
    @Benchmark
    public void with(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.with(0, (byte)4).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks {@code av.lessThan(bv)}.
     *
     * @return the accumulator mask, seeded from {@code m} at offset 0 and
     *         ANDed with every comparison result
     */
    @Benchmark
    public Object lessThan() {
        byte[] a = fa.apply(size);
        byte[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                Vector.Mask<Byte> mv = av.lessThan(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /**
     * Benchmarks {@code av.greaterThan(bv)}.
     *
     * @return the accumulator mask, seeded from {@code m} at offset 0 and
     *         ANDed with every comparison result
     */
    @Benchmark
    public Object greaterThan() {
        byte[] a = fa.apply(size);
        byte[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                Vector.Mask<Byte> mv = av.greaterThan(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /**
     * Benchmarks {@code av.equal(bv)}.
     *
     * @return the accumulator mask, seeded from {@code m} at offset 0 and
     *         ANDed with every comparison result
     */
    @Benchmark
    public Object equal() {
        byte[] a = fa.apply(size);
        byte[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                Vector.Mask<Byte> mv = av.equal(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /**
     * Benchmarks {@code av.notEqual(bv)}.
     *
     * @return the accumulator mask, seeded from {@code m} at offset 0 and
     *         ANDed with every comparison result
     */
    @Benchmark
    public Object notEqual() {
        byte[] a = fa.apply(size);
        byte[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                Vector.Mask<Byte> mv = av.notEqual(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /**
     * Benchmarks {@code av.lessThanEq(bv)}.
     *
     * @return the accumulator mask, seeded from {@code m} at offset 0 and
     *         ANDed with every comparison result
     */
    @Benchmark
    public Object lessThanEq() {
        byte[] a = fa.apply(size);
        byte[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                Vector.Mask<Byte> mv = av.lessThanEq(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /**
     * Benchmarks {@code av.greaterThanEq(bv)}.
     *
     * @return the accumulator mask, seeded from {@code m} at offset 0 and
     *         ANDed with every comparison result
     */
    @Benchmark
    public Object greaterThanEq() {
        byte[] a = fa.apply(size);
        byte[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                Vector.Mask<Byte> mv = av.greaterThanEq(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code av.blend(bv, vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void blend(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] b = fb.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
                av.blend(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.rearrange(shuffle)}, creating the shuffle from {@code s} at the current offset on every step. */
    @Benchmark
    public void rearrange(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        int[] order = fs.apply(a.length, SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.rearrange(ByteVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks {@code av.get(lane)} by copying every lane of each vector
     * into {@code r}. Lane counts 1, 2, 4, 8, 16, 32 and 64 use hand-unrolled
     * code with literal lane indices; any other lane count falls back to a
     * loop.
     */
    @Benchmark
    public void extract(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                int numLanes = SPECIES.length();
                // Manually unroll because full unroll happens after intrinsification.
                // Unroll is needed because get intrinsic requires for index to be a known constant.
                if (numLanes == 1) {
                    r[i] = av.get(0);
                } else if (numLanes == 2) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                } else if (numLanes == 4) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                } else if (numLanes == 8) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                } else if (numLanes == 16) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                    r[i + 8] = av.get(8);
                    r[i + 9] = av.get(9);
                    r[i + 10] = av.get(10);
                    r[i + 11] = av.get(11);
                    r[i + 12] = av.get(12);
                    r[i + 13] = av.get(13);
                    r[i + 14] = av.get(14);
                    r[i + 15] = av.get(15);
                } else if (numLanes == 32) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                    r[i + 8] = av.get(8);
                    r[i + 9] = av.get(9);
                    r[i + 10] = av.get(10);
                    r[i + 11] = av.get(11);
                    r[i + 12] = av.get(12);
                    r[i + 13] = av.get(13);
                    r[i + 14] = av.get(14);
                    r[i + 15] = av.get(15);
                    r[i + 16] = av.get(16);
                    r[i + 17] = av.get(17);
                    r[i + 18] = av.get(18);
                    r[i + 19] = av.get(19);
                    r[i + 20] = av.get(20);
                    r[i + 21] = av.get(21);
                    r[i + 22] = av.get(22);
                    r[i + 23] = av.get(23);
                    r[i + 24] = av.get(24);
                    r[i + 25] = av.get(25);
                    r[i + 26] = av.get(26);
                    r[i + 27] = av.get(27);
                    r[i + 28] = av.get(28);
                    r[i + 29] = av.get(29);
                    r[i + 30] = av.get(30);
                    r[i + 31] = av.get(31);
                } else if (numLanes == 64) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                    r[i + 8] = av.get(8);
                    r[i + 9] = av.get(9);
                    r[i + 10] = av.get(10);
                    r[i + 11] = av.get(11);
                    r[i + 12] = av.get(12);
                    r[i + 13] = av.get(13);
                    r[i + 14] = av.get(14);
                    r[i + 15] = av.get(15);
                    r[i + 16] = av.get(16);
                    r[i + 17] = av.get(17);
                    r[i + 18] = av.get(18);
                    r[i + 19] = av.get(19);
                    r[i + 20] = av.get(20);
                    r[i + 21] = av.get(21);
                    r[i + 22] = av.get(22);
                    r[i + 23] = av.get(23);
                    r[i + 24] = av.get(24);
                    r[i + 25] = av.get(25);
                    r[i + 26] = av.get(26);
                    r[i + 27] = av.get(27);
                    r[i + 28] = av.get(28);
                    r[i + 29] = av.get(29);
                    r[i + 30] = av.get(30);
                    r[i + 31] = av.get(31);
                    r[i + 32] = av.get(32);
                    r[i + 33] = av.get(33);
                    r[i + 34] = av.get(34);
                    r[i + 35] = av.get(35);
                    r[i + 36] = av.get(36);
                    r[i + 37] = av.get(37);
                    r[i + 38] = av.get(38);
                    r[i + 39] = av.get(39);
                    r[i + 40] = av.get(40);
                    r[i + 41] = av.get(41);
                    r[i + 42] = av.get(42);
                    r[i + 43] = av.get(43);
                    r[i + 44] = av.get(44);
                    r[i + 45] = av.get(45);
                    r[i + 46] = av.get(46);
                    r[i + 47] = av.get(47);
                    r[i + 48] = av.get(48);
                    r[i + 49] = av.get(49);
                    r[i + 50] = av.get(50);
                    r[i + 51] = av.get(51);
                    r[i + 52] = av.get(52);
                    r[i + 53] = av.get(53);
                    r[i + 54] = av.get(54);
                    r[i + 55] = av.get(55);
                    r[i + 56] = av.get(56);
                    r[i + 57] = av.get(57);
                    r[i + 58] = av.get(58);
                    r[i + 59] = av.get(59);
                    r[i + 60] = av.get(60);
                    r[i + 61] = av.get(61);
                    r[i + 62] = av.get(62);
                    r[i + 63] = av.get(63);
                } else {
                    for (int j = 0; j < SPECIES.length(); j++) {
                        r[i + j] = av.get(j);
                    }
                }
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.neg()}, storing each result vector into {@code r}. */
    @Benchmark
    public void neg(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.neg().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.neg(vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void negMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.neg(vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.abs()}, storing each result vector into {@code r}. */
    @Benchmark
    public void abs(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.abs().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.abs(vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void absMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.abs(vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.not()}, storing each result vector into {@code r}. */
    @Benchmark
    public void not(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.not().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code av.not(vmask)} with a mask built once from {@code m} via {@code maskFromValues}. */
    @Benchmark
    public void notMasked(Blackhole bh) {
        byte[] a = fa.apply(SPECIES.length());
        byte[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                ByteVector av = ByteVector.fromArray(SPECIES, a, i);
                av.not(vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }
}