1 /* 2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 import jdk.incubator.vector.Vector; 27 import jdk.incubator.vector.VectorShape; 28 import jdk.incubator.vector.VectorSpecies; 29 import jdk.incubator.vector.VectorShuffle; 30 import jdk.incubator.vector.ByteVector; 31 32 import java.util.concurrent.TimeUnit; 33 import java.util.function.BiFunction; 34 import java.util.function.IntFunction; 35 36 import org.openjdk.jmh.annotations.*; 37 import org.openjdk.jmh.infra.Blackhole; 38 39 @BenchmarkMode(Mode.Throughput) 40 @OutputTimeUnit(TimeUnit.MILLISECONDS) 41 @State(Scope.Benchmark) 42 @Warmup(iterations = 3, time = 1) 43 @Measurement(iterations = 5, time = 1) 44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 45 public class Byte64Vector extends AbstractVectorBenchmark { 46 static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_64; 47 48 static final int INVOC_COUNT = 1; // get rid of outer loop 49 50 @Param("1024") 51 int size; 52 53 byte[] fill(IntFunction<Byte> f) { 54 byte[] array = new byte[size]; 55 for (int i = 0; i < array.length; i++) { 56 array[i] = f.apply(i); 57 } 58 return array; 59 } 60 61 byte[] a, b, c, r; 62 boolean[] m, rm; 63 int[] s; 64 65 @Setup 66 public void init() { 67 size += size % SPECIES.length(); // FIXME: add post-loops 68 69 a = fill(i -> (byte)(2*i)); 70 b = fill(i -> (byte)(i+1)); 71 c = fill(i -> (byte)(i+5)); 72 r = fill(i -> (byte)0); 73 74 m = fillMask(size, i -> (i % 2) == 0); 75 rm = fillMask(size, i -> false); 76 77 s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length())); 78 } 79 80 final IntFunction<byte[]> fa = vl -> a; 81 final IntFunction<byte[]> fb = vl -> b; 82 final IntFunction<byte[]> fc = vl -> c; 83 final IntFunction<byte[]> fr = vl -> r; 84 final IntFunction<boolean[]> fm = vl -> m; 85 final IntFunction<boolean[]> fmr = vl -> rm; 86 final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; 87 88 89 @Benchmark 90 public void add(Blackhole bh) { 91 byte[] a = fa.apply(SPECIES.length()); 92 byte[] b = fb.apply(SPECIES.length()); 93 byte[] r = fr.apply(SPECIES.length()); 94 95 for (int ic = 0; ic < INVOC_COUNT; ic++) { 96 for (int i = 0; i < a.length; i += SPECIES.length()) { 97 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 98 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 99 av.add(bv).intoArray(r, i); 100 } 101 } 102 103 bh.consume(r); 104 } 105 106 @Benchmark 107 public void addMasked(Blackhole bh) { 108 byte[] a = fa.apply(SPECIES.length()); 109 byte[] b = fb.apply(SPECIES.length()); 110 byte[] r = fr.apply(SPECIES.length()); 111 boolean[] mask = fm.apply(SPECIES.length()); 112 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 113 114 for (int ic = 0; ic < INVOC_COUNT; ic++) { 115 for (int i = 0; i < a.length; i += SPECIES.length()) { 116 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 117 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 118 av.add(bv, vmask).intoArray(r, i); 119 } 120 } 121 122 bh.consume(r); 123 } 124 125 @Benchmark 126 public void sub(Blackhole bh) { 127 byte[] a = fa.apply(SPECIES.length()); 128 byte[] b = fb.apply(SPECIES.length()); 129 byte[] r = fr.apply(SPECIES.length()); 130 131 for (int ic = 0; ic < INVOC_COUNT; ic++) { 132 for (int i = 0; i < a.length; i += SPECIES.length()) { 133 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 134 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 135 av.sub(bv).intoArray(r, i); 136 } 137 } 138 139 bh.consume(r); 140 } 141 142 @Benchmark 143 public void subMasked(Blackhole bh) { 144 byte[] a = fa.apply(SPECIES.length()); 145 byte[] b = fb.apply(SPECIES.length()); 146 byte[] r = fr.apply(SPECIES.length()); 147 boolean[] mask = fm.apply(SPECIES.length()); 148 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 149 150 for (int ic = 0; ic < INVOC_COUNT; ic++) { 151 for (int i = 0; i < a.length; i += SPECIES.length()) { 152 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 153 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 154 av.sub(bv, vmask).intoArray(r, i); 155 } 156 } 157 158 bh.consume(r); 159 } 160 161 162 163 @Benchmark 164 public void mul(Blackhole bh) { 165 byte[] a = fa.apply(SPECIES.length()); 166 byte[] b = fb.apply(SPECIES.length()); 167 byte[] r = fr.apply(SPECIES.length()); 168 169 for (int ic = 0; ic < INVOC_COUNT; ic++) { 170 for (int i = 0; i < a.length; i += SPECIES.length()) { 171 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 172 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 173 av.mul(bv).intoArray(r, i); 174 } 175 } 176 177 bh.consume(r); 178 } 179 180 @Benchmark 181 public void mulMasked(Blackhole bh) { 182 byte[] a = fa.apply(SPECIES.length()); 183 byte[] b = fb.apply(SPECIES.length()); 184 byte[] r = fr.apply(SPECIES.length()); 185 boolean[] mask = fm.apply(SPECIES.length()); 186 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 187 188 for (int ic = 0; ic < INVOC_COUNT; ic++) { 189 for (int i = 0; i < a.length; i += SPECIES.length()) { 190 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 191 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 192 av.mul(bv, vmask).intoArray(r, i); 193 } 194 } 195 196 bh.consume(r); 197 } 198 199 200 @Benchmark 201 public void and(Blackhole bh) { 202 byte[] a = fa.apply(SPECIES.length()); 203 byte[] b = fb.apply(SPECIES.length()); 204 byte[] r = fr.apply(SPECIES.length()); 205 206 for (int ic = 0; ic < INVOC_COUNT; ic++) { 207 for (int i = 0; i < a.length; i += SPECIES.length()) { 208 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 209 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 210 av.and(bv).intoArray(r, i); 211 } 212 } 213 214 bh.consume(r); 215 } 216 217 218 219 @Benchmark 220 public void andMasked(Blackhole bh) { 221 byte[] a = fa.apply(SPECIES.length()); 222 byte[] b = fb.apply(SPECIES.length()); 223 byte[] r = fr.apply(SPECIES.length()); 224 boolean[] mask = fm.apply(SPECIES.length()); 225 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 226 227 for (int ic = 0; ic < INVOC_COUNT; ic++) { 228 for (int i = 0; i < a.length; i += SPECIES.length()) { 229 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 230 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 231 av.and(bv, vmask).intoArray(r, i); 232 } 233 } 234 235 bh.consume(r); 236 } 237 238 239 240 @Benchmark 241 public void or(Blackhole bh) { 242 byte[] a = fa.apply(SPECIES.length()); 243 byte[] b = fb.apply(SPECIES.length()); 244 byte[] r = fr.apply(SPECIES.length()); 245 246 for (int ic = 0; ic < INVOC_COUNT; ic++) { 247 for (int i = 0; i < a.length; i += SPECIES.length()) { 248 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 249 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 250 av.or(bv).intoArray(r, i); 251 } 252 } 253 254 bh.consume(r); 255 } 256 257 258 259 @Benchmark 260 public void orMasked(Blackhole bh) { 261 byte[] a = fa.apply(SPECIES.length()); 262 byte[] b = fb.apply(SPECIES.length()); 263 byte[] r = fr.apply(SPECIES.length()); 264 boolean[] mask = fm.apply(SPECIES.length()); 265 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 266 267 for (int ic = 0; ic < INVOC_COUNT; ic++) { 268 for (int i = 0; i < a.length; i += SPECIES.length()) { 269 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 270 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 271 av.or(bv, vmask).intoArray(r, i); 272 } 273 } 274 275 bh.consume(r); 276 } 277 278 279 280 @Benchmark 281 public void xor(Blackhole bh) { 282 byte[] a = fa.apply(SPECIES.length()); 283 byte[] b = fb.apply(SPECIES.length()); 284 byte[] r = fr.apply(SPECIES.length()); 285 286 for (int ic = 0; ic < INVOC_COUNT; ic++) { 287 for (int i = 0; i < a.length; i += SPECIES.length()) { 288 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 289 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 290 av.xor(bv).intoArray(r, i); 291 } 292 } 293 294 bh.consume(r); 295 } 296 297 298 299 @Benchmark 300 public void xorMasked(Blackhole bh) { 301 byte[] a = fa.apply(SPECIES.length()); 302 byte[] b = fb.apply(SPECIES.length()); 303 byte[] r = fr.apply(SPECIES.length()); 304 boolean[] mask = fm.apply(SPECIES.length()); 305 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 306 307 for (int ic = 0; ic < INVOC_COUNT; ic++) { 308 for (int i = 0; i < a.length; i += SPECIES.length()) { 309 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 310 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 311 av.xor(bv, vmask).intoArray(r, i); 312 } 313 } 314 315 bh.consume(r); 316 } 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 @Benchmark 333 public void aShiftRShift(Blackhole bh) { 334 byte[] a = fa.apply(SPECIES.length()); 335 byte[] b = fb.apply(SPECIES.length()); 336 byte[] r = fr.apply(SPECIES.length()); 337 338 for (int ic = 0; ic < INVOC_COUNT; ic++) { 339 for (int i = 0; i < a.length; i += SPECIES.length()) { 340 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 341 av.aShiftR((int)b[i]).intoArray(r, i); 342 } 343 } 344 345 bh.consume(r); 346 } 347 348 349 350 @Benchmark 351 public void aShiftRMaskedShift(Blackhole bh) { 352 byte[] a = fa.apply(SPECIES.length()); 353 byte[] b = fb.apply(SPECIES.length()); 354 byte[] r = fr.apply(SPECIES.length()); 355 boolean[] mask = fm.apply(SPECIES.length()); 356 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 357 358 for (int ic = 0; ic < INVOC_COUNT; ic++) { 359 for (int i = 0; i < a.length; i += SPECIES.length()) { 360 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 361 av.aShiftR((int)b[i], vmask).intoArray(r, i); 362 } 363 } 364 365 bh.consume(r); 366 } 367 368 369 370 @Benchmark 371 public void shiftLShift(Blackhole bh) { 372 byte[] a = fa.apply(SPECIES.length()); 373 byte[] b = fb.apply(SPECIES.length()); 374 byte[] r = fr.apply(SPECIES.length()); 375 376 for (int ic = 0; ic < INVOC_COUNT; ic++) { 377 for (int i = 0; i < a.length; i += SPECIES.length()) { 378 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 379 av.shiftL((int)b[i]).intoArray(r, i); 380 } 381 } 382 383 bh.consume(r); 384 } 385 386 387 388 @Benchmark 389 public void shiftLMaskedShift(Blackhole bh) { 390 byte[] a = fa.apply(SPECIES.length()); 391 byte[] b = fb.apply(SPECIES.length()); 392 byte[] r = fr.apply(SPECIES.length()); 393 boolean[] mask = fm.apply(SPECIES.length()); 394 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 395 396 for (int ic = 0; ic < INVOC_COUNT; ic++) { 397 for (int i = 0; i < a.length; i += SPECIES.length()) { 398 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 399 av.shiftL((int)b[i], vmask).intoArray(r, i); 400 } 401 } 402 403 bh.consume(r); 404 } 405 406 407 408 @Benchmark 409 public void shiftRShift(Blackhole bh) { 410 byte[] a = fa.apply(SPECIES.length()); 411 byte[] b = fb.apply(SPECIES.length()); 412 byte[] r = fr.apply(SPECIES.length()); 413 414 for (int ic = 0; ic < INVOC_COUNT; ic++) { 415 for (int i = 0; i < a.length; i += SPECIES.length()) { 416 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 417 av.shiftR((int)b[i]).intoArray(r, i); 418 } 419 } 420 421 bh.consume(r); 422 } 423 424 425 426 @Benchmark 427 public void shiftRMaskedShift(Blackhole bh) { 428 byte[] a = fa.apply(SPECIES.length()); 429 byte[] b = fb.apply(SPECIES.length()); 430 byte[] r = fr.apply(SPECIES.length()); 431 boolean[] mask = fm.apply(SPECIES.length()); 432 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 433 434 for (int ic = 0; ic < INVOC_COUNT; ic++) { 435 for (int i = 0; i < a.length; i += SPECIES.length()) { 436 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 437 av.shiftR((int)b[i], vmask).intoArray(r, i); 438 } 439 } 440 441 bh.consume(r); 442 } 443 444 445 446 447 448 449 450 451 @Benchmark 452 public void max(Blackhole bh) { 453 byte[] a = fa.apply(SPECIES.length()); 454 byte[] b = fb.apply(SPECIES.length()); 455 byte[] r = fr.apply(SPECIES.length()); 456 457 for (int ic = 0; ic < INVOC_COUNT; ic++) { 458 for (int i = 0; i < a.length; i += SPECIES.length()) { 459 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 460 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 461 av.max(bv).intoArray(r, i); 462 } 463 } 464 465 bh.consume(r); 466 } 467 468 @Benchmark 469 public void min(Blackhole bh) { 470 byte[] a = fa.apply(SPECIES.length()); 471 byte[] b = fb.apply(SPECIES.length()); 472 byte[] r = fr.apply(SPECIES.length()); 473 474 for (int ic = 0; ic < INVOC_COUNT; ic++) { 475 for (int i = 0; i < a.length; i += SPECIES.length()) { 476 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 477 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 478 av.min(bv).intoArray(r, i); 479 } 480 } 481 482 bh.consume(r); 483 } 484 485 486 @Benchmark 487 public void andAll(Blackhole bh) { 488 byte[] a = fa.apply(SPECIES.length()); 489 byte ra = -1; 490 491 for (int ic = 0; ic < INVOC_COUNT; ic++) { 492 ra = -1; 493 for (int i = 0; i < a.length; i += SPECIES.length()) { 494 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 495 ra &= av.andAll(); 496 } 497 } 498 bh.consume(ra); 499 } 500 501 502 503 @Benchmark 504 public void orAll(Blackhole bh) { 505 byte[] a = fa.apply(SPECIES.length()); 506 byte ra = 0; 507 508 for (int ic = 0; ic < INVOC_COUNT; ic++) { 509 ra = 0; 510 for (int i = 0; i < a.length; i += SPECIES.length()) { 511 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 512 ra |= av.orAll(); 513 } 514 } 515 bh.consume(ra); 516 } 517 518 519 520 @Benchmark 521 public void xorAll(Blackhole bh) { 522 byte[] a = fa.apply(SPECIES.length()); 523 byte ra = 0; 524 525 for (int ic = 0; ic < INVOC_COUNT; ic++) { 526 ra = 0; 527 for (int i = 0; i < a.length; i += SPECIES.length()) { 528 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 529 ra ^= av.xorAll(); 530 } 531 } 532 bh.consume(ra); 533 } 534 535 536 @Benchmark 537 public void addAll(Blackhole bh) { 538 byte[] a = fa.apply(SPECIES.length()); 539 byte ra = 0; 540 541 for (int ic = 0; ic < INVOC_COUNT; ic++) { 542 ra = 0; 543 for (int i = 0; i < a.length; i += SPECIES.length()) { 544 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 545 ra += av.addAll(); 546 } 547 } 548 bh.consume(ra); 549 } 550 551 @Benchmark 552 public void mulAll(Blackhole bh) { 553 byte[] a = fa.apply(SPECIES.length()); 554 byte ra = 1; 555 556 for (int ic = 0; ic < INVOC_COUNT; ic++) { 557 ra = 1; 558 for (int i = 0; i < a.length; i += SPECIES.length()) { 559 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 560 ra *= av.mulAll(); 561 } 562 } 563 bh.consume(ra); 564 } 565 566 @Benchmark 567 public void minAll(Blackhole bh) { 568 byte[] a = fa.apply(SPECIES.length()); 569 byte ra = Byte.MAX_VALUE; 570 571 for (int ic = 0; ic < INVOC_COUNT; ic++) { 572 ra = Byte.MAX_VALUE; 573 for (int i = 0; i < a.length; i += SPECIES.length()) { 574 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 575 ra = (byte)Math.min(ra, av.minAll()); 576 } 577 } 578 bh.consume(ra); 579 } 580 581 @Benchmark 582 public void maxAll(Blackhole bh) { 583 byte[] a = fa.apply(SPECIES.length()); 584 byte ra = Byte.MIN_VALUE; 585 586 for (int ic = 0; ic < INVOC_COUNT; ic++) { 587 ra = Byte.MIN_VALUE; 588 for (int i = 0; i < a.length; i += SPECIES.length()) { 589 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 590 ra = (byte)Math.max(ra, av.maxAll()); 591 } 592 } 593 bh.consume(ra); 594 } 595 596 597 @Benchmark 598 public void anyTrue(Blackhole bh) { 599 boolean[] mask = fm.apply(SPECIES.length()); 600 boolean[] r = fmr.apply(SPECIES.length()); 601 602 for (int ic = 0; ic < INVOC_COUNT; ic++) { 603 for (int i = 0; i < mask.length; i += SPECIES.length()) { 604 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i); 605 r[i] = vmask.anyTrue(); 606 } 607 } 608 609 bh.consume(r); 610 } 611 612 613 614 @Benchmark 615 public void allTrue(Blackhole bh) { 616 boolean[] mask = fm.apply(SPECIES.length()); 617 boolean[] r = fmr.apply(SPECIES.length()); 618 619 for (int ic = 0; ic < INVOC_COUNT; ic++) { 620 for (int i = 0; i < mask.length; i += SPECIES.length()) { 621 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i); 622 r[i] = vmask.allTrue(); 623 } 624 } 625 626 bh.consume(r); 627 } 628 629 630 @Benchmark 631 public void with(Blackhole bh) { 632 byte[] a = fa.apply(SPECIES.length()); 633 byte[] r = fr.apply(SPECIES.length()); 634 635 for (int ic = 0; ic < INVOC_COUNT; ic++) { 636 for (int i = 0; i < a.length; i += SPECIES.length()) { 637 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 638 av.with(0, (byte)4).intoArray(r, i); 639 } 640 } 641 642 bh.consume(r); 643 } 644 645 @Benchmark 646 public Object lessThan() { 647 byte[] a = fa.apply(size); 648 byte[] b = fb.apply(size); 649 boolean[] ms = fm.apply(size); 650 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0); 651 652 for (int ic = 0; ic < INVOC_COUNT; ic++) { 653 for (int i = 0; i < a.length; i += SPECIES.length()) { 654 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 655 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 656 VectorMask<Byte> mv = av.lessThan(bv); 657 658 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 659 } 660 } 661 return m; 662 } 663 664 665 @Benchmark 666 public Object greaterThan() { 667 byte[] a = fa.apply(size); 668 byte[] b = fb.apply(size); 669 boolean[] ms = fm.apply(size); 670 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0); 671 672 for (int ic = 0; ic < INVOC_COUNT; ic++) { 673 for (int i = 0; i < a.length; i += SPECIES.length()) { 674 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 675 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 676 VectorMask<Byte> mv = av.greaterThan(bv); 677 678 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 679 } 680 } 681 return m; 682 } 683 684 685 @Benchmark 686 public Object equal() { 687 byte[] a = fa.apply(size); 688 byte[] b = fb.apply(size); 689 boolean[] ms = fm.apply(size); 690 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0); 691 692 for (int ic = 0; ic < INVOC_COUNT; ic++) { 693 for (int i = 0; i < a.length; i += SPECIES.length()) { 694 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 695 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 696 VectorMask<Byte> mv = av.equal(bv); 697 698 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 699 } 700 } 701 return m; 702 } 703 704 705 @Benchmark 706 public Object notEqual() { 707 byte[] a = fa.apply(size); 708 byte[] b = fb.apply(size); 709 boolean[] ms = fm.apply(size); 710 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0); 711 712 for (int ic = 0; ic < INVOC_COUNT; ic++) { 713 for (int i = 0; i < a.length; i += SPECIES.length()) { 714 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 715 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 716 VectorMask<Byte> mv = av.notEqual(bv); 717 718 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 719 } 720 } 721 return m; 722 } 723 724 725 @Benchmark 726 public Object lessThanEq() { 727 byte[] a = fa.apply(size); 728 byte[] b = fb.apply(size); 729 boolean[] ms = fm.apply(size); 730 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0); 731 732 for (int ic = 0; ic < INVOC_COUNT; ic++) { 733 for (int i = 0; i < a.length; i += SPECIES.length()) { 734 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 735 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 736 VectorMask<Byte> mv = av.lessThanEq(bv); 737 738 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 739 } 740 } 741 return m; 742 } 743 744 745 @Benchmark 746 public Object greaterThanEq() { 747 byte[] a = fa.apply(size); 748 byte[] b = fb.apply(size); 749 boolean[] ms = fm.apply(size); 750 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0); 751 752 for (int ic = 0; ic < INVOC_COUNT; ic++) { 753 for (int i = 0; i < a.length; i += SPECIES.length()) { 754 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 755 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 756 VectorMask<Byte> mv = av.greaterThanEq(bv); 757 758 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 759 } 760 } 761 return m; 762 } 763 764 765 @Benchmark 766 public void blend(Blackhole bh) { 767 byte[] a = fa.apply(SPECIES.length()); 768 byte[] b = fb.apply(SPECIES.length()); 769 byte[] r = fr.apply(SPECIES.length()); 770 boolean[] mask = fm.apply(SPECIES.length()); 771 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 772 773 for (int ic = 0; ic < INVOC_COUNT; ic++) { 774 for (int i = 0; i < a.length; i += SPECIES.length()) { 775 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 776 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 777 av.blend(bv, vmask).intoArray(r, i); 778 } 779 } 780 781 bh.consume(r); 782 } 783 784 @Benchmark 785 public void rearrange(Blackhole bh) { 786 byte[] a = fa.apply(SPECIES.length()); 787 int[] order = fs.apply(a.length, SPECIES.length()); 788 byte[] r = fr.apply(SPECIES.length()); 789 790 for (int ic = 0; ic < INVOC_COUNT; ic++) { 791 for (int i = 0; i < a.length; i += SPECIES.length()) { 792 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 793 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i); 794 } 795 } 796 797 bh.consume(r); 798 } 799 800 @Benchmark 801 public void extract(Blackhole bh) { 802 byte[] a = fa.apply(SPECIES.length()); 803 byte[] r = fr.apply(SPECIES.length()); 804 805 for (int ic = 0; ic < INVOC_COUNT; ic++) { 806 for (int i = 0; i < a.length; i += SPECIES.length()) { 807 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 808 int num_lanes = SPECIES.length(); 809 // Manually unroll because full unroll happens after intrinsification. 810 // Unroll is needed because get intrinsic requires for index to be a known constant. 811 if (num_lanes == 1) { 812 r[i]=av.get(0); 813 } else if (num_lanes == 2) { 814 r[i]=av.get(0); 815 r[i+1]=av.get(1); 816 } else if (num_lanes == 4) { 817 r[i]=av.get(0); 818 r[i+1]=av.get(1); 819 r[i+2]=av.get(2); 820 r[i+3]=av.get(3); 821 } else if (num_lanes == 8) { 822 r[i]=av.get(0); 823 r[i+1]=av.get(1); 824 r[i+2]=av.get(2); 825 r[i+3]=av.get(3); 826 r[i+4]=av.get(4); 827 r[i+5]=av.get(5); 828 r[i+6]=av.get(6); 829 r[i+7]=av.get(7); 830 } else if (num_lanes == 16) { 831 r[i]=av.get(0); 832 r[i+1]=av.get(1); 833 r[i+2]=av.get(2); 834 r[i+3]=av.get(3); 835 r[i+4]=av.get(4); 836 r[i+5]=av.get(5); 837 r[i+6]=av.get(6); 838 r[i+7]=av.get(7); 839 r[i+8]=av.get(8); 840 r[i+9]=av.get(9); 841 r[i+10]=av.get(10); 842 r[i+11]=av.get(11); 843 r[i+12]=av.get(12); 844 r[i+13]=av.get(13); 845 r[i+14]=av.get(14); 846 r[i+15]=av.get(15); 847 } else if (num_lanes == 32) { 848 r[i]=av.get(0); 849 r[i+1]=av.get(1); 850 r[i+2]=av.get(2); 851 r[i+3]=av.get(3); 852 r[i+4]=av.get(4); 853 r[i+5]=av.get(5); 854 r[i+6]=av.get(6); 855 r[i+7]=av.get(7); 856 r[i+8]=av.get(8); 857 r[i+9]=av.get(9); 858 r[i+10]=av.get(10); 859 r[i+11]=av.get(11); 860 r[i+12]=av.get(12); 861 r[i+13]=av.get(13); 862 r[i+14]=av.get(14); 863 r[i+15]=av.get(15); 864 r[i+16]=av.get(16); 865 r[i+17]=av.get(17); 866 r[i+18]=av.get(18); 867 r[i+19]=av.get(19); 868 r[i+20]=av.get(20); 869 r[i+21]=av.get(21); 870 r[i+22]=av.get(22); 871 r[i+23]=av.get(23); 872 r[i+24]=av.get(24); 873 r[i+25]=av.get(25); 874 r[i+26]=av.get(26); 875 r[i+27]=av.get(27); 876 r[i+28]=av.get(28); 877 r[i+29]=av.get(29); 878 r[i+30]=av.get(30); 879 r[i+31]=av.get(31); 880 } else if (num_lanes == 64) { 881 r[i]=av.get(0); 882 r[i+1]=av.get(1); 883 r[i+2]=av.get(2); 884 r[i+3]=av.get(3); 885 r[i+4]=av.get(4); 886 r[i+5]=av.get(5); 887 r[i+6]=av.get(6); 888 r[i+7]=av.get(7); 889 r[i+8]=av.get(8); 890 r[i+9]=av.get(9); 891 r[i+10]=av.get(10); 892 r[i+11]=av.get(11); 893 r[i+12]=av.get(12); 894 r[i+13]=av.get(13); 895 r[i+14]=av.get(14); 896 r[i+15]=av.get(15); 897 r[i+16]=av.get(16); 898 r[i+17]=av.get(17); 899 r[i+18]=av.get(18); 900 r[i+19]=av.get(19); 901 r[i+20]=av.get(20); 902 r[i+21]=av.get(21); 903 r[i+22]=av.get(22); 904 r[i+23]=av.get(23); 905 r[i+24]=av.get(24); 906 r[i+25]=av.get(25); 907 r[i+26]=av.get(26); 908 r[i+27]=av.get(27); 909 r[i+28]=av.get(28); 910 r[i+29]=av.get(29); 911 r[i+30]=av.get(30); 912 r[i+31]=av.get(31); 913 r[i+32]=av.get(32); 914 r[i+33]=av.get(33); 915 r[i+34]=av.get(34); 916 r[i+35]=av.get(35); 917 r[i+36]=av.get(36); 918 r[i+37]=av.get(37); 919 r[i+38]=av.get(38); 920 r[i+39]=av.get(39); 921 r[i+40]=av.get(40); 922 r[i+41]=av.get(41); 923 r[i+42]=av.get(42); 924 r[i+43]=av.get(43); 925 r[i+44]=av.get(44); 926 r[i+45]=av.get(45); 927 r[i+46]=av.get(46); 928 r[i+47]=av.get(47); 929 r[i+48]=av.get(48); 930 r[i+49]=av.get(49); 931 r[i+50]=av.get(50); 932 r[i+51]=av.get(51); 933 r[i+52]=av.get(52); 934 r[i+53]=av.get(53); 935 r[i+54]=av.get(54); 936 r[i+55]=av.get(55); 937 r[i+56]=av.get(56); 938 r[i+57]=av.get(57); 939 r[i+58]=av.get(58); 940 r[i+59]=av.get(59); 941 r[i+60]=av.get(60); 942 r[i+61]=av.get(61); 943 r[i+62]=av.get(62); 944 r[i+63]=av.get(63); 945 } else { 946 for (int j = 0; j < SPECIES.length(); j++) { 947 r[i+j]=av.get(j); 948 } 949 } 950 } 951 } 952 953 bh.consume(r); 954 } 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 @Benchmark 977 public void neg(Blackhole bh) { 978 byte[] a = fa.apply(SPECIES.length()); 979 byte[] r = fr.apply(SPECIES.length()); 980 981 for (int ic = 0; ic < INVOC_COUNT; ic++) { 982 for (int i = 0; i < a.length; i += SPECIES.length()) { 983 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 984 av.neg().intoArray(r, i); 985 } 986 } 987 988 bh.consume(r); 989 } 990 991 @Benchmark 992 public void negMasked(Blackhole bh) { 993 byte[] a = fa.apply(SPECIES.length()); 994 byte[] r = fr.apply(SPECIES.length()); 995 boolean[] mask = fm.apply(SPECIES.length()); 996 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 997 998 for (int ic = 0; ic < INVOC_COUNT; ic++) { 999 for (int i = 0; i < a.length; i += SPECIES.length()) { 1000 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1001 av.neg(vmask).intoArray(r, i); 1002 } 1003 } 1004 1005 bh.consume(r); 1006 } 1007 1008 @Benchmark 1009 public void abs(Blackhole bh) { 1010 byte[] a = fa.apply(SPECIES.length()); 1011 byte[] r = fr.apply(SPECIES.length()); 1012 1013 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1014 for (int i = 0; i < a.length; i += SPECIES.length()) { 1015 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1016 av.abs().intoArray(r, i); 1017 } 1018 } 1019 1020 bh.consume(r); 1021 } 1022 1023 @Benchmark 1024 public void absMasked(Blackhole bh) { 1025 byte[] a = fa.apply(SPECIES.length()); 1026 byte[] r = fr.apply(SPECIES.length()); 1027 boolean[] mask = fm.apply(SPECIES.length()); 1028 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 1029 1030 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1031 for (int i = 0; i < a.length; i += SPECIES.length()) { 1032 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1033 av.abs(vmask).intoArray(r, i); 1034 } 1035 } 1036 1037 bh.consume(r); 1038 } 1039 1040 1041 @Benchmark 1042 public void not(Blackhole bh) { 1043 byte[] a = fa.apply(SPECIES.length()); 1044 byte[] r = fr.apply(SPECIES.length()); 1045 1046 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1047 for (int i = 0; i < a.length; i += SPECIES.length()) { 1048 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1049 av.not().intoArray(r, i); 1050 } 1051 } 1052 1053 bh.consume(r); 1054 } 1055 1056 1057 1058 @Benchmark 1059 public void notMasked(Blackhole bh) { 1060 byte[] a = fa.apply(SPECIES.length()); 1061 byte[] r = fr.apply(SPECIES.length()); 1062 boolean[] mask = fm.apply(SPECIES.length()); 1063 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask); 1064 1065 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1066 for (int i = 0; i < a.length; i += SPECIES.length()) { 1067 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1068 av.not(vmask).intoArray(r, i); 1069 } 1070 } 1071 1072 bh.consume(r); 1073 } 1074 1075 1076 1077 1078 1079 } 1080