1 /* 2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 import jdk.incubator.vector.Vector; 27 import jdk.incubator.vector.Vector.Shape; 28 import jdk.incubator.vector.Vector.Species; 29 import jdk.incubator.vector.ShortVector; 30 31 import java.util.concurrent.TimeUnit; 32 import java.util.function.BiFunction; 33 import java.util.function.IntFunction; 34 35 import org.openjdk.jmh.annotations.*; 36 import org.openjdk.jmh.infra.Blackhole; 37 38 @BenchmarkMode(Mode.Throughput) 39 @OutputTimeUnit(TimeUnit.MILLISECONDS) 40 @State(Scope.Benchmark) 41 @Warmup(iterations = 3, time = 1) 42 @Measurement(iterations = 5, time = 1) 43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 44 public class Short512Vector extends AbstractVectorBenchmark { 45 static final Species<Short> SPECIES = ShortVector.SPECIES_512; 46 47 static final int INVOC_COUNT = 1; // get rid of outer loop 48 49 @Param("1024") 50 int size; 51 52 short[] fill(IntFunction<Short> f) { 53 short[] array = new short[size]; 54 for (int i = 0; i < array.length; i++) { 55 array[i] = f.apply(i); 56 } 57 return array; 58 } 59 60 short[] a, b, c, r; 61 boolean[] m, rm; 62 int[] s; 63 64 @Setup 65 public void init() { 66 size += size % SPECIES.length(); // FIXME: add post-loops 67 68 a = fill(i -> (short)(2*i)); 69 b = fill(i -> (short)(i+1)); 70 c = fill(i -> (short)(i+5)); 71 r = fill(i -> (short)0); 72 73 m = fillMask(size, i -> (i % 2) == 0); 74 rm = fillMask(size, i -> false); 75 76 s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length())); 77 } 78 79 final IntFunction<short[]> fa = vl -> a; 80 final IntFunction<short[]> fb = vl -> b; 81 final IntFunction<short[]> fc = vl -> c; 82 final IntFunction<short[]> fr = vl -> r; 83 final IntFunction<boolean[]> fm = vl -> m; 84 final IntFunction<boolean[]> fmr = vl -> rm; 85 final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; 86 87 88 @Benchmark 89 public void add(Blackhole bh) { 90 short[] a = fa.apply(SPECIES.length()); 91 short[] b = fb.apply(SPECIES.length()); 92 short[] r = fr.apply(SPECIES.length()); 93 94 for (int ic = 0; ic < INVOC_COUNT; ic++) { 95 for (int i = 0; i < a.length; i += SPECIES.length()) { 96 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 97 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 98 av.add(bv).intoArray(r, i); 99 } 100 } 101 102 bh.consume(r); 103 } 104 105 @Benchmark 106 public void addMasked(Blackhole bh) { 107 short[] a = fa.apply(SPECIES.length()); 108 short[] b = fb.apply(SPECIES.length()); 109 short[] r = fr.apply(SPECIES.length()); 110 boolean[] mask = fm.apply(SPECIES.length()); 111 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 112 113 for (int ic = 0; ic < INVOC_COUNT; ic++) { 114 for (int i = 0; i < a.length; i += SPECIES.length()) { 115 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 116 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 117 av.add(bv, vmask).intoArray(r, i); 118 } 119 } 120 121 bh.consume(r); 122 } 123 124 @Benchmark 125 public void sub(Blackhole bh) { 126 short[] a = fa.apply(SPECIES.length()); 127 short[] b = fb.apply(SPECIES.length()); 128 short[] r = fr.apply(SPECIES.length()); 129 130 for (int ic = 0; ic < INVOC_COUNT; ic++) { 131 for (int i = 0; i < a.length; i += SPECIES.length()) { 132 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 133 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 134 av.sub(bv).intoArray(r, i); 135 } 136 } 137 138 bh.consume(r); 139 } 140 141 @Benchmark 142 public void subMasked(Blackhole bh) { 143 short[] a = fa.apply(SPECIES.length()); 144 short[] b = fb.apply(SPECIES.length()); 145 short[] r = fr.apply(SPECIES.length()); 146 boolean[] mask = fm.apply(SPECIES.length()); 147 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 148 149 for (int ic = 0; ic < INVOC_COUNT; ic++) { 150 for (int i = 0; i < a.length; i += SPECIES.length()) { 151 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 152 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 153 av.sub(bv, vmask).intoArray(r, i); 154 } 155 } 156 157 bh.consume(r); 158 } 159 160 161 162 @Benchmark 163 public void mul(Blackhole bh) { 164 short[] a = fa.apply(SPECIES.length()); 165 short[] b = fb.apply(SPECIES.length()); 166 short[] r = fr.apply(SPECIES.length()); 167 168 for (int ic = 0; ic < INVOC_COUNT; ic++) { 169 for (int i = 0; i < a.length; i += SPECIES.length()) { 170 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 171 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 172 av.mul(bv).intoArray(r, i); 173 } 174 } 175 176 bh.consume(r); 177 } 178 179 @Benchmark 180 public void mulMasked(Blackhole bh) { 181 short[] a = fa.apply(SPECIES.length()); 182 short[] b = fb.apply(SPECIES.length()); 183 short[] r = fr.apply(SPECIES.length()); 184 boolean[] mask = fm.apply(SPECIES.length()); 185 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 186 187 for (int ic = 0; ic < INVOC_COUNT; ic++) { 188 for (int i = 0; i < a.length; i += SPECIES.length()) { 189 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 190 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 191 av.mul(bv, vmask).intoArray(r, i); 192 } 193 } 194 195 bh.consume(r); 196 } 197 198 199 @Benchmark 200 public void and(Blackhole bh) { 201 short[] a = fa.apply(SPECIES.length()); 202 short[] b = fb.apply(SPECIES.length()); 203 short[] r = fr.apply(SPECIES.length()); 204 205 for (int ic = 0; ic < INVOC_COUNT; ic++) { 206 for (int i = 0; i < a.length; i += SPECIES.length()) { 207 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 208 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 209 av.and(bv).intoArray(r, i); 210 } 211 } 212 213 bh.consume(r); 214 } 215 216 217 218 @Benchmark 219 public void andMasked(Blackhole bh) { 220 short[] a = fa.apply(SPECIES.length()); 221 short[] b = fb.apply(SPECIES.length()); 222 short[] r = fr.apply(SPECIES.length()); 223 boolean[] mask = fm.apply(SPECIES.length()); 224 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 225 226 for (int ic = 0; ic < INVOC_COUNT; ic++) { 227 for (int i = 0; i < a.length; i += SPECIES.length()) { 228 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 229 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 230 av.and(bv, vmask).intoArray(r, i); 231 } 232 } 233 234 bh.consume(r); 235 } 236 237 238 239 @Benchmark 240 public void or(Blackhole bh) { 241 short[] a = fa.apply(SPECIES.length()); 242 short[] b = fb.apply(SPECIES.length()); 243 short[] r = fr.apply(SPECIES.length()); 244 245 for (int ic = 0; ic < INVOC_COUNT; ic++) { 246 for (int i = 0; i < a.length; i += SPECIES.length()) { 247 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 248 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 249 av.or(bv).intoArray(r, i); 250 } 251 } 252 253 bh.consume(r); 254 } 255 256 257 258 @Benchmark 259 public void orMasked(Blackhole bh) { 260 short[] a = fa.apply(SPECIES.length()); 261 short[] b = fb.apply(SPECIES.length()); 262 short[] r = fr.apply(SPECIES.length()); 263 boolean[] mask = fm.apply(SPECIES.length()); 264 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 265 266 for (int ic = 0; ic < INVOC_COUNT; ic++) { 267 for (int i = 0; i < a.length; i += SPECIES.length()) { 268 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 269 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 270 av.or(bv, vmask).intoArray(r, i); 271 } 272 } 273 274 bh.consume(r); 275 } 276 277 278 279 @Benchmark 280 public void xor(Blackhole bh) { 281 short[] a = fa.apply(SPECIES.length()); 282 short[] b = fb.apply(SPECIES.length()); 283 short[] r = fr.apply(SPECIES.length()); 284 285 for (int ic = 0; ic < INVOC_COUNT; ic++) { 286 for (int i = 0; i < a.length; i += SPECIES.length()) { 287 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 288 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 289 av.xor(bv).intoArray(r, i); 290 } 291 } 292 293 bh.consume(r); 294 } 295 296 297 298 @Benchmark 299 public void xorMasked(Blackhole bh) { 300 short[] a = fa.apply(SPECIES.length()); 301 short[] b = fb.apply(SPECIES.length()); 302 short[] r = fr.apply(SPECIES.length()); 303 boolean[] mask = fm.apply(SPECIES.length()); 304 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 305 306 for (int ic = 0; ic < INVOC_COUNT; ic++) { 307 for (int i = 0; i < a.length; i += SPECIES.length()) { 308 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 309 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 310 av.xor(bv, vmask).intoArray(r, i); 311 } 312 } 313 314 bh.consume(r); 315 } 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 @Benchmark 338 public void aShiftRShift(Blackhole bh) { 339 short[] a = fa.apply(SPECIES.length()); 340 short[] b = fb.apply(SPECIES.length()); 341 short[] r = fr.apply(SPECIES.length()); 342 343 for (int ic = 0; ic < INVOC_COUNT; ic++) { 344 for (int i = 0; i < a.length; i += SPECIES.length()) { 345 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 346 av.aShiftR((int)b[i]).intoArray(r, i); 347 } 348 } 349 350 bh.consume(r); 351 } 352 353 354 355 @Benchmark 356 public void aShiftRMaskedShift(Blackhole bh) { 357 short[] a = fa.apply(SPECIES.length()); 358 short[] b = fb.apply(SPECIES.length()); 359 short[] r = fr.apply(SPECIES.length()); 360 boolean[] mask = fm.apply(SPECIES.length()); 361 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 362 363 for (int ic = 0; ic < INVOC_COUNT; ic++) { 364 for (int i = 0; i < a.length; i += SPECIES.length()) { 365 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 366 av.aShiftR((int)b[i], vmask).intoArray(r, i); 367 } 368 } 369 370 bh.consume(r); 371 } 372 373 374 375 @Benchmark 376 public void shiftLShift(Blackhole bh) { 377 short[] a = fa.apply(SPECIES.length()); 378 short[] b = fb.apply(SPECIES.length()); 379 short[] r = fr.apply(SPECIES.length()); 380 381 for (int ic = 0; ic < INVOC_COUNT; ic++) { 382 for (int i = 0; i < a.length; i += SPECIES.length()) { 383 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 384 av.shiftL((int)b[i]).intoArray(r, i); 385 } 386 } 387 388 bh.consume(r); 389 } 390 391 392 393 @Benchmark 394 public void shiftLMaskedShift(Blackhole bh) { 395 short[] a = fa.apply(SPECIES.length()); 396 short[] b = fb.apply(SPECIES.length()); 397 short[] r = fr.apply(SPECIES.length()); 398 boolean[] mask = fm.apply(SPECIES.length()); 399 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 400 401 for (int ic = 0; ic < INVOC_COUNT; ic++) { 402 for (int i = 0; i < a.length; i += SPECIES.length()) { 403 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 404 av.shiftL((int)b[i], vmask).intoArray(r, i); 405 } 406 } 407 408 bh.consume(r); 409 } 410 411 412 413 @Benchmark 414 public void shiftRShift(Blackhole bh) { 415 short[] a = fa.apply(SPECIES.length()); 416 short[] b = fb.apply(SPECIES.length()); 417 short[] r = fr.apply(SPECIES.length()); 418 419 for (int ic = 0; ic < INVOC_COUNT; ic++) { 420 for (int i = 0; i < a.length; i += SPECIES.length()) { 421 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 422 av.shiftR((int)b[i]).intoArray(r, i); 423 } 424 } 425 426 bh.consume(r); 427 } 428 429 430 431 @Benchmark 432 public void shiftRMaskedShift(Blackhole bh) { 433 short[] a = fa.apply(SPECIES.length()); 434 short[] b = fb.apply(SPECIES.length()); 435 short[] r = fr.apply(SPECIES.length()); 436 boolean[] mask = fm.apply(SPECIES.length()); 437 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 438 439 for (int ic = 0; ic < INVOC_COUNT; ic++) { 440 for (int i = 0; i < a.length; i += SPECIES.length()) { 441 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 442 av.shiftR((int)b[i], vmask).intoArray(r, i); 443 } 444 } 445 446 bh.consume(r); 447 } 448 449 450 @Benchmark 451 public void max(Blackhole bh) { 452 short[] a = fa.apply(SPECIES.length()); 453 short[] b = fb.apply(SPECIES.length()); 454 short[] r = fr.apply(SPECIES.length()); 455 456 for (int ic = 0; ic < INVOC_COUNT; ic++) { 457 for (int i = 0; i < a.length; i += SPECIES.length()) { 458 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 459 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 460 av.max(bv).intoArray(r, i); 461 } 462 } 463 464 bh.consume(r); 465 } 466 467 @Benchmark 468 public void min(Blackhole bh) { 469 short[] a = fa.apply(SPECIES.length()); 470 short[] b = fb.apply(SPECIES.length()); 471 short[] r = fr.apply(SPECIES.length()); 472 473 for (int ic = 0; ic < INVOC_COUNT; ic++) { 474 for (int i = 0; i < a.length; i += SPECIES.length()) { 475 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 476 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 477 av.min(bv).intoArray(r, i); 478 } 479 } 480 481 bh.consume(r); 482 } 483 484 485 @Benchmark 486 public void andAll(Blackhole bh) { 487 short[] a = fa.apply(SPECIES.length()); 488 short ra = -1; 489 490 for (int ic = 0; ic < INVOC_COUNT; ic++) { 491 ra = -1; 492 for (int i = 0; i < a.length; i += SPECIES.length()) { 493 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 494 ra &= av.andAll(); 495 } 496 } 497 bh.consume(ra); 498 } 499 500 501 502 @Benchmark 503 public void orAll(Blackhole bh) { 504 short[] a = fa.apply(SPECIES.length()); 505 short ra = 0; 506 507 for (int ic = 0; ic < INVOC_COUNT; ic++) { 508 ra = 0; 509 for (int i = 0; i < a.length; i += SPECIES.length()) { 510 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 511 ra |= av.orAll(); 512 } 513 } 514 bh.consume(ra); 515 } 516 517 518 519 @Benchmark 520 public void xorAll(Blackhole bh) { 521 short[] a = fa.apply(SPECIES.length()); 522 short ra = 0; 523 524 for (int ic = 0; ic < INVOC_COUNT; ic++) { 525 ra = 0; 526 for (int i = 0; i < a.length; i += SPECIES.length()) { 527 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 528 ra ^= av.xorAll(); 529 } 530 } 531 bh.consume(ra); 532 } 533 534 535 @Benchmark 536 public void addAll(Blackhole bh) { 537 short[] a = fa.apply(SPECIES.length()); 538 short ra = 0; 539 540 for (int ic = 0; ic < INVOC_COUNT; ic++) { 541 ra = 0; 542 for (int i = 0; i < a.length; i += SPECIES.length()) { 543 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 544 ra += av.addAll(); 545 } 546 } 547 bh.consume(ra); 548 } 549 550 @Benchmark 551 public void mulAll(Blackhole bh) { 552 short[] a = fa.apply(SPECIES.length()); 553 short ra = 1; 554 555 for (int ic = 0; ic < INVOC_COUNT; ic++) { 556 ra = 1; 557 for (int i = 0; i < a.length; i += SPECIES.length()) { 558 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 559 ra *= av.mulAll(); 560 } 561 } 562 bh.consume(ra); 563 } 564 565 @Benchmark 566 public void minAll(Blackhole bh) { 567 short[] a = fa.apply(SPECIES.length()); 568 short ra = Short.MAX_VALUE; 569 570 for (int ic = 0; ic < INVOC_COUNT; ic++) { 571 ra = Short.MAX_VALUE; 572 for (int i = 0; i < a.length; i += SPECIES.length()) { 573 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 574 ra = (short)Math.min(ra, av.minAll()); 575 } 576 } 577 bh.consume(ra); 578 } 579 580 @Benchmark 581 public void maxAll(Blackhole bh) { 582 short[] a = fa.apply(SPECIES.length()); 583 short ra = Short.MIN_VALUE; 584 585 for (int ic = 0; ic < INVOC_COUNT; ic++) { 586 ra = Short.MIN_VALUE; 587 for (int i = 0; i < a.length; i += SPECIES.length()) { 588 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 589 ra = (short)Math.max(ra, av.maxAll()); 590 } 591 } 592 bh.consume(ra); 593 } 594 595 596 @Benchmark 597 public void anyTrue(Blackhole bh) { 598 boolean[] mask = fm.apply(SPECIES.length()); 599 boolean[] r = fmr.apply(SPECIES.length()); 600 601 for (int ic = 0; ic < INVOC_COUNT; ic++) { 602 for (int i = 0; i < mask.length; i += SPECIES.length()) { 603 Vector.Mask<Short> vmask = ShortVector.maskFromArray(SPECIES, mask, i); 604 r[i] = vmask.anyTrue(); 605 } 606 } 607 608 bh.consume(r); 609 } 610 611 612 613 @Benchmark 614 public void allTrue(Blackhole bh) { 615 boolean[] mask = fm.apply(SPECIES.length()); 616 boolean[] r = fmr.apply(SPECIES.length()); 617 618 for (int ic = 0; ic < INVOC_COUNT; ic++) { 619 for (int i = 0; i < mask.length; i += SPECIES.length()) { 620 Vector.Mask<Short> vmask = ShortVector.maskFromArray(SPECIES, mask, i); 621 r[i] = vmask.allTrue(); 622 } 623 } 624 625 bh.consume(r); 626 } 627 628 629 @Benchmark 630 public void with(Blackhole bh) { 631 short[] a = fa.apply(SPECIES.length()); 632 short[] r = fr.apply(SPECIES.length()); 633 634 for (int ic = 0; ic < INVOC_COUNT; ic++) { 635 for (int i = 0; i < a.length; i += SPECIES.length()) { 636 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 637 av.with(0, (short)4).intoArray(r, i); 638 } 639 } 640 641 bh.consume(r); 642 } 643 644 @Benchmark 645 public Object lessThan() { 646 short[] a = fa.apply(size); 647 short[] b = fb.apply(size); 648 boolean[] ms = fm.apply(size); 649 Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0); 650 651 for (int ic = 0; ic < INVOC_COUNT; ic++) { 652 for (int i = 0; i < a.length; i += SPECIES.length()) { 653 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 654 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 655 Vector.Mask<Short> mv = av.lessThan(bv); 656 657 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 658 } 659 } 660 return m; 661 } 662 663 664 @Benchmark 665 public Object greaterThan() { 666 short[] a = fa.apply(size); 667 short[] b = fb.apply(size); 668 boolean[] ms = fm.apply(size); 669 Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0); 670 671 for (int ic = 0; ic < INVOC_COUNT; ic++) { 672 for (int i = 0; i < a.length; i += SPECIES.length()) { 673 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 674 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 675 Vector.Mask<Short> mv = av.greaterThan(bv); 676 677 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 678 } 679 } 680 return m; 681 } 682 683 684 @Benchmark 685 public Object equal() { 686 short[] a = fa.apply(size); 687 short[] b = fb.apply(size); 688 boolean[] ms = fm.apply(size); 689 Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0); 690 691 for (int ic = 0; ic < INVOC_COUNT; ic++) { 692 for (int i = 0; i < a.length; i += SPECIES.length()) { 693 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 694 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 695 Vector.Mask<Short> mv = av.equal(bv); 696 697 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 698 } 699 } 700 return m; 701 } 702 703 704 @Benchmark 705 public Object notEqual() { 706 short[] a = fa.apply(size); 707 short[] b = fb.apply(size); 708 boolean[] ms = fm.apply(size); 709 Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0); 710 711 for (int ic = 0; ic < INVOC_COUNT; ic++) { 712 for (int i = 0; i < a.length; i += SPECIES.length()) { 713 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 714 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 715 Vector.Mask<Short> mv = av.notEqual(bv); 716 717 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 718 } 719 } 720 return m; 721 } 722 723 724 @Benchmark 725 public Object lessThanEq() { 726 short[] a = fa.apply(size); 727 short[] b = fb.apply(size); 728 boolean[] ms = fm.apply(size); 729 Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0); 730 731 for (int ic = 0; ic < INVOC_COUNT; ic++) { 732 for (int i = 0; i < a.length; i += SPECIES.length()) { 733 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 734 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 735 Vector.Mask<Short> mv = av.lessThanEq(bv); 736 737 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 738 } 739 } 740 return m; 741 } 742 743 744 @Benchmark 745 public Object greaterThanEq() { 746 short[] a = fa.apply(size); 747 short[] b = fb.apply(size); 748 boolean[] ms = fm.apply(size); 749 Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0); 750 751 for (int ic = 0; ic < INVOC_COUNT; ic++) { 752 for (int i = 0; i < a.length; i += SPECIES.length()) { 753 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 754 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 755 Vector.Mask<Short> mv = av.greaterThanEq(bv); 756 757 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 758 } 759 } 760 return m; 761 } 762 763 764 @Benchmark 765 public void blend(Blackhole bh) { 766 short[] a = fa.apply(SPECIES.length()); 767 short[] b = fb.apply(SPECIES.length()); 768 short[] r = fr.apply(SPECIES.length()); 769 boolean[] mask = fm.apply(SPECIES.length()); 770 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 771 772 for (int ic = 0; ic < INVOC_COUNT; ic++) { 773 for (int i = 0; i < a.length; i += SPECIES.length()) { 774 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 775 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 776 av.blend(bv, vmask).intoArray(r, i); 777 } 778 } 779 780 bh.consume(r); 781 } 782 783 @Benchmark 784 public void rearrange(Blackhole bh) { 785 short[] a = fa.apply(SPECIES.length()); 786 int[] order = fs.apply(a.length, SPECIES.length()); 787 short[] r = fr.apply(SPECIES.length()); 788 789 for (int ic = 0; ic < INVOC_COUNT; ic++) { 790 for (int i = 0; i < a.length; i += SPECIES.length()) { 791 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 792 av.rearrange(ShortVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i); 793 } 794 } 795 796 bh.consume(r); 797 } 798 799 @Benchmark 800 public void extract(Blackhole bh) { 801 short[] a = fa.apply(SPECIES.length()); 802 short[] r = fr.apply(SPECIES.length()); 803 804 for (int ic = 0; ic < INVOC_COUNT; ic++) { 805 for (int i = 0; i < a.length; i += SPECIES.length()) { 806 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 807 int num_lanes = SPECIES.length(); 808 // Manually unroll because full unroll happens after intrinsification. 809 // Unroll is needed because get intrinsic requires for index to be a known constant. 810 if (num_lanes == 1) { 811 r[i]=av.get(0); 812 } else if (num_lanes == 2) { 813 r[i]=av.get(0); 814 r[i+1]=av.get(1); 815 } else if (num_lanes == 4) { 816 r[i]=av.get(0); 817 r[i+1]=av.get(1); 818 r[i+2]=av.get(2); 819 r[i+3]=av.get(3); 820 } else if (num_lanes == 8) { 821 r[i]=av.get(0); 822 r[i+1]=av.get(1); 823 r[i+2]=av.get(2); 824 r[i+3]=av.get(3); 825 r[i+4]=av.get(4); 826 r[i+5]=av.get(5); 827 r[i+6]=av.get(6); 828 r[i+7]=av.get(7); 829 } else if (num_lanes == 16) { 830 r[i]=av.get(0); 831 r[i+1]=av.get(1); 832 r[i+2]=av.get(2); 833 r[i+3]=av.get(3); 834 r[i+4]=av.get(4); 835 r[i+5]=av.get(5); 836 r[i+6]=av.get(6); 837 r[i+7]=av.get(7); 838 r[i+8]=av.get(8); 839 r[i+9]=av.get(9); 840 r[i+10]=av.get(10); 841 r[i+11]=av.get(11); 842 r[i+12]=av.get(12); 843 r[i+13]=av.get(13); 844 r[i+14]=av.get(14); 845 r[i+15]=av.get(15); 846 } else if (num_lanes == 32) { 847 r[i]=av.get(0); 848 r[i+1]=av.get(1); 849 r[i+2]=av.get(2); 850 r[i+3]=av.get(3); 851 r[i+4]=av.get(4); 852 r[i+5]=av.get(5); 853 r[i+6]=av.get(6); 854 r[i+7]=av.get(7); 855 r[i+8]=av.get(8); 856 r[i+9]=av.get(9); 857 r[i+10]=av.get(10); 858 r[i+11]=av.get(11); 859 r[i+12]=av.get(12); 860 r[i+13]=av.get(13); 861 r[i+14]=av.get(14); 862 r[i+15]=av.get(15); 863 r[i+16]=av.get(16); 864 r[i+17]=av.get(17); 865 r[i+18]=av.get(18); 866 r[i+19]=av.get(19); 867 r[i+20]=av.get(20); 868 r[i+21]=av.get(21); 869 r[i+22]=av.get(22); 870 r[i+23]=av.get(23); 871 r[i+24]=av.get(24); 872 r[i+25]=av.get(25); 873 r[i+26]=av.get(26); 874 r[i+27]=av.get(27); 875 r[i+28]=av.get(28); 876 r[i+29]=av.get(29); 877 r[i+30]=av.get(30); 878 r[i+31]=av.get(31); 879 } else if (num_lanes == 64) { 880 r[i]=av.get(0); 881 r[i+1]=av.get(1); 882 r[i+2]=av.get(2); 883 r[i+3]=av.get(3); 884 r[i+4]=av.get(4); 885 r[i+5]=av.get(5); 886 r[i+6]=av.get(6); 887 r[i+7]=av.get(7); 888 r[i+8]=av.get(8); 889 r[i+9]=av.get(9); 890 r[i+10]=av.get(10); 891 r[i+11]=av.get(11); 892 r[i+12]=av.get(12); 893 r[i+13]=av.get(13); 894 r[i+14]=av.get(14); 895 r[i+15]=av.get(15); 896 r[i+16]=av.get(16); 897 r[i+17]=av.get(17); 898 r[i+18]=av.get(18); 899 r[i+19]=av.get(19); 900 r[i+20]=av.get(20); 901 r[i+21]=av.get(21); 902 r[i+22]=av.get(22); 903 r[i+23]=av.get(23); 904 r[i+24]=av.get(24); 905 r[i+25]=av.get(25); 906 r[i+26]=av.get(26); 907 r[i+27]=av.get(27); 908 r[i+28]=av.get(28); 909 r[i+29]=av.get(29); 910 r[i+30]=av.get(30); 911 r[i+31]=av.get(31); 912 r[i+32]=av.get(32); 913 r[i+33]=av.get(33); 914 r[i+34]=av.get(34); 915 r[i+35]=av.get(35); 916 r[i+36]=av.get(36); 917 r[i+37]=av.get(37); 918 r[i+38]=av.get(38); 919 r[i+39]=av.get(39); 920 r[i+40]=av.get(40); 921 r[i+41]=av.get(41); 922 r[i+42]=av.get(42); 923 r[i+43]=av.get(43); 924 r[i+44]=av.get(44); 925 r[i+45]=av.get(45); 926 r[i+46]=av.get(46); 927 r[i+47]=av.get(47); 928 r[i+48]=av.get(48); 929 r[i+49]=av.get(49); 930 r[i+50]=av.get(50); 931 r[i+51]=av.get(51); 932 r[i+52]=av.get(52); 933 r[i+53]=av.get(53); 934 r[i+54]=av.get(54); 935 r[i+55]=av.get(55); 936 r[i+56]=av.get(56); 937 r[i+57]=av.get(57); 938 r[i+58]=av.get(58); 939 r[i+59]=av.get(59); 940 r[i+60]=av.get(60); 941 r[i+61]=av.get(61); 942 r[i+62]=av.get(62); 943 r[i+63]=av.get(63); 944 } else { 945 for (int j = 0; j < SPECIES.length(); j++) { 946 r[i+j]=av.get(j); 947 } 948 } 949 } 950 } 951 952 bh.consume(r); 953 } 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 @Benchmark 976 public void neg(Blackhole bh) { 977 short[] a = fa.apply(SPECIES.length()); 978 short[] r = fr.apply(SPECIES.length()); 979 980 for (int ic = 0; ic < INVOC_COUNT; ic++) { 981 for (int i = 0; i < a.length; i += SPECIES.length()) { 982 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 983 av.neg().intoArray(r, i); 984 } 985 } 986 987 bh.consume(r); 988 } 989 990 @Benchmark 991 public void negMasked(Blackhole bh) { 992 short[] a = fa.apply(SPECIES.length()); 993 short[] r = fr.apply(SPECIES.length()); 994 boolean[] mask = fm.apply(SPECIES.length()); 995 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 996 997 for (int ic = 0; ic < INVOC_COUNT; ic++) { 998 for (int i = 0; i < a.length; i += SPECIES.length()) { 999 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1000 av.neg(vmask).intoArray(r, i); 1001 } 1002 } 1003 1004 bh.consume(r); 1005 } 1006 1007 @Benchmark 1008 public void abs(Blackhole bh) { 1009 short[] a = fa.apply(SPECIES.length()); 1010 short[] r = fr.apply(SPECIES.length()); 1011 1012 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1013 for (int i = 0; i < a.length; i += SPECIES.length()) { 1014 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1015 av.abs().intoArray(r, i); 1016 } 1017 } 1018 1019 bh.consume(r); 1020 } 1021 1022 @Benchmark 1023 public void absMasked(Blackhole bh) { 1024 short[] a = fa.apply(SPECIES.length()); 1025 short[] r = fr.apply(SPECIES.length()); 1026 boolean[] mask = fm.apply(SPECIES.length()); 1027 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 1028 1029 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1030 for (int i = 0; i < a.length; i += SPECIES.length()) { 1031 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1032 av.abs(vmask).intoArray(r, i); 1033 } 1034 } 1035 1036 bh.consume(r); 1037 } 1038 1039 1040 @Benchmark 1041 public void not(Blackhole bh) { 1042 short[] a = fa.apply(SPECIES.length()); 1043 short[] r = fr.apply(SPECIES.length()); 1044 1045 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1046 for (int i = 0; i < a.length; i += SPECIES.length()) { 1047 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1048 av.not().intoArray(r, i); 1049 } 1050 } 1051 1052 bh.consume(r); 1053 } 1054 1055 1056 1057 @Benchmark 1058 public void notMasked(Blackhole bh) { 1059 short[] a = fa.apply(SPECIES.length()); 1060 short[] r = fr.apply(SPECIES.length()); 1061 boolean[] mask = fm.apply(SPECIES.length()); 1062 Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask); 1063 1064 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1065 for (int i = 0; i < a.length; i += SPECIES.length()) { 1066 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1067 av.not(vmask).intoArray(r, i); 1068 } 1069 } 1070 1071 bh.consume(r); 1072 } 1073 1074 1075 1076 1077 1078 } 1079