1 /* 2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 import jdk.incubator.vector.Vector; 27 import jdk.incubator.vector.VectorShape; 28 import jdk.incubator.vector.VectorSpecies; 29 import jdk.incubator.vector.VectorShuffle; 30 import jdk.incubator.vector.ShortVector; 31 32 import java.util.concurrent.TimeUnit; 33 import java.util.function.BiFunction; 34 import java.util.function.IntFunction; 35 36 import org.openjdk.jmh.annotations.*; 37 import org.openjdk.jmh.infra.Blackhole; 38 39 @BenchmarkMode(Mode.Throughput) 40 @OutputTimeUnit(TimeUnit.MILLISECONDS) 41 @State(Scope.Benchmark) 42 @Warmup(iterations = 3, time = 1) 43 @Measurement(iterations = 5, time = 1) 44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 45 public class Short64Vector extends AbstractVectorBenchmark { 46 static final VectorSpecies<Short> SPECIES = ShortVector.SPECIES_64; 47 48 static final int INVOC_COUNT = 1; // get rid of outer loop 49 50 @Param("1024") 51 int size; 52 53 short[] fill(IntFunction<Short> f) { 54 short[] array = new short[size]; 55 for (int i = 0; i < array.length; i++) { 56 array[i] = f.apply(i); 57 } 58 return array; 59 } 60 61 short[] a, b, c, r; 62 boolean[] m, rm; 63 int[] s; 64 65 @Setup 66 public void init() { 67 size += size % SPECIES.length(); // FIXME: add post-loops 68 69 a = fill(i -> (short)(2*i)); 70 b = fill(i -> (short)(i+1)); 71 c = fill(i -> (short)(i+5)); 72 r = fill(i -> (short)0); 73 74 m = fillMask(size, i -> (i % 2) == 0); 75 rm = fillMask(size, i -> false); 76 77 s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length())); 78 } 79 80 final IntFunction<short[]> fa = vl -> a; 81 final IntFunction<short[]> fb = vl -> b; 82 final IntFunction<short[]> fc = vl -> c; 83 final IntFunction<short[]> fr = vl -> r; 84 final IntFunction<boolean[]> fm = vl -> m; 85 final IntFunction<boolean[]> fmr = vl -> rm; 86 final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; 87 88 89 @Benchmark 90 public void add(Blackhole bh) { 91 short[] a = fa.apply(SPECIES.length()); 92 short[] b = fb.apply(SPECIES.length()); 93 short[] r = fr.apply(SPECIES.length()); 94 95 for (int ic = 0; ic < INVOC_COUNT; ic++) { 96 for (int i = 0; i < a.length; i += SPECIES.length()) { 97 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 98 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 99 av.add(bv).intoArray(r, i); 100 } 101 } 102 103 bh.consume(r); 104 } 105 106 @Benchmark 107 public void addMasked(Blackhole bh) { 108 short[] a = fa.apply(SPECIES.length()); 109 short[] b = fb.apply(SPECIES.length()); 110 short[] r = fr.apply(SPECIES.length()); 111 boolean[] mask = fm.apply(SPECIES.length()); 112 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 113 114 for (int ic = 0; ic < INVOC_COUNT; ic++) { 115 for (int i = 0; i < a.length; i += SPECIES.length()) { 116 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 117 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 118 av.add(bv, vmask).intoArray(r, i); 119 } 120 } 121 122 bh.consume(r); 123 } 124 125 @Benchmark 126 public void sub(Blackhole bh) { 127 short[] a = fa.apply(SPECIES.length()); 128 short[] b = fb.apply(SPECIES.length()); 129 short[] r = fr.apply(SPECIES.length()); 130 131 for (int ic = 0; ic < INVOC_COUNT; ic++) { 132 for (int i = 0; i < a.length; i += SPECIES.length()) { 133 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 134 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 135 av.sub(bv).intoArray(r, i); 136 } 137 } 138 139 bh.consume(r); 140 } 141 142 @Benchmark 143 public void subMasked(Blackhole bh) { 144 short[] a = fa.apply(SPECIES.length()); 145 short[] b = fb.apply(SPECIES.length()); 146 short[] r = fr.apply(SPECIES.length()); 147 boolean[] mask = fm.apply(SPECIES.length()); 148 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 149 150 for (int ic = 0; ic < INVOC_COUNT; ic++) { 151 for (int i = 0; i < a.length; i += SPECIES.length()) { 152 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 153 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 154 av.sub(bv, vmask).intoArray(r, i); 155 } 156 } 157 158 bh.consume(r); 159 } 160 161 162 163 @Benchmark 164 public void mul(Blackhole bh) { 165 short[] a = fa.apply(SPECIES.length()); 166 short[] b = fb.apply(SPECIES.length()); 167 short[] r = fr.apply(SPECIES.length()); 168 169 for (int ic = 0; ic < INVOC_COUNT; ic++) { 170 for (int i = 0; i < a.length; i += SPECIES.length()) { 171 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 172 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 173 av.mul(bv).intoArray(r, i); 174 } 175 } 176 177 bh.consume(r); 178 } 179 180 @Benchmark 181 public void mulMasked(Blackhole bh) { 182 short[] a = fa.apply(SPECIES.length()); 183 short[] b = fb.apply(SPECIES.length()); 184 short[] r = fr.apply(SPECIES.length()); 185 boolean[] mask = fm.apply(SPECIES.length()); 186 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 187 188 for (int ic = 0; ic < INVOC_COUNT; ic++) { 189 for (int i = 0; i < a.length; i += SPECIES.length()) { 190 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 191 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 192 av.mul(bv, vmask).intoArray(r, i); 193 } 194 } 195 196 bh.consume(r); 197 } 198 199 200 @Benchmark 201 public void and(Blackhole bh) { 202 short[] a = fa.apply(SPECIES.length()); 203 short[] b = fb.apply(SPECIES.length()); 204 short[] r = fr.apply(SPECIES.length()); 205 206 for (int ic = 0; ic < INVOC_COUNT; ic++) { 207 for (int i = 0; i < a.length; i += SPECIES.length()) { 208 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 209 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 210 av.and(bv).intoArray(r, i); 211 } 212 } 213 214 bh.consume(r); 215 } 216 217 218 219 @Benchmark 220 public void andMasked(Blackhole bh) { 221 short[] a = fa.apply(SPECIES.length()); 222 short[] b = fb.apply(SPECIES.length()); 223 short[] r = fr.apply(SPECIES.length()); 224 boolean[] mask = fm.apply(SPECIES.length()); 225 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 226 227 for (int ic = 0; ic < INVOC_COUNT; ic++) { 228 for (int i = 0; i < a.length; i += SPECIES.length()) { 229 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 230 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 231 av.and(bv, vmask).intoArray(r, i); 232 } 233 } 234 235 bh.consume(r); 236 } 237 238 239 240 @Benchmark 241 public void or(Blackhole bh) { 242 short[] a = fa.apply(SPECIES.length()); 243 short[] b = fb.apply(SPECIES.length()); 244 short[] r = fr.apply(SPECIES.length()); 245 246 for (int ic = 0; ic < INVOC_COUNT; ic++) { 247 for (int i = 0; i < a.length; i += SPECIES.length()) { 248 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 249 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 250 av.or(bv).intoArray(r, i); 251 } 252 } 253 254 bh.consume(r); 255 } 256 257 258 259 @Benchmark 260 public void orMasked(Blackhole bh) { 261 short[] a = fa.apply(SPECIES.length()); 262 short[] b = fb.apply(SPECIES.length()); 263 short[] r = fr.apply(SPECIES.length()); 264 boolean[] mask = fm.apply(SPECIES.length()); 265 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 266 267 for (int ic = 0; ic < INVOC_COUNT; ic++) { 268 for (int i = 0; i < a.length; i += SPECIES.length()) { 269 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 270 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 271 av.or(bv, vmask).intoArray(r, i); 272 } 273 } 274 275 bh.consume(r); 276 } 277 278 279 280 @Benchmark 281 public void xor(Blackhole bh) { 282 short[] a = fa.apply(SPECIES.length()); 283 short[] b = fb.apply(SPECIES.length()); 284 short[] r = fr.apply(SPECIES.length()); 285 286 for (int ic = 0; ic < INVOC_COUNT; ic++) { 287 for (int i = 0; i < a.length; i += SPECIES.length()) { 288 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 289 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 290 av.xor(bv).intoArray(r, i); 291 } 292 } 293 294 bh.consume(r); 295 } 296 297 298 299 @Benchmark 300 public void xorMasked(Blackhole bh) { 301 short[] a = fa.apply(SPECIES.length()); 302 short[] b = fb.apply(SPECIES.length()); 303 short[] r = fr.apply(SPECIES.length()); 304 boolean[] mask = fm.apply(SPECIES.length()); 305 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 306 307 for (int ic = 0; ic < INVOC_COUNT; ic++) { 308 for (int i = 0; i < a.length; i += SPECIES.length()) { 309 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 310 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 311 av.xor(bv, vmask).intoArray(r, i); 312 } 313 } 314 315 bh.consume(r); 316 } 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 @Benchmark 339 public void aShiftRShift(Blackhole bh) { 340 short[] a = fa.apply(SPECIES.length()); 341 short[] b = fb.apply(SPECIES.length()); 342 short[] r = fr.apply(SPECIES.length()); 343 344 for (int ic = 0; ic < INVOC_COUNT; ic++) { 345 for (int i = 0; i < a.length; i += SPECIES.length()) { 346 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 347 av.aShiftR((int)b[i]).intoArray(r, i); 348 } 349 } 350 351 bh.consume(r); 352 } 353 354 355 356 @Benchmark 357 public void aShiftRMaskedShift(Blackhole bh) { 358 short[] a = fa.apply(SPECIES.length()); 359 short[] b = fb.apply(SPECIES.length()); 360 short[] r = fr.apply(SPECIES.length()); 361 boolean[] mask = fm.apply(SPECIES.length()); 362 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 363 364 for (int ic = 0; ic < INVOC_COUNT; ic++) { 365 for (int i = 0; i < a.length; i += SPECIES.length()) { 366 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 367 av.aShiftR((int)b[i], vmask).intoArray(r, i); 368 } 369 } 370 371 bh.consume(r); 372 } 373 374 375 376 @Benchmark 377 public void shiftLShift(Blackhole bh) { 378 short[] a = fa.apply(SPECIES.length()); 379 short[] b = fb.apply(SPECIES.length()); 380 short[] r = fr.apply(SPECIES.length()); 381 382 for (int ic = 0; ic < INVOC_COUNT; ic++) { 383 for (int i = 0; i < a.length; i += SPECIES.length()) { 384 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 385 av.shiftL((int)b[i]).intoArray(r, i); 386 } 387 } 388 389 bh.consume(r); 390 } 391 392 393 394 @Benchmark 395 public void shiftLMaskedShift(Blackhole bh) { 396 short[] a = fa.apply(SPECIES.length()); 397 short[] b = fb.apply(SPECIES.length()); 398 short[] r = fr.apply(SPECIES.length()); 399 boolean[] mask = fm.apply(SPECIES.length()); 400 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 401 402 for (int ic = 0; ic < INVOC_COUNT; ic++) { 403 for (int i = 0; i < a.length; i += SPECIES.length()) { 404 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 405 av.shiftL((int)b[i], vmask).intoArray(r, i); 406 } 407 } 408 409 bh.consume(r); 410 } 411 412 413 414 @Benchmark 415 public void shiftRShift(Blackhole bh) { 416 short[] a = fa.apply(SPECIES.length()); 417 short[] b = fb.apply(SPECIES.length()); 418 short[] r = fr.apply(SPECIES.length()); 419 420 for (int ic = 0; ic < INVOC_COUNT; ic++) { 421 for (int i = 0; i < a.length; i += SPECIES.length()) { 422 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 423 av.shiftR((int)b[i]).intoArray(r, i); 424 } 425 } 426 427 bh.consume(r); 428 } 429 430 431 432 @Benchmark 433 public void shiftRMaskedShift(Blackhole bh) { 434 short[] a = fa.apply(SPECIES.length()); 435 short[] b = fb.apply(SPECIES.length()); 436 short[] r = fr.apply(SPECIES.length()); 437 boolean[] mask = fm.apply(SPECIES.length()); 438 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 439 440 for (int ic = 0; ic < INVOC_COUNT; ic++) { 441 for (int i = 0; i < a.length; i += SPECIES.length()) { 442 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 443 av.shiftR((int)b[i], vmask).intoArray(r, i); 444 } 445 } 446 447 bh.consume(r); 448 } 449 450 451 @Benchmark 452 public void max(Blackhole bh) { 453 short[] a = fa.apply(SPECIES.length()); 454 short[] b = fb.apply(SPECIES.length()); 455 short[] r = fr.apply(SPECIES.length()); 456 457 for (int ic = 0; ic < INVOC_COUNT; ic++) { 458 for (int i = 0; i < a.length; i += SPECIES.length()) { 459 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 460 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 461 av.max(bv).intoArray(r, i); 462 } 463 } 464 465 bh.consume(r); 466 } 467 468 @Benchmark 469 public void min(Blackhole bh) { 470 short[] a = fa.apply(SPECIES.length()); 471 short[] b = fb.apply(SPECIES.length()); 472 short[] r = fr.apply(SPECIES.length()); 473 474 for (int ic = 0; ic < INVOC_COUNT; ic++) { 475 for (int i = 0; i < a.length; i += SPECIES.length()) { 476 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 477 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 478 av.min(bv).intoArray(r, i); 479 } 480 } 481 482 bh.consume(r); 483 } 484 485 486 @Benchmark 487 public void andAll(Blackhole bh) { 488 short[] a = fa.apply(SPECIES.length()); 489 short ra = -1; 490 491 for (int ic = 0; ic < INVOC_COUNT; ic++) { 492 ra = -1; 493 for (int i = 0; i < a.length; i += SPECIES.length()) { 494 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 495 ra &= av.andAll(); 496 } 497 } 498 bh.consume(ra); 499 } 500 501 502 503 @Benchmark 504 public void orAll(Blackhole bh) { 505 short[] a = fa.apply(SPECIES.length()); 506 short ra = 0; 507 508 for (int ic = 0; ic < INVOC_COUNT; ic++) { 509 ra = 0; 510 for (int i = 0; i < a.length; i += SPECIES.length()) { 511 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 512 ra |= av.orAll(); 513 } 514 } 515 bh.consume(ra); 516 } 517 518 519 520 @Benchmark 521 public void xorAll(Blackhole bh) { 522 short[] a = fa.apply(SPECIES.length()); 523 short ra = 0; 524 525 for (int ic = 0; ic < INVOC_COUNT; ic++) { 526 ra = 0; 527 for (int i = 0; i < a.length; i += SPECIES.length()) { 528 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 529 ra ^= av.xorAll(); 530 } 531 } 532 bh.consume(ra); 533 } 534 535 536 @Benchmark 537 public void addAll(Blackhole bh) { 538 short[] a = fa.apply(SPECIES.length()); 539 short ra = 0; 540 541 for (int ic = 0; ic < INVOC_COUNT; ic++) { 542 ra = 0; 543 for (int i = 0; i < a.length; i += SPECIES.length()) { 544 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 545 ra += av.addAll(); 546 } 547 } 548 bh.consume(ra); 549 } 550 551 @Benchmark 552 public void mulAll(Blackhole bh) { 553 short[] a = fa.apply(SPECIES.length()); 554 short ra = 1; 555 556 for (int ic = 0; ic < INVOC_COUNT; ic++) { 557 ra = 1; 558 for (int i = 0; i < a.length; i += SPECIES.length()) { 559 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 560 ra *= av.mulAll(); 561 } 562 } 563 bh.consume(ra); 564 } 565 566 @Benchmark 567 public void minAll(Blackhole bh) { 568 short[] a = fa.apply(SPECIES.length()); 569 short ra = Short.MAX_VALUE; 570 571 for (int ic = 0; ic < INVOC_COUNT; ic++) { 572 ra = Short.MAX_VALUE; 573 for (int i = 0; i < a.length; i += SPECIES.length()) { 574 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 575 ra = (short)Math.min(ra, av.minAll()); 576 } 577 } 578 bh.consume(ra); 579 } 580 581 @Benchmark 582 public void maxAll(Blackhole bh) { 583 short[] a = fa.apply(SPECIES.length()); 584 short ra = Short.MIN_VALUE; 585 586 for (int ic = 0; ic < INVOC_COUNT; ic++) { 587 ra = Short.MIN_VALUE; 588 for (int i = 0; i < a.length; i += SPECIES.length()) { 589 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 590 ra = (short)Math.max(ra, av.maxAll()); 591 } 592 } 593 bh.consume(ra); 594 } 595 596 597 @Benchmark 598 public void anyTrue(Blackhole bh) { 599 boolean[] mask = fm.apply(SPECIES.length()); 600 boolean[] r = fmr.apply(SPECIES.length()); 601 602 for (int ic = 0; ic < INVOC_COUNT; ic++) { 603 for (int i = 0; i < mask.length; i += SPECIES.length()) { 604 VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, i); 605 r[i] = vmask.anyTrue(); 606 } 607 } 608 609 bh.consume(r); 610 } 611 612 613 614 @Benchmark 615 public void allTrue(Blackhole bh) { 616 boolean[] mask = fm.apply(SPECIES.length()); 617 boolean[] r = fmr.apply(SPECIES.length()); 618 619 for (int ic = 0; ic < INVOC_COUNT; ic++) { 620 for (int i = 0; i < mask.length; i += SPECIES.length()) { 621 VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, i); 622 r[i] = vmask.allTrue(); 623 } 624 } 625 626 bh.consume(r); 627 } 628 629 630 @Benchmark 631 public void with(Blackhole bh) { 632 short[] a = fa.apply(SPECIES.length()); 633 short[] r = fr.apply(SPECIES.length()); 634 635 for (int ic = 0; ic < INVOC_COUNT; ic++) { 636 for (int i = 0; i < a.length; i += SPECIES.length()) { 637 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 638 av.with(0, (short)4).intoArray(r, i); 639 } 640 } 641 642 bh.consume(r); 643 } 644 645 @Benchmark 646 public Object lessThan() { 647 short[] a = fa.apply(size); 648 short[] b = fb.apply(size); 649 boolean[] ms = fm.apply(size); 650 VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0); 651 652 for (int ic = 0; ic < INVOC_COUNT; ic++) { 653 for (int i = 0; i < a.length; i += SPECIES.length()) { 654 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 655 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 656 VectorMask<Short> mv = av.lessThan(bv); 657 658 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 659 } 660 } 661 return m; 662 } 663 664 665 @Benchmark 666 public Object greaterThan() { 667 short[] a = fa.apply(size); 668 short[] b = fb.apply(size); 669 boolean[] ms = fm.apply(size); 670 VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0); 671 672 for (int ic = 0; ic < INVOC_COUNT; ic++) { 673 for (int i = 0; i < a.length; i += SPECIES.length()) { 674 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 675 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 676 VectorMask<Short> mv = av.greaterThan(bv); 677 678 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 679 } 680 } 681 return m; 682 } 683 684 685 @Benchmark 686 public Object equal() { 687 short[] a = fa.apply(size); 688 short[] b = fb.apply(size); 689 boolean[] ms = fm.apply(size); 690 VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0); 691 692 for (int ic = 0; ic < INVOC_COUNT; ic++) { 693 for (int i = 0; i < a.length; i += SPECIES.length()) { 694 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 695 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 696 VectorMask<Short> mv = av.equal(bv); 697 698 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 699 } 700 } 701 return m; 702 } 703 704 705 @Benchmark 706 public Object notEqual() { 707 short[] a = fa.apply(size); 708 short[] b = fb.apply(size); 709 boolean[] ms = fm.apply(size); 710 VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0); 711 712 for (int ic = 0; ic < INVOC_COUNT; ic++) { 713 for (int i = 0; i < a.length; i += SPECIES.length()) { 714 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 715 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 716 VectorMask<Short> mv = av.notEqual(bv); 717 718 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 719 } 720 } 721 return m; 722 } 723 724 725 @Benchmark 726 public Object lessThanEq() { 727 short[] a = fa.apply(size); 728 short[] b = fb.apply(size); 729 boolean[] ms = fm.apply(size); 730 VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0); 731 732 for (int ic = 0; ic < INVOC_COUNT; ic++) { 733 for (int i = 0; i < a.length; i += SPECIES.length()) { 734 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 735 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 736 VectorMask<Short> mv = av.lessThanEq(bv); 737 738 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 739 } 740 } 741 return m; 742 } 743 744 745 @Benchmark 746 public Object greaterThanEq() { 747 short[] a = fa.apply(size); 748 short[] b = fb.apply(size); 749 boolean[] ms = fm.apply(size); 750 VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0); 751 752 for (int ic = 0; ic < INVOC_COUNT; ic++) { 753 for (int i = 0; i < a.length; i += SPECIES.length()) { 754 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 755 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 756 VectorMask<Short> mv = av.greaterThanEq(bv); 757 758 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 759 } 760 } 761 return m; 762 } 763 764 765 @Benchmark 766 public void blend(Blackhole bh) { 767 short[] a = fa.apply(SPECIES.length()); 768 short[] b = fb.apply(SPECIES.length()); 769 short[] r = fr.apply(SPECIES.length()); 770 boolean[] mask = fm.apply(SPECIES.length()); 771 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 772 773 for (int ic = 0; ic < INVOC_COUNT; ic++) { 774 for (int i = 0; i < a.length; i += SPECIES.length()) { 775 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 776 ShortVector bv = ShortVector.fromArray(SPECIES, b, i); 777 av.blend(bv, vmask).intoArray(r, i); 778 } 779 } 780 781 bh.consume(r); 782 } 783 784 @Benchmark 785 public void rearrange(Blackhole bh) { 786 short[] a = fa.apply(SPECIES.length()); 787 int[] order = fs.apply(a.length, SPECIES.length()); 788 short[] r = fr.apply(SPECIES.length()); 789 790 for (int ic = 0; ic < INVOC_COUNT; ic++) { 791 for (int i = 0; i < a.length; i += SPECIES.length()) { 792 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 793 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i); 794 } 795 } 796 797 bh.consume(r); 798 } 799 800 @Benchmark 801 public void extract(Blackhole bh) { 802 short[] a = fa.apply(SPECIES.length()); 803 short[] r = fr.apply(SPECIES.length()); 804 805 for (int ic = 0; ic < INVOC_COUNT; ic++) { 806 for (int i = 0; i < a.length; i += SPECIES.length()) { 807 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 808 int num_lanes = SPECIES.length(); 809 // Manually unroll because full unroll happens after intrinsification. 810 // Unroll is needed because get intrinsic requires for index to be a known constant. 811 if (num_lanes == 1) { 812 r[i]=av.lane(0); 813 } else if (num_lanes == 2) { 814 r[i]=av.lane(0); 815 r[i+1]=av.lane(1); 816 } else if (num_lanes == 4) { 817 r[i]=av.lane(0); 818 r[i+1]=av.lane(1); 819 r[i+2]=av.lane(2); 820 r[i+3]=av.lane(3); 821 } else if (num_lanes == 8) { 822 r[i]=av.lane(0); 823 r[i+1]=av.lane(1); 824 r[i+2]=av.lane(2); 825 r[i+3]=av.lane(3); 826 r[i+4]=av.lane(4); 827 r[i+5]=av.lane(5); 828 r[i+6]=av.lane(6); 829 r[i+7]=av.lane(7); 830 } else if (num_lanes == 16) { 831 r[i]=av.lane(0); 832 r[i+1]=av.lane(1); 833 r[i+2]=av.lane(2); 834 r[i+3]=av.lane(3); 835 r[i+4]=av.lane(4); 836 r[i+5]=av.lane(5); 837 r[i+6]=av.lane(6); 838 r[i+7]=av.lane(7); 839 r[i+8]=av.lane(8); 840 r[i+9]=av.lane(9); 841 r[i+10]=av.lane(10); 842 r[i+11]=av.lane(11); 843 r[i+12]=av.lane(12); 844 r[i+13]=av.lane(13); 845 r[i+14]=av.lane(14); 846 r[i+15]=av.lane(15); 847 } else if (num_lanes == 32) { 848 r[i]=av.lane(0); 849 r[i+1]=av.lane(1); 850 r[i+2]=av.lane(2); 851 r[i+3]=av.lane(3); 852 r[i+4]=av.lane(4); 853 r[i+5]=av.lane(5); 854 r[i+6]=av.lane(6); 855 r[i+7]=av.lane(7); 856 r[i+8]=av.lane(8); 857 r[i+9]=av.lane(9); 858 r[i+10]=av.lane(10); 859 r[i+11]=av.lane(11); 860 r[i+12]=av.lane(12); 861 r[i+13]=av.lane(13); 862 r[i+14]=av.lane(14); 863 r[i+15]=av.lane(15); 864 r[i+16]=av.lane(16); 865 r[i+17]=av.lane(17); 866 r[i+18]=av.lane(18); 867 r[i+19]=av.lane(19); 868 r[i+20]=av.lane(20); 869 r[i+21]=av.lane(21); 870 r[i+22]=av.lane(22); 871 r[i+23]=av.lane(23); 872 r[i+24]=av.lane(24); 873 r[i+25]=av.lane(25); 874 r[i+26]=av.lane(26); 875 r[i+27]=av.lane(27); 876 r[i+28]=av.lane(28); 877 r[i+29]=av.lane(29); 878 r[i+30]=av.lane(30); 879 r[i+31]=av.lane(31); 880 } else if (num_lanes == 64) { 881 r[i]=av.lane(0); 882 r[i+1]=av.lane(1); 883 r[i+2]=av.lane(2); 884 r[i+3]=av.lane(3); 885 r[i+4]=av.lane(4); 886 r[i+5]=av.lane(5); 887 r[i+6]=av.lane(6); 888 r[i+7]=av.lane(7); 889 r[i+8]=av.lane(8); 890 r[i+9]=av.lane(9); 891 r[i+10]=av.lane(10); 892 r[i+11]=av.lane(11); 893 r[i+12]=av.lane(12); 894 r[i+13]=av.lane(13); 895 r[i+14]=av.lane(14); 896 r[i+15]=av.lane(15); 897 r[i+16]=av.lane(16); 898 r[i+17]=av.lane(17); 899 r[i+18]=av.lane(18); 900 r[i+19]=av.lane(19); 901 r[i+20]=av.lane(20); 902 r[i+21]=av.lane(21); 903 r[i+22]=av.lane(22); 904 r[i+23]=av.lane(23); 905 r[i+24]=av.lane(24); 906 r[i+25]=av.lane(25); 907 r[i+26]=av.lane(26); 908 r[i+27]=av.lane(27); 909 r[i+28]=av.lane(28); 910 r[i+29]=av.lane(29); 911 r[i+30]=av.lane(30); 912 r[i+31]=av.lane(31); 913 r[i+32]=av.lane(32); 914 r[i+33]=av.lane(33); 915 r[i+34]=av.lane(34); 916 r[i+35]=av.lane(35); 917 r[i+36]=av.lane(36); 918 r[i+37]=av.lane(37); 919 r[i+38]=av.lane(38); 920 r[i+39]=av.lane(39); 921 r[i+40]=av.lane(40); 922 r[i+41]=av.lane(41); 923 r[i+42]=av.lane(42); 924 r[i+43]=av.lane(43); 925 r[i+44]=av.lane(44); 926 r[i+45]=av.lane(45); 927 r[i+46]=av.lane(46); 928 r[i+47]=av.lane(47); 929 r[i+48]=av.lane(48); 930 r[i+49]=av.lane(49); 931 r[i+50]=av.lane(50); 932 r[i+51]=av.lane(51); 933 r[i+52]=av.lane(52); 934 r[i+53]=av.lane(53); 935 r[i+54]=av.lane(54); 936 r[i+55]=av.lane(55); 937 r[i+56]=av.lane(56); 938 r[i+57]=av.lane(57); 939 r[i+58]=av.lane(58); 940 r[i+59]=av.lane(59); 941 r[i+60]=av.lane(60); 942 r[i+61]=av.lane(61); 943 r[i+62]=av.lane(62); 944 r[i+63]=av.lane(63); 945 } else { 946 for (int j = 0; j < SPECIES.length(); j++) { 947 r[i+j]=av.lane(j); 948 } 949 } 950 } 951 } 952 953 bh.consume(r); 954 } 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 @Benchmark 977 public void neg(Blackhole bh) { 978 short[] a = fa.apply(SPECIES.length()); 979 short[] r = fr.apply(SPECIES.length()); 980 981 for (int ic = 0; ic < INVOC_COUNT; ic++) { 982 for (int i = 0; i < a.length; i += SPECIES.length()) { 983 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 984 av.neg().intoArray(r, i); 985 } 986 } 987 988 bh.consume(r); 989 } 990 991 @Benchmark 992 public void negMasked(Blackhole bh) { 993 short[] a = fa.apply(SPECIES.length()); 994 short[] r = fr.apply(SPECIES.length()); 995 boolean[] mask = fm.apply(SPECIES.length()); 996 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 997 998 for (int ic = 0; ic < INVOC_COUNT; ic++) { 999 for (int i = 0; i < a.length; i += SPECIES.length()) { 1000 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1001 av.neg(vmask).intoArray(r, i); 1002 } 1003 } 1004 1005 bh.consume(r); 1006 } 1007 1008 @Benchmark 1009 public void abs(Blackhole bh) { 1010 short[] a = fa.apply(SPECIES.length()); 1011 short[] r = fr.apply(SPECIES.length()); 1012 1013 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1014 for (int i = 0; i < a.length; i += SPECIES.length()) { 1015 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1016 av.abs().intoArray(r, i); 1017 } 1018 } 1019 1020 bh.consume(r); 1021 } 1022 1023 @Benchmark 1024 public void absMasked(Blackhole bh) { 1025 short[] a = fa.apply(SPECIES.length()); 1026 short[] r = fr.apply(SPECIES.length()); 1027 boolean[] mask = fm.apply(SPECIES.length()); 1028 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 1029 1030 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1031 for (int i = 0; i < a.length; i += SPECIES.length()) { 1032 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1033 av.abs(vmask).intoArray(r, i); 1034 } 1035 } 1036 1037 bh.consume(r); 1038 } 1039 1040 1041 @Benchmark 1042 public void not(Blackhole bh) { 1043 short[] a = fa.apply(SPECIES.length()); 1044 short[] r = fr.apply(SPECIES.length()); 1045 1046 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1047 for (int i = 0; i < a.length; i += SPECIES.length()) { 1048 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1049 av.not().intoArray(r, i); 1050 } 1051 } 1052 1053 bh.consume(r); 1054 } 1055 1056 1057 1058 @Benchmark 1059 public void notMasked(Blackhole bh) { 1060 short[] a = fa.apply(SPECIES.length()); 1061 short[] r = fr.apply(SPECIES.length()); 1062 boolean[] mask = fm.apply(SPECIES.length()); 1063 VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask); 1064 1065 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1066 for (int i = 0; i < a.length; i += SPECIES.length()) { 1067 ShortVector av = ShortVector.fromArray(SPECIES, a, i); 1068 av.not(vmask).intoArray(r, i); 1069 } 1070 } 1071 1072 bh.consume(r); 1073 } 1074 1075 1076 1077 1078 1079 } 1080