1 /* 2 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 // -- This file was mechanically generated: Do not edit! -- // 27 28 import jdk.incubator.vector.Vector; 29 import jdk.incubator.vector.VectorMask; 30 import jdk.incubator.vector.VectorOperators; 31 import jdk.incubator.vector.VectorShape; 32 import jdk.incubator.vector.VectorSpecies; 33 import jdk.incubator.vector.VectorShuffle; 34 import jdk.incubator.vector.ByteVector; 35 36 import java.util.concurrent.TimeUnit; 37 import java.util.function.BiFunction; 38 import java.util.function.IntFunction; 39 40 import org.openjdk.jmh.annotations.*; 41 import org.openjdk.jmh.infra.Blackhole; 42 43 @BenchmarkMode(Mode.Throughput) 44 @OutputTimeUnit(TimeUnit.MILLISECONDS) 45 @State(Scope.Benchmark) 46 @Warmup(iterations = 3, time = 1) 47 @Measurement(iterations = 5, time = 1) 48 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 49 public class Byte256Vector extends AbstractVectorBenchmark { 50 static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_256; 51 52 static final int INVOC_COUNT = 1; // get rid of outer loop 53 54 static void replaceZero(byte[] a, byte v) { 55 for (int i = 0; i < a.length; i++) { 56 if (a[i] == 0) { 57 a[i] = v; 58 } 59 } 60 } 61 62 static void replaceZero(byte[] a, boolean[] mask, byte v) { 63 for (int i = 0; i < a.length; i++) { 64 if (mask[i % mask.length] && a[i] == 0) { 65 a[i] = v; 66 } 67 } 68 } 69 70 @Param("1024") 71 int size; 72 73 byte[] fill(IntFunction<Byte> f) { 74 byte[] array = new byte[size]; 75 for (int i = 0; i < array.length; i++) { 76 array[i] = f.apply(i); 77 } 78 return array; 79 } 80 81 byte[] a, b, c, r; 82 boolean[] m, mt, rm; 83 int[] s; 84 85 @Setup 86 public void init() { 87 size += size % SPECIES.length(); // FIXME: add post-loops 88 89 a = fill(i -> (byte)(2*i)); 90 b = fill(i -> (byte)(i+1)); 91 c = fill(i -> (byte)(i+5)); 92 r = fill(i -> (byte)0); 93 94 m = fillMask(size, i -> (i % 2) == 0); 95 mt = fillMask(size, i -> true); 96 rm = fillMask(size, i -> false); 97 98 s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length())); 99 } 100 101 final IntFunction<byte[]> fa = vl -> a; 102 final IntFunction<byte[]> fb = vl -> b; 103 final IntFunction<byte[]> fc = vl -> c; 104 final IntFunction<byte[]> fr = vl -> r; 105 final IntFunction<boolean[]> fm = vl -> m; 106 final IntFunction<boolean[]> fmt = vl -> mt; 107 final IntFunction<boolean[]> fmr = vl -> rm; 108 final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; 109 110 111 @Benchmark 112 public void ADD(Blackhole bh) { 113 byte[] a = fa.apply(SPECIES.length()); 114 byte[] b = fb.apply(SPECIES.length()); 115 byte[] r = fr.apply(SPECIES.length()); 116 117 for (int ic = 0; ic < INVOC_COUNT; ic++) { 118 for (int i = 0; i < a.length; i += SPECIES.length()) { 119 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 120 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 121 av.lanewise(VectorOperators.ADD, bv).intoArray(r, i); 122 } 123 } 124 125 bh.consume(r); 126 } 127 128 @Benchmark 129 public void ADDMasked(Blackhole bh) { 130 byte[] a = fa.apply(SPECIES.length()); 131 byte[] b = fb.apply(SPECIES.length()); 132 byte[] r = fr.apply(SPECIES.length()); 133 boolean[] mask = fm.apply(SPECIES.length()); 134 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 135 136 for (int ic = 0; ic < INVOC_COUNT; ic++) { 137 for (int i = 0; i < a.length; i += SPECIES.length()) { 138 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 139 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 140 av.lanewise(VectorOperators.ADD, bv, vmask).intoArray(r, i); 141 } 142 } 143 144 bh.consume(r); 145 } 146 147 @Benchmark 148 public void SUB(Blackhole bh) { 149 byte[] a = fa.apply(SPECIES.length()); 150 byte[] b = fb.apply(SPECIES.length()); 151 byte[] r = fr.apply(SPECIES.length()); 152 153 for (int ic = 0; ic < INVOC_COUNT; ic++) { 154 for (int i = 0; i < a.length; i += SPECIES.length()) { 155 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 156 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 157 av.lanewise(VectorOperators.SUB, bv).intoArray(r, i); 158 } 159 } 160 161 bh.consume(r); 162 } 163 164 @Benchmark 165 public void SUBMasked(Blackhole bh) { 166 byte[] a = fa.apply(SPECIES.length()); 167 byte[] b = fb.apply(SPECIES.length()); 168 byte[] r = fr.apply(SPECIES.length()); 169 boolean[] mask = fm.apply(SPECIES.length()); 170 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 171 172 for (int ic = 0; ic < INVOC_COUNT; ic++) { 173 for (int i = 0; i < a.length; i += SPECIES.length()) { 174 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 175 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 176 av.lanewise(VectorOperators.SUB, bv, vmask).intoArray(r, i); 177 } 178 } 179 180 bh.consume(r); 181 } 182 183 @Benchmark 184 public void MUL(Blackhole bh) { 185 byte[] a = fa.apply(SPECIES.length()); 186 byte[] b = fb.apply(SPECIES.length()); 187 byte[] r = fr.apply(SPECIES.length()); 188 189 for (int ic = 0; ic < INVOC_COUNT; ic++) { 190 for (int i = 0; i < a.length; i += SPECIES.length()) { 191 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 192 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 193 av.lanewise(VectorOperators.MUL, bv).intoArray(r, i); 194 } 195 } 196 197 bh.consume(r); 198 } 199 200 @Benchmark 201 public void MULMasked(Blackhole bh) { 202 byte[] a = fa.apply(SPECIES.length()); 203 byte[] b = fb.apply(SPECIES.length()); 204 byte[] r = fr.apply(SPECIES.length()); 205 boolean[] mask = fm.apply(SPECIES.length()); 206 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 207 208 for (int ic = 0; ic < INVOC_COUNT; ic++) { 209 for (int i = 0; i < a.length; i += SPECIES.length()) { 210 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 211 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 212 av.lanewise(VectorOperators.MUL, bv, vmask).intoArray(r, i); 213 } 214 } 215 216 bh.consume(r); 217 } 218 219 220 221 222 @Benchmark 223 public void DIV(Blackhole bh) { 224 byte[] a = fa.apply(SPECIES.length()); 225 byte[] b = fb.apply(SPECIES.length()); 226 byte[] r = fr.apply(SPECIES.length()); 227 228 replaceZero(b, (byte) 1); 229 230 for (int ic = 0; ic < INVOC_COUNT; ic++) { 231 for (int i = 0; i < a.length; i += SPECIES.length()) { 232 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 233 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 234 av.lanewise(VectorOperators.DIV, bv).intoArray(r, i); 235 } 236 } 237 238 bh.consume(r); 239 } 240 241 242 243 @Benchmark 244 public void DIVMasked(Blackhole bh) { 245 byte[] a = fa.apply(SPECIES.length()); 246 byte[] b = fb.apply(SPECIES.length()); 247 byte[] r = fr.apply(SPECIES.length()); 248 boolean[] mask = fm.apply(SPECIES.length()); 249 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 250 251 replaceZero(b, mask, (byte) 1); 252 253 for (int ic = 0; ic < INVOC_COUNT; ic++) { 254 for (int i = 0; i < a.length; i += SPECIES.length()) { 255 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 256 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 257 av.lanewise(VectorOperators.DIV, bv, vmask).intoArray(r, i); 258 } 259 } 260 261 bh.consume(r); 262 } 263 264 265 @Benchmark 266 public void FIRST_NONZERO(Blackhole bh) { 267 byte[] a = fa.apply(SPECIES.length()); 268 byte[] b = fb.apply(SPECIES.length()); 269 byte[] r = fr.apply(SPECIES.length()); 270 271 for (int ic = 0; ic < INVOC_COUNT; ic++) { 272 for (int i = 0; i < a.length; i += SPECIES.length()) { 273 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 274 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 275 av.lanewise(VectorOperators.FIRST_NONZERO, bv).intoArray(r, i); 276 } 277 } 278 279 bh.consume(r); 280 } 281 282 @Benchmark 283 public void FIRST_NONZEROMasked(Blackhole bh) { 284 byte[] a = fa.apply(SPECIES.length()); 285 byte[] b = fb.apply(SPECIES.length()); 286 byte[] r = fr.apply(SPECIES.length()); 287 boolean[] mask = fm.apply(SPECIES.length()); 288 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 289 290 for (int ic = 0; ic < INVOC_COUNT; ic++) { 291 for (int i = 0; i < a.length; i += SPECIES.length()) { 292 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 293 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 294 av.lanewise(VectorOperators.FIRST_NONZERO, bv, vmask).intoArray(r, i); 295 } 296 } 297 298 bh.consume(r); 299 } 300 301 302 @Benchmark 303 public void AND(Blackhole bh) { 304 byte[] a = fa.apply(SPECIES.length()); 305 byte[] b = fb.apply(SPECIES.length()); 306 byte[] r = fr.apply(SPECIES.length()); 307 308 for (int ic = 0; ic < INVOC_COUNT; ic++) { 309 for (int i = 0; i < a.length; i += SPECIES.length()) { 310 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 311 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 312 av.lanewise(VectorOperators.AND, bv).intoArray(r, i); 313 } 314 } 315 316 bh.consume(r); 317 } 318 319 320 321 @Benchmark 322 public void ANDMasked(Blackhole bh) { 323 byte[] a = fa.apply(SPECIES.length()); 324 byte[] b = fb.apply(SPECIES.length()); 325 byte[] r = fr.apply(SPECIES.length()); 326 boolean[] mask = fm.apply(SPECIES.length()); 327 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 328 329 for (int ic = 0; ic < INVOC_COUNT; ic++) { 330 for (int i = 0; i < a.length; i += SPECIES.length()) { 331 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 332 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 333 av.lanewise(VectorOperators.AND, bv, vmask).intoArray(r, i); 334 } 335 } 336 337 bh.consume(r); 338 } 339 340 341 342 @Benchmark 343 public void AND_NOT(Blackhole bh) { 344 byte[] a = fa.apply(SPECIES.length()); 345 byte[] b = fb.apply(SPECIES.length()); 346 byte[] r = fr.apply(SPECIES.length()); 347 348 for (int ic = 0; ic < INVOC_COUNT; ic++) { 349 for (int i = 0; i < a.length; i += SPECIES.length()) { 350 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 351 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 352 av.lanewise(VectorOperators.AND_NOT, bv).intoArray(r, i); 353 } 354 } 355 356 bh.consume(r); 357 } 358 359 360 361 @Benchmark 362 public void AND_NOTMasked(Blackhole bh) { 363 byte[] a = fa.apply(SPECIES.length()); 364 byte[] b = fb.apply(SPECIES.length()); 365 byte[] r = fr.apply(SPECIES.length()); 366 boolean[] mask = fm.apply(SPECIES.length()); 367 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 368 369 for (int ic = 0; ic < INVOC_COUNT; ic++) { 370 for (int i = 0; i < a.length; i += SPECIES.length()) { 371 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 372 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 373 av.lanewise(VectorOperators.AND_NOT, bv, vmask).intoArray(r, i); 374 } 375 } 376 377 bh.consume(r); 378 } 379 380 381 382 @Benchmark 383 public void OR(Blackhole bh) { 384 byte[] a = fa.apply(SPECIES.length()); 385 byte[] b = fb.apply(SPECIES.length()); 386 byte[] r = fr.apply(SPECIES.length()); 387 388 for (int ic = 0; ic < INVOC_COUNT; ic++) { 389 for (int i = 0; i < a.length; i += SPECIES.length()) { 390 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 391 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 392 av.lanewise(VectorOperators.OR, bv).intoArray(r, i); 393 } 394 } 395 396 bh.consume(r); 397 } 398 399 400 401 @Benchmark 402 public void ORMasked(Blackhole bh) { 403 byte[] a = fa.apply(SPECIES.length()); 404 byte[] b = fb.apply(SPECIES.length()); 405 byte[] r = fr.apply(SPECIES.length()); 406 boolean[] mask = fm.apply(SPECIES.length()); 407 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 408 409 for (int ic = 0; ic < INVOC_COUNT; ic++) { 410 for (int i = 0; i < a.length; i += SPECIES.length()) { 411 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 412 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 413 av.lanewise(VectorOperators.OR, bv, vmask).intoArray(r, i); 414 } 415 } 416 417 bh.consume(r); 418 } 419 420 421 422 @Benchmark 423 public void XOR(Blackhole bh) { 424 byte[] a = fa.apply(SPECIES.length()); 425 byte[] b = fb.apply(SPECIES.length()); 426 byte[] r = fr.apply(SPECIES.length()); 427 428 for (int ic = 0; ic < INVOC_COUNT; ic++) { 429 for (int i = 0; i < a.length; i += SPECIES.length()) { 430 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 431 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 432 av.lanewise(VectorOperators.XOR, bv).intoArray(r, i); 433 } 434 } 435 436 bh.consume(r); 437 } 438 439 440 441 @Benchmark 442 public void XORMasked(Blackhole bh) { 443 byte[] a = fa.apply(SPECIES.length()); 444 byte[] b = fb.apply(SPECIES.length()); 445 byte[] r = fr.apply(SPECIES.length()); 446 boolean[] mask = fm.apply(SPECIES.length()); 447 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 448 449 for (int ic = 0; ic < INVOC_COUNT; ic++) { 450 for (int i = 0; i < a.length; i += SPECIES.length()) { 451 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 452 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 453 av.lanewise(VectorOperators.XOR, bv, vmask).intoArray(r, i); 454 } 455 } 456 457 bh.consume(r); 458 } 459 460 461 462 463 464 @Benchmark 465 public void LSHL(Blackhole bh) { 466 byte[] a = fa.apply(SPECIES.length()); 467 byte[] b = fb.apply(SPECIES.length()); 468 byte[] r = fr.apply(SPECIES.length()); 469 470 for (int ic = 0; ic < INVOC_COUNT; ic++) { 471 for (int i = 0; i < a.length; i += SPECIES.length()) { 472 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 473 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 474 av.lanewise(VectorOperators.LSHL, bv).intoArray(r, i); 475 } 476 } 477 478 bh.consume(r); 479 } 480 481 482 483 @Benchmark 484 public void LSHLMasked(Blackhole bh) { 485 byte[] a = fa.apply(SPECIES.length()); 486 byte[] b = fb.apply(SPECIES.length()); 487 byte[] r = fr.apply(SPECIES.length()); 488 boolean[] mask = fm.apply(SPECIES.length()); 489 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 490 491 for (int ic = 0; ic < INVOC_COUNT; ic++) { 492 for (int i = 0; i < a.length; i += SPECIES.length()) { 493 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 494 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 495 av.lanewise(VectorOperators.LSHL, bv, vmask).intoArray(r, i); 496 } 497 } 498 499 bh.consume(r); 500 } 501 502 503 504 505 506 507 508 @Benchmark 509 public void ASHR(Blackhole bh) { 510 byte[] a = fa.apply(SPECIES.length()); 511 byte[] b = fb.apply(SPECIES.length()); 512 byte[] r = fr.apply(SPECIES.length()); 513 514 for (int ic = 0; ic < INVOC_COUNT; ic++) { 515 for (int i = 0; i < a.length; i += SPECIES.length()) { 516 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 517 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 518 av.lanewise(VectorOperators.ASHR, bv).intoArray(r, i); 519 } 520 } 521 522 bh.consume(r); 523 } 524 525 526 527 @Benchmark 528 public void ASHRMasked(Blackhole bh) { 529 byte[] a = fa.apply(SPECIES.length()); 530 byte[] b = fb.apply(SPECIES.length()); 531 byte[] r = fr.apply(SPECIES.length()); 532 boolean[] mask = fm.apply(SPECIES.length()); 533 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 534 535 for (int ic = 0; ic < INVOC_COUNT; ic++) { 536 for (int i = 0; i < a.length; i += SPECIES.length()) { 537 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 538 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 539 av.lanewise(VectorOperators.ASHR, bv, vmask).intoArray(r, i); 540 } 541 } 542 543 bh.consume(r); 544 } 545 546 547 548 549 550 551 552 @Benchmark 553 public void LSHR(Blackhole bh) { 554 byte[] a = fa.apply(SPECIES.length()); 555 byte[] b = fb.apply(SPECIES.length()); 556 byte[] r = fr.apply(SPECIES.length()); 557 558 for (int ic = 0; ic < INVOC_COUNT; ic++) { 559 for (int i = 0; i < a.length; i += SPECIES.length()) { 560 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 561 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 562 av.lanewise(VectorOperators.LSHR, bv).intoArray(r, i); 563 } 564 } 565 566 bh.consume(r); 567 } 568 569 570 571 @Benchmark 572 public void LSHRMasked(Blackhole bh) { 573 byte[] a = fa.apply(SPECIES.length()); 574 byte[] b = fb.apply(SPECIES.length()); 575 byte[] r = fr.apply(SPECIES.length()); 576 boolean[] mask = fm.apply(SPECIES.length()); 577 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 578 579 for (int ic = 0; ic < INVOC_COUNT; ic++) { 580 for (int i = 0; i < a.length; i += SPECIES.length()) { 581 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 582 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 583 av.lanewise(VectorOperators.LSHR, bv, vmask).intoArray(r, i); 584 } 585 } 586 587 bh.consume(r); 588 } 589 590 591 592 593 594 595 596 @Benchmark 597 public void LSHLShift(Blackhole bh) { 598 byte[] a = fa.apply(SPECIES.length()); 599 byte[] b = fb.apply(SPECIES.length()); 600 byte[] r = fr.apply(SPECIES.length()); 601 602 for (int ic = 0; ic < INVOC_COUNT; ic++) { 603 for (int i = 0; i < a.length; i += SPECIES.length()) { 604 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 605 av.lanewise(VectorOperators.LSHL, (int)b[i]).intoArray(r, i); 606 } 607 } 608 609 bh.consume(r); 610 } 611 612 613 614 @Benchmark 615 public void LSHLMaskedShift(Blackhole bh) { 616 byte[] a = fa.apply(SPECIES.length()); 617 byte[] b = fb.apply(SPECIES.length()); 618 byte[] r = fr.apply(SPECIES.length()); 619 boolean[] mask = fm.apply(SPECIES.length()); 620 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 621 622 for (int ic = 0; ic < INVOC_COUNT; ic++) { 623 for (int i = 0; i < a.length; i += SPECIES.length()) { 624 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 625 av.lanewise(VectorOperators.LSHL, (int)b[i], vmask).intoArray(r, i); 626 } 627 } 628 629 bh.consume(r); 630 } 631 632 633 634 635 636 637 638 @Benchmark 639 public void LSHRShift(Blackhole bh) { 640 byte[] a = fa.apply(SPECIES.length()); 641 byte[] b = fb.apply(SPECIES.length()); 642 byte[] r = fr.apply(SPECIES.length()); 643 644 for (int ic = 0; ic < INVOC_COUNT; ic++) { 645 for (int i = 0; i < a.length; i += SPECIES.length()) { 646 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 647 av.lanewise(VectorOperators.LSHR, (int)b[i]).intoArray(r, i); 648 } 649 } 650 651 bh.consume(r); 652 } 653 654 655 656 @Benchmark 657 public void LSHRMaskedShift(Blackhole bh) { 658 byte[] a = fa.apply(SPECIES.length()); 659 byte[] b = fb.apply(SPECIES.length()); 660 byte[] r = fr.apply(SPECIES.length()); 661 boolean[] mask = fm.apply(SPECIES.length()); 662 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 663 664 for (int ic = 0; ic < INVOC_COUNT; ic++) { 665 for (int i = 0; i < a.length; i += SPECIES.length()) { 666 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 667 av.lanewise(VectorOperators.LSHR, (int)b[i], vmask).intoArray(r, i); 668 } 669 } 670 671 bh.consume(r); 672 } 673 674 675 676 677 678 679 680 @Benchmark 681 public void ASHRShift(Blackhole bh) { 682 byte[] a = fa.apply(SPECIES.length()); 683 byte[] b = fb.apply(SPECIES.length()); 684 byte[] r = fr.apply(SPECIES.length()); 685 686 for (int ic = 0; ic < INVOC_COUNT; ic++) { 687 for (int i = 0; i < a.length; i += SPECIES.length()) { 688 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 689 av.lanewise(VectorOperators.ASHR, (int)b[i]).intoArray(r, i); 690 } 691 } 692 693 bh.consume(r); 694 } 695 696 697 698 @Benchmark 699 public void ASHRMaskedShift(Blackhole bh) { 700 byte[] a = fa.apply(SPECIES.length()); 701 byte[] b = fb.apply(SPECIES.length()); 702 byte[] r = fr.apply(SPECIES.length()); 703 boolean[] mask = fm.apply(SPECIES.length()); 704 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 705 706 for (int ic = 0; ic < INVOC_COUNT; ic++) { 707 for (int i = 0; i < a.length; i += SPECIES.length()) { 708 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 709 av.lanewise(VectorOperators.ASHR, (int)b[i], vmask).intoArray(r, i); 710 } 711 } 712 713 bh.consume(r); 714 } 715 716 717 718 719 @Benchmark 720 public void MIN(Blackhole bh) { 721 byte[] a = fa.apply(SPECIES.length()); 722 byte[] b = fb.apply(SPECIES.length()); 723 byte[] r = fr.apply(SPECIES.length()); 724 725 for (int ic = 0; ic < INVOC_COUNT; ic++) { 726 for (int i = 0; i < a.length; i += SPECIES.length()) { 727 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 728 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 729 av.lanewise(VectorOperators.MIN, bv).intoArray(r, i); 730 } 731 } 732 733 bh.consume(r); 734 } 735 736 @Benchmark 737 public void MAX(Blackhole bh) { 738 byte[] a = fa.apply(SPECIES.length()); 739 byte[] b = fb.apply(SPECIES.length()); 740 byte[] r = fr.apply(SPECIES.length()); 741 742 for (int ic = 0; ic < INVOC_COUNT; ic++) { 743 for (int i = 0; i < a.length; i += SPECIES.length()) { 744 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 745 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 746 av.lanewise(VectorOperators.MAX, bv).intoArray(r, i); 747 } 748 } 749 750 bh.consume(r); 751 } 752 753 754 @Benchmark 755 public void ANDLanes(Blackhole bh) { 756 byte[] a = fa.apply(SPECIES.length()); 757 byte ra = -1; 758 759 for (int ic = 0; ic < INVOC_COUNT; ic++) { 760 ra = -1; 761 for (int i = 0; i < a.length; i += SPECIES.length()) { 762 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 763 ra &= av.reduceLanes(VectorOperators.AND); 764 } 765 } 766 bh.consume(ra); 767 } 768 769 770 771 @Benchmark 772 public void ANDMaskedLanes(Blackhole bh) { 773 byte[] a = fa.apply(SPECIES.length()); 774 boolean[] mask = fm.apply(SPECIES.length()); 775 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 776 byte ra = -1; 777 778 for (int ic = 0; ic < INVOC_COUNT; ic++) { 779 ra = -1; 780 for (int i = 0; i < a.length; i += SPECIES.length()) { 781 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 782 ra &= av.reduceLanes(VectorOperators.AND, vmask); 783 } 784 } 785 bh.consume(ra); 786 } 787 788 789 790 @Benchmark 791 public void ORLanes(Blackhole bh) { 792 byte[] a = fa.apply(SPECIES.length()); 793 byte ra = 0; 794 795 for (int ic = 0; ic < INVOC_COUNT; ic++) { 796 ra = 0; 797 for (int i = 0; i < a.length; i += SPECIES.length()) { 798 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 799 ra |= av.reduceLanes(VectorOperators.OR); 800 } 801 } 802 bh.consume(ra); 803 } 804 805 806 807 @Benchmark 808 public void ORMaskedLanes(Blackhole bh) { 809 byte[] a = fa.apply(SPECIES.length()); 810 boolean[] mask = fm.apply(SPECIES.length()); 811 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 812 byte ra = 0; 813 814 for (int ic = 0; ic < INVOC_COUNT; ic++) { 815 ra = 0; 816 for (int i = 0; i < a.length; i += SPECIES.length()) { 817 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 818 ra |= av.reduceLanes(VectorOperators.OR, vmask); 819 } 820 } 821 bh.consume(ra); 822 } 823 824 825 826 @Benchmark 827 public void XORLanes(Blackhole bh) { 828 byte[] a = fa.apply(SPECIES.length()); 829 byte ra = 0; 830 831 for (int ic = 0; ic < INVOC_COUNT; ic++) { 832 ra = 0; 833 for (int i = 0; i < a.length; i += SPECIES.length()) { 834 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 835 ra ^= av.reduceLanes(VectorOperators.XOR); 836 } 837 } 838 bh.consume(ra); 839 } 840 841 842 843 @Benchmark 844 public void XORMaskedLanes(Blackhole bh) { 845 byte[] a = fa.apply(SPECIES.length()); 846 boolean[] mask = fm.apply(SPECIES.length()); 847 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 848 byte ra = 0; 849 850 for (int ic = 0; ic < INVOC_COUNT; ic++) { 851 ra = 0; 852 for (int i = 0; i < a.length; i += SPECIES.length()) { 853 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 854 ra ^= av.reduceLanes(VectorOperators.XOR, vmask); 855 } 856 } 857 bh.consume(ra); 858 } 859 860 861 @Benchmark 862 public void ADDLanes(Blackhole bh) { 863 byte[] a = fa.apply(SPECIES.length()); 864 byte ra = 0; 865 866 for (int ic = 0; ic < INVOC_COUNT; ic++) { 867 ra = 0; 868 for (int i = 0; i < a.length; i += SPECIES.length()) { 869 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 870 ra += av.reduceLanes(VectorOperators.ADD); 871 } 872 } 873 bh.consume(ra); 874 } 875 876 @Benchmark 877 public void ADDMaskedLanes(Blackhole bh) { 878 byte[] a = fa.apply(SPECIES.length()); 879 boolean[] mask = fm.apply(SPECIES.length()); 880 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 881 byte ra = 0; 882 883 for (int ic = 0; ic < INVOC_COUNT; ic++) { 884 ra = 0; 885 for (int i = 0; i < a.length; i += SPECIES.length()) { 886 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 887 ra += av.reduceLanes(VectorOperators.ADD, vmask); 888 } 889 } 890 bh.consume(ra); 891 } 892 893 @Benchmark 894 public void MULLanes(Blackhole bh) { 895 byte[] a = fa.apply(SPECIES.length()); 896 byte ra = 1; 897 898 for (int ic = 0; ic < INVOC_COUNT; ic++) { 899 ra = 1; 900 for (int i = 0; i < a.length; i += SPECIES.length()) { 901 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 902 ra *= av.reduceLanes(VectorOperators.MUL); 903 } 904 } 905 bh.consume(ra); 906 } 907 908 @Benchmark 909 public void MULMaskedLanes(Blackhole bh) { 910 byte[] a = fa.apply(SPECIES.length()); 911 boolean[] mask = fm.apply(SPECIES.length()); 912 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 913 byte ra = 1; 914 915 for (int ic = 0; ic < INVOC_COUNT; ic++) { 916 ra = 1; 917 for (int i = 0; i < a.length; i += SPECIES.length()) { 918 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 919 ra *= av.reduceLanes(VectorOperators.MUL, vmask); 920 } 921 } 922 bh.consume(ra); 923 } 924 925 @Benchmark 926 public void MINLanes(Blackhole bh) { 927 byte[] a = fa.apply(SPECIES.length()); 928 byte ra = Byte.MAX_VALUE; 929 930 for (int ic = 0; ic < INVOC_COUNT; ic++) { 931 ra = Byte.MAX_VALUE; 932 for (int i = 0; i < a.length; i += SPECIES.length()) { 933 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 934 ra = (byte)Math.min(ra, av.reduceLanes(VectorOperators.MIN)); 935 } 936 } 937 bh.consume(ra); 938 } 939 940 @Benchmark 941 public void MINMaskedLanes(Blackhole bh) { 942 byte[] a = fa.apply(SPECIES.length()); 943 boolean[] mask = fm.apply(SPECIES.length()); 944 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 945 byte ra = Byte.MAX_VALUE; 946 947 for (int ic = 0; ic < INVOC_COUNT; ic++) { 948 ra = Byte.MAX_VALUE; 949 for (int i = 0; i < a.length; i += SPECIES.length()) { 950 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 951 ra = (byte)Math.min(ra, av.reduceLanes(VectorOperators.MIN, vmask)); 952 } 953 } 954 bh.consume(ra); 955 } 956 957 @Benchmark 958 public void MAXLanes(Blackhole bh) { 959 byte[] a = fa.apply(SPECIES.length()); 960 byte ra = Byte.MIN_VALUE; 961 962 for (int ic = 0; ic < INVOC_COUNT; ic++) { 963 ra = Byte.MIN_VALUE; 964 for (int i = 0; i < a.length; i += SPECIES.length()) { 965 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 966 ra = (byte)Math.max(ra, av.reduceLanes(VectorOperators.MAX)); 967 } 968 } 969 bh.consume(ra); 970 } 971 972 @Benchmark 973 public void MAXMaskedLanes(Blackhole bh) { 974 byte[] a = fa.apply(SPECIES.length()); 975 boolean[] mask = fm.apply(SPECIES.length()); 976 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 977 byte ra = Byte.MIN_VALUE; 978 979 for (int ic = 0; ic < INVOC_COUNT; ic++) { 980 ra = Byte.MIN_VALUE; 981 for (int i = 0; i < a.length; i += SPECIES.length()) { 982 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 983 ra = (byte)Math.max(ra, av.reduceLanes(VectorOperators.MAX, vmask)); 984 } 985 } 986 bh.consume(ra); 987 } 988 989 990 @Benchmark 991 public void anyTrue(Blackhole bh) { 992 boolean[] mask = fm.apply(SPECIES.length()); 993 boolean[] r = fmr.apply(SPECIES.length()); 994 995 for (int ic = 0; ic < INVOC_COUNT; ic++) { 996 for (int i = 0; i < mask.length; i += SPECIES.length()) { 997 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i); 998 r[i] = vmask.anyTrue(); 999 } 1000 } 1001 1002 bh.consume(r); 1003 } 1004 1005 1006 1007 @Benchmark 1008 public void allTrue(Blackhole bh) { 1009 boolean[] mask = fm.apply(SPECIES.length()); 1010 boolean[] r = fmr.apply(SPECIES.length()); 1011 1012 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1013 for (int i = 0; i < mask.length; i += SPECIES.length()) { 1014 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i); 1015 r[i] = vmask.allTrue(); 1016 } 1017 } 1018 1019 bh.consume(r); 1020 } 1021 1022 1023 @Benchmark 1024 public void withLane(Blackhole bh) { 1025 byte[] a = fa.apply(SPECIES.length()); 1026 byte[] r = fr.apply(SPECIES.length()); 1027 1028 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1029 for (int i = 0; i < a.length; i += SPECIES.length()) { 1030 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1031 av.withLane(0, (byte)4).intoArray(r, i); 1032 } 1033 } 1034 1035 bh.consume(r); 1036 } 1037 1038 @Benchmark 1039 public Object IS_DEFAULT() { 1040 byte[] a = fa.apply(size); 1041 boolean[] ms = fmt.apply(size); 1042 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1043 1044 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1045 for (int i = 0; i < a.length; i += SPECIES.length()) { 1046 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1047 1048 // accumulate results, so JIT can't eliminate relevant computations 1049 m = m.and(av.test(VectorOperators.IS_DEFAULT)); 1050 } 1051 } 1052 1053 return m; 1054 } 1055 1056 @Benchmark 1057 public Object IS_NEGATIVE() { 1058 byte[] a = fa.apply(size); 1059 boolean[] ms = fmt.apply(size); 1060 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1061 1062 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1063 for (int i = 0; i < a.length; i += SPECIES.length()) { 1064 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1065 1066 // accumulate results, so JIT can't eliminate relevant computations 1067 m = m.and(av.test(VectorOperators.IS_NEGATIVE)); 1068 } 1069 } 1070 1071 return m; 1072 } 1073 1074 1075 1076 1077 @Benchmark 1078 public Object LT() { 1079 byte[] a = fa.apply(size); 1080 byte[] b = fb.apply(size); 1081 boolean[] ms = fmt.apply(size); 1082 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1083 1084 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1085 for (int i = 0; i < a.length; i += SPECIES.length()) { 1086 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1087 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1088 1089 // accumulate results, so JIT can't eliminate relevant computations 1090 m = m.and(av.compare(VectorOperators.LT, bv)); 1091 } 1092 } 1093 1094 return m; 1095 } 1096 1097 @Benchmark 1098 public Object GT() { 1099 byte[] a = fa.apply(size); 1100 byte[] b = fb.apply(size); 1101 boolean[] ms = fmt.apply(size); 1102 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1103 1104 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1105 for (int i = 0; i < a.length; i += SPECIES.length()) { 1106 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1107 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1108 1109 // accumulate results, so JIT can't eliminate relevant computations 1110 m = m.and(av.compare(VectorOperators.GT, bv)); 1111 } 1112 } 1113 1114 return m; 1115 } 1116 1117 @Benchmark 1118 public Object EQ() { 1119 byte[] a = fa.apply(size); 1120 byte[] b = fb.apply(size); 1121 boolean[] ms = fmt.apply(size); 1122 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1123 1124 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1125 for (int i = 0; i < a.length; i += SPECIES.length()) { 1126 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1127 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1128 1129 // accumulate results, so JIT can't eliminate relevant computations 1130 m = m.and(av.compare(VectorOperators.EQ, bv)); 1131 } 1132 } 1133 1134 return m; 1135 } 1136 1137 @Benchmark 1138 public Object NE() { 1139 byte[] a = fa.apply(size); 1140 byte[] b = fb.apply(size); 1141 boolean[] ms = fmt.apply(size); 1142 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1143 1144 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1145 for (int i = 0; i < a.length; i += SPECIES.length()) { 1146 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1147 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1148 1149 // accumulate results, so JIT can't eliminate relevant computations 1150 m = m.and(av.compare(VectorOperators.NE, bv)); 1151 } 1152 } 1153 1154 return m; 1155 } 1156 1157 @Benchmark 1158 public Object LE() { 1159 byte[] a = fa.apply(size); 1160 byte[] b = fb.apply(size); 1161 boolean[] ms = fmt.apply(size); 1162 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1163 1164 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1165 for (int i = 0; i < a.length; i += SPECIES.length()) { 1166 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1167 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1168 1169 // accumulate results, so JIT can't eliminate relevant computations 1170 m = m.and(av.compare(VectorOperators.LE, bv)); 1171 } 1172 } 1173 1174 return m; 1175 } 1176 1177 @Benchmark 1178 public Object GE() { 1179 byte[] a = fa.apply(size); 1180 byte[] b = fb.apply(size); 1181 boolean[] ms = fmt.apply(size); 1182 VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0); 1183 1184 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1185 for (int i = 0; i < a.length; i += SPECIES.length()) { 1186 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1187 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1188 1189 // accumulate results, so JIT can't eliminate relevant computations 1190 m = m.and(av.compare(VectorOperators.GE, bv)); 1191 } 1192 } 1193 1194 return m; 1195 } 1196 1197 @Benchmark 1198 public void blend(Blackhole bh) { 1199 byte[] a = fa.apply(SPECIES.length()); 1200 byte[] b = fb.apply(SPECIES.length()); 1201 byte[] r = fr.apply(SPECIES.length()); 1202 boolean[] mask = fm.apply(SPECIES.length()); 1203 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1204 1205 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1206 for (int i = 0; i < a.length; i += SPECIES.length()) { 1207 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1208 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1209 av.blend(bv, vmask).intoArray(r, i); 1210 } 1211 } 1212 1213 bh.consume(r); 1214 } 1215 1216 @Benchmark 1217 public void rearrange(Blackhole bh) { 1218 byte[] a = fa.apply(SPECIES.length()); 1219 int[] order = fs.apply(a.length, SPECIES.length()); 1220 byte[] r = fr.apply(SPECIES.length()); 1221 1222 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1223 for (int i = 0; i < a.length; i += SPECIES.length()) { 1224 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1225 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i); 1226 } 1227 } 1228 1229 bh.consume(r); 1230 } 1231 1232 @Benchmark 1233 public void laneextract(Blackhole bh) { 1234 byte[] a = fa.apply(SPECIES.length()); 1235 byte[] r = fr.apply(SPECIES.length()); 1236 1237 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1238 for (int i = 0; i < a.length; i += SPECIES.length()) { 1239 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1240 int num_lanes = SPECIES.length(); 1241 // Manually unroll because full unroll happens after intrinsification. 1242 // Unroll is needed because get intrinsic requires for index to be a known constant. 1243 if (num_lanes == 1) { 1244 r[i]=av.lane(0); 1245 } else if (num_lanes == 2) { 1246 r[i]=av.lane(0); 1247 r[i+1]=av.lane(1); 1248 } else if (num_lanes == 4) { 1249 r[i]=av.lane(0); 1250 r[i+1]=av.lane(1); 1251 r[i+2]=av.lane(2); 1252 r[i+3]=av.lane(3); 1253 } else if (num_lanes == 8) { 1254 r[i]=av.lane(0); 1255 r[i+1]=av.lane(1); 1256 r[i+2]=av.lane(2); 1257 r[i+3]=av.lane(3); 1258 r[i+4]=av.lane(4); 1259 r[i+5]=av.lane(5); 1260 r[i+6]=av.lane(6); 1261 r[i+7]=av.lane(7); 1262 } else if (num_lanes == 16) { 1263 r[i]=av.lane(0); 1264 r[i+1]=av.lane(1); 1265 r[i+2]=av.lane(2); 1266 r[i+3]=av.lane(3); 1267 r[i+4]=av.lane(4); 1268 r[i+5]=av.lane(5); 1269 r[i+6]=av.lane(6); 1270 r[i+7]=av.lane(7); 1271 r[i+8]=av.lane(8); 1272 r[i+9]=av.lane(9); 1273 r[i+10]=av.lane(10); 1274 r[i+11]=av.lane(11); 1275 r[i+12]=av.lane(12); 1276 r[i+13]=av.lane(13); 1277 r[i+14]=av.lane(14); 1278 r[i+15]=av.lane(15); 1279 } else if (num_lanes == 32) { 1280 r[i]=av.lane(0); 1281 r[i+1]=av.lane(1); 1282 r[i+2]=av.lane(2); 1283 r[i+3]=av.lane(3); 1284 r[i+4]=av.lane(4); 1285 r[i+5]=av.lane(5); 1286 r[i+6]=av.lane(6); 1287 r[i+7]=av.lane(7); 1288 r[i+8]=av.lane(8); 1289 r[i+9]=av.lane(9); 1290 r[i+10]=av.lane(10); 1291 r[i+11]=av.lane(11); 1292 r[i+12]=av.lane(12); 1293 r[i+13]=av.lane(13); 1294 r[i+14]=av.lane(14); 1295 r[i+15]=av.lane(15); 1296 r[i+16]=av.lane(16); 1297 r[i+17]=av.lane(17); 1298 r[i+18]=av.lane(18); 1299 r[i+19]=av.lane(19); 1300 r[i+20]=av.lane(20); 1301 r[i+21]=av.lane(21); 1302 r[i+22]=av.lane(22); 1303 r[i+23]=av.lane(23); 1304 r[i+24]=av.lane(24); 1305 r[i+25]=av.lane(25); 1306 r[i+26]=av.lane(26); 1307 r[i+27]=av.lane(27); 1308 r[i+28]=av.lane(28); 1309 r[i+29]=av.lane(29); 1310 r[i+30]=av.lane(30); 1311 r[i+31]=av.lane(31); 1312 } else if (num_lanes == 64) { 1313 r[i]=av.lane(0); 1314 r[i+1]=av.lane(1); 1315 r[i+2]=av.lane(2); 1316 r[i+3]=av.lane(3); 1317 r[i+4]=av.lane(4); 1318 r[i+5]=av.lane(5); 1319 r[i+6]=av.lane(6); 1320 r[i+7]=av.lane(7); 1321 r[i+8]=av.lane(8); 1322 r[i+9]=av.lane(9); 1323 r[i+10]=av.lane(10); 1324 r[i+11]=av.lane(11); 1325 r[i+12]=av.lane(12); 1326 r[i+13]=av.lane(13); 1327 r[i+14]=av.lane(14); 1328 r[i+15]=av.lane(15); 1329 r[i+16]=av.lane(16); 1330 r[i+17]=av.lane(17); 1331 r[i+18]=av.lane(18); 1332 r[i+19]=av.lane(19); 1333 r[i+20]=av.lane(20); 1334 r[i+21]=av.lane(21); 1335 r[i+22]=av.lane(22); 1336 r[i+23]=av.lane(23); 1337 r[i+24]=av.lane(24); 1338 r[i+25]=av.lane(25); 1339 r[i+26]=av.lane(26); 1340 r[i+27]=av.lane(27); 1341 r[i+28]=av.lane(28); 1342 r[i+29]=av.lane(29); 1343 r[i+30]=av.lane(30); 1344 r[i+31]=av.lane(31); 1345 r[i+32]=av.lane(32); 1346 r[i+33]=av.lane(33); 1347 r[i+34]=av.lane(34); 1348 r[i+35]=av.lane(35); 1349 r[i+36]=av.lane(36); 1350 r[i+37]=av.lane(37); 1351 r[i+38]=av.lane(38); 1352 r[i+39]=av.lane(39); 1353 r[i+40]=av.lane(40); 1354 r[i+41]=av.lane(41); 1355 r[i+42]=av.lane(42); 1356 r[i+43]=av.lane(43); 1357 r[i+44]=av.lane(44); 1358 r[i+45]=av.lane(45); 1359 r[i+46]=av.lane(46); 1360 r[i+47]=av.lane(47); 1361 r[i+48]=av.lane(48); 1362 r[i+49]=av.lane(49); 1363 r[i+50]=av.lane(50); 1364 r[i+51]=av.lane(51); 1365 r[i+52]=av.lane(52); 1366 r[i+53]=av.lane(53); 1367 r[i+54]=av.lane(54); 1368 r[i+55]=av.lane(55); 1369 r[i+56]=av.lane(56); 1370 r[i+57]=av.lane(57); 1371 r[i+58]=av.lane(58); 1372 r[i+59]=av.lane(59); 1373 r[i+60]=av.lane(60); 1374 r[i+61]=av.lane(61); 1375 r[i+62]=av.lane(62); 1376 r[i+63]=av.lane(63); 1377 } else { 1378 for (int j = 0; j < SPECIES.length(); j++) { 1379 r[i+j]=av.lane(j); 1380 } 1381 } 1382 } 1383 } 1384 1385 bh.consume(r); 1386 } 1387 1388 @Benchmark 1389 public void broadcast(Blackhole bh) { 1390 byte[] a = fa.apply(SPECIES.length()); 1391 byte[] r = new byte[a.length]; 1392 1393 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1394 for (int i = 0; i < a.length; i += SPECIES.length()) { 1395 ByteVector.broadcast(SPECIES, a[i]).intoArray(r, i); 1396 } 1397 } 1398 1399 bh.consume(r); 1400 } 1401 1402 @Benchmark 1403 public void zero(Blackhole bh) { 1404 byte[] a = fa.apply(SPECIES.length()); 1405 byte[] r = new byte[a.length]; 1406 1407 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1408 for (int i = 0; i < a.length; i += SPECIES.length()) { 1409 ByteVector.zero(SPECIES).intoArray(a, i); 1410 } 1411 } 1412 1413 bh.consume(r); 1414 } 1415 1416 @Benchmark 1417 public void sliceUnary(Blackhole bh) { 1418 byte[] a = fa.apply(SPECIES.length()); 1419 byte[] r = new byte[a.length]; 1420 int origin = (new java.util.Random()).nextInt(SPECIES.length()); 1421 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1422 for (int i = 0; i < a.length; i += SPECIES.length()) { 1423 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1424 av.slice(origin).intoArray(r, i); 1425 } 1426 } 1427 1428 bh.consume(r); 1429 } 1430 1431 @Benchmark 1432 public void sliceBinary(Blackhole bh) { 1433 byte[] a = fa.apply(SPECIES.length()); 1434 byte[] b = fb.apply(SPECIES.length()); 1435 byte[] r = new byte[a.length]; 1436 int origin = (new java.util.Random()).nextInt(SPECIES.length()); 1437 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1438 for (int i = 0; i < a.length; i += SPECIES.length()) { 1439 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1440 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1441 av.slice(origin, bv).intoArray(r, i); 1442 } 1443 } 1444 1445 bh.consume(r); 1446 } 1447 1448 @Benchmark 1449 public void sliceMasked(Blackhole bh) { 1450 byte[] a = fa.apply(SPECIES.length()); 1451 byte[] b = fb.apply(SPECIES.length()); 1452 boolean[] mask = fm.apply(SPECIES.length()); 1453 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1454 1455 byte[] r = new byte[a.length]; 1456 int origin = (new java.util.Random()).nextInt(SPECIES.length()); 1457 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1458 for (int i = 0; i < a.length; i += SPECIES.length()) { 1459 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1460 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1461 av.slice(origin, bv, vmask).intoArray(r, i); 1462 } 1463 } 1464 1465 bh.consume(r); 1466 } 1467 1468 @Benchmark 1469 public void unsliceUnary(Blackhole bh) { 1470 byte[] a = fa.apply(SPECIES.length()); 1471 byte[] r = new byte[a.length]; 1472 int origin = (new java.util.Random()).nextInt(SPECIES.length()); 1473 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1474 for (int i = 0; i < a.length; i += SPECIES.length()) { 1475 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1476 av.unslice(origin).intoArray(r, i); 1477 } 1478 } 1479 1480 bh.consume(r); 1481 } 1482 1483 @Benchmark 1484 public void unsliceBinary(Blackhole bh) { 1485 byte[] a = fa.apply(SPECIES.length()); 1486 byte[] b = fb.apply(SPECIES.length()); 1487 byte[] r = new byte[a.length]; 1488 int origin = (new java.util.Random()).nextInt(SPECIES.length()); 1489 int part = (new java.util.Random()).nextInt(2); 1490 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1491 for (int i = 0; i < a.length; i += SPECIES.length()) { 1492 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1493 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1494 av.unslice(origin, bv, part).intoArray(r, i); 1495 } 1496 } 1497 1498 bh.consume(r); 1499 } 1500 1501 @Benchmark 1502 public void unsliceMasked(Blackhole bh) { 1503 byte[] a = fa.apply(SPECIES.length()); 1504 byte[] b = fb.apply(SPECIES.length()); 1505 boolean[] mask = fm.apply(SPECIES.length()); 1506 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1507 byte[] r = new byte[a.length]; 1508 int origin = (new java.util.Random()).nextInt(SPECIES.length()); 1509 int part = (new java.util.Random()).nextInt(2); 1510 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1511 for (int i = 0; i < a.length; i += SPECIES.length()) { 1512 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1513 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1514 av.unslice(origin, bv, part, vmask).intoArray(r, i); 1515 } 1516 } 1517 1518 bh.consume(r); 1519 } 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 @Benchmark 1543 public void BITWISE_BLEND(Blackhole bh) { 1544 byte[] a = fa.apply(SPECIES.length()); 1545 byte[] b = fb.apply(SPECIES.length()); 1546 byte[] c = fc.apply(SPECIES.length()); 1547 byte[] r = fr.apply(SPECIES.length()); 1548 1549 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1550 for (int i = 0; i < a.length; i += SPECIES.length()) { 1551 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1552 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1553 ByteVector cv = ByteVector.fromArray(SPECIES, c, i); 1554 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv).intoArray(r, i); 1555 } 1556 } 1557 1558 bh.consume(r); 1559 } 1560 1561 1562 1563 @Benchmark 1564 public void BITWISE_BLENDMasked(Blackhole bh) { 1565 byte[] a = fa.apply(SPECIES.length()); 1566 byte[] b = fb.apply(SPECIES.length()); 1567 byte[] c = fc.apply(SPECIES.length()); 1568 byte[] r = fr.apply(SPECIES.length()); 1569 boolean[] mask = fm.apply(SPECIES.length()); 1570 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1571 1572 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1573 for (int i = 0; i < a.length; i += SPECIES.length()) { 1574 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1575 ByteVector bv = ByteVector.fromArray(SPECIES, b, i); 1576 ByteVector cv = ByteVector.fromArray(SPECIES, c, i); 1577 av.lanewise(VectorOperators.BITWISE_BLEND, bv, cv, vmask).intoArray(r, i); 1578 } 1579 } 1580 1581 bh.consume(r); 1582 } 1583 1584 1585 @Benchmark 1586 public void NEG(Blackhole bh) { 1587 byte[] a = fa.apply(SPECIES.length()); 1588 byte[] r = fr.apply(SPECIES.length()); 1589 1590 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1591 for (int i = 0; i < a.length; i += SPECIES.length()) { 1592 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1593 av.lanewise(VectorOperators.NEG).intoArray(r, i); 1594 } 1595 } 1596 1597 bh.consume(r); 1598 } 1599 1600 @Benchmark 1601 public void NEGMasked(Blackhole bh) { 1602 byte[] a = fa.apply(SPECIES.length()); 1603 byte[] r = fr.apply(SPECIES.length()); 1604 boolean[] mask = fm.apply(SPECIES.length()); 1605 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1606 1607 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1608 for (int i = 0; i < a.length; i += SPECIES.length()) { 1609 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1610 av.lanewise(VectorOperators.NEG, vmask).intoArray(r, i); 1611 } 1612 } 1613 1614 bh.consume(r); 1615 } 1616 1617 @Benchmark 1618 public void ABS(Blackhole bh) { 1619 byte[] a = fa.apply(SPECIES.length()); 1620 byte[] r = fr.apply(SPECIES.length()); 1621 1622 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1623 for (int i = 0; i < a.length; i += SPECIES.length()) { 1624 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1625 av.lanewise(VectorOperators.ABS).intoArray(r, i); 1626 } 1627 } 1628 1629 bh.consume(r); 1630 } 1631 1632 @Benchmark 1633 public void ABSMasked(Blackhole bh) { 1634 byte[] a = fa.apply(SPECIES.length()); 1635 byte[] r = fr.apply(SPECIES.length()); 1636 boolean[] mask = fm.apply(SPECIES.length()); 1637 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1638 1639 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1640 for (int i = 0; i < a.length; i += SPECIES.length()) { 1641 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1642 av.lanewise(VectorOperators.ABS, vmask).intoArray(r, i); 1643 } 1644 } 1645 1646 bh.consume(r); 1647 } 1648 1649 1650 @Benchmark 1651 public void NOT(Blackhole bh) { 1652 byte[] a = fa.apply(SPECIES.length()); 1653 byte[] r = fr.apply(SPECIES.length()); 1654 1655 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1656 for (int i = 0; i < a.length; i += SPECIES.length()) { 1657 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1658 av.lanewise(VectorOperators.NOT).intoArray(r, i); 1659 } 1660 } 1661 1662 bh.consume(r); 1663 } 1664 1665 1666 1667 @Benchmark 1668 public void NOTMasked(Blackhole bh) { 1669 byte[] a = fa.apply(SPECIES.length()); 1670 byte[] r = fr.apply(SPECIES.length()); 1671 boolean[] mask = fm.apply(SPECIES.length()); 1672 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1673 1674 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1675 for (int i = 0; i < a.length; i += SPECIES.length()) { 1676 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1677 av.lanewise(VectorOperators.NOT, vmask).intoArray(r, i); 1678 } 1679 } 1680 1681 bh.consume(r); 1682 } 1683 1684 1685 1686 @Benchmark 1687 public void ZOMO(Blackhole bh) { 1688 byte[] a = fa.apply(SPECIES.length()); 1689 byte[] r = fr.apply(SPECIES.length()); 1690 1691 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1692 for (int i = 0; i < a.length; i += SPECIES.length()) { 1693 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1694 av.lanewise(VectorOperators.ZOMO).intoArray(r, i); 1695 } 1696 } 1697 1698 bh.consume(r); 1699 } 1700 1701 1702 1703 @Benchmark 1704 public void ZOMOMasked(Blackhole bh) { 1705 byte[] a = fa.apply(SPECIES.length()); 1706 byte[] r = fr.apply(SPECIES.length()); 1707 boolean[] mask = fm.apply(SPECIES.length()); 1708 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1709 1710 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1711 for (int i = 0; i < a.length; i += SPECIES.length()) { 1712 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1713 av.lanewise(VectorOperators.ZOMO, vmask).intoArray(r, i); 1714 } 1715 } 1716 1717 bh.consume(r); 1718 } 1719 1720 1721 1722 1723 @Benchmark 1724 public void gather(Blackhole bh) { 1725 byte[] a = fa.apply(SPECIES.length()); 1726 int[] b = fs.apply(a.length, SPECIES.length()); 1727 byte[] r = new byte[a.length]; 1728 1729 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1730 for (int i = 0; i < a.length; i += SPECIES.length()) { 1731 ByteVector av = ByteVector.fromArray(SPECIES, a, i, b, i); 1732 av.intoArray(r, i); 1733 } 1734 } 1735 1736 bh.consume(r); 1737 } 1738 1739 @Benchmark 1740 public void gatherMasked(Blackhole bh) { 1741 byte[] a = fa.apply(SPECIES.length()); 1742 int[] b = fs.apply(a.length, SPECIES.length()); 1743 byte[] r = new byte[a.length]; 1744 boolean[] mask = fm.apply(SPECIES.length()); 1745 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1746 1747 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1748 for (int i = 0; i < a.length; i += SPECIES.length()) { 1749 ByteVector av = ByteVector.fromArray(SPECIES, a, i, b, i, vmask); 1750 av.intoArray(r, i); 1751 } 1752 } 1753 1754 bh.consume(r); 1755 } 1756 1757 @Benchmark 1758 public void scatter(Blackhole bh) { 1759 byte[] a = fa.apply(SPECIES.length()); 1760 int[] b = fs.apply(a.length, SPECIES.length()); 1761 byte[] r = new byte[a.length]; 1762 1763 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1764 for (int i = 0; i < a.length; i += SPECIES.length()) { 1765 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1766 av.intoArray(r, i, b, i); 1767 } 1768 } 1769 1770 bh.consume(r); 1771 } 1772 1773 @Benchmark 1774 public void scatterMasked(Blackhole bh) { 1775 byte[] a = fa.apply(SPECIES.length()); 1776 int[] b = fs.apply(a.length, SPECIES.length()); 1777 byte[] r = fb.apply(SPECIES.length()); 1778 boolean[] mask = fm.apply(SPECIES.length()); 1779 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, 0); 1780 1781 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1782 for (int i = 0; i < a.length; i += SPECIES.length()) { 1783 ByteVector av = ByteVector.fromArray(SPECIES, a, i); 1784 av.intoArray(r, i, b, i, vmask); 1785 } 1786 } 1787 1788 bh.consume(r); 1789 } 1790 } 1791