1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 import jdk.incubator.vector.Vector; 27 import jdk.incubator.vector.Vector.Shape; 28 import jdk.incubator.vector.Vector.Species; 29 import jdk.incubator.vector.FloatVector; 30 31 import java.util.concurrent.TimeUnit; 32 import java.util.function.BiFunction; 33 import java.util.function.IntFunction; 34 35 import org.openjdk.jmh.annotations.*; 36 import org.openjdk.jmh.infra.Blackhole; 37 38 @BenchmarkMode(Mode.Throughput) 39 @OutputTimeUnit(TimeUnit.MILLISECONDS) 40 @State(Scope.Benchmark) 41 @Warmup(iterations = 3, time = 1) 42 @Measurement(iterations = 5, time = 1) 43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 44 public class FloatMaxVector extends AbstractVectorBenchmark { 45 static final Species<Float> SPECIES = FloatVector.SPECIES_MAX; 46 47 static final int INVOC_COUNT = 1; // get rid of outer loop 48 49 @Param("1024") 50 int size; 51 52 float[] fill(IntFunction<Float> f) { 53 float[] array = new float[size]; 54 for (int i = 0; i < array.length; i++) { 55 array[i] = f.apply(i); 56 } 57 return array; 58 } 59 60 float[] a, b, c, r; 61 boolean[] m, rm; 62 int[] s; 63 64 @Setup 65 public void init() { 66 size += size % SPECIES.length(); // FIXME: add post-loops 67 68 a = fill(i -> (float)(2*i)); 69 b = fill(i -> (float)(i+1)); 70 c = fill(i -> (float)(i+5)); 71 r = fill(i -> (float)0); 72 73 m = fillMask(size, i -> (i % 2) == 0); 74 rm = fillMask(size, i -> false); 75 76 s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length())); 77 } 78 79 final IntFunction<float[]> fa = vl -> a; 80 final IntFunction<float[]> fb = vl -> b; 81 final IntFunction<float[]> fc = vl -> c; 82 final IntFunction<float[]> fr = vl -> r; 83 final IntFunction<boolean[]> fm = vl -> m; 84 final IntFunction<boolean[]> fmr = vl -> rm; 85 final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; 86 87 88 @Benchmark 89 public void add(Blackhole bh) { 90 float[] a = fa.apply(SPECIES.length()); 91 float[] b = fb.apply(SPECIES.length()); 92 float[] r = fr.apply(SPECIES.length()); 93 94 for (int ic = 0; ic < INVOC_COUNT; ic++) { 95 for (int i = 0; i < a.length; i += SPECIES.length()) { 96 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 97 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 98 av.add(bv).intoArray(r, i); 99 } 100 } 101 102 bh.consume(r); 103 } 104 105 @Benchmark 106 public void addMasked(Blackhole bh) { 107 float[] a = fa.apply(SPECIES.length()); 108 float[] b = fb.apply(SPECIES.length()); 109 float[] r = fr.apply(SPECIES.length()); 110 boolean[] mask = fm.apply(SPECIES.length()); 111 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 112 113 for (int ic = 0; ic < INVOC_COUNT; ic++) { 114 for (int i = 0; i < a.length; i += SPECIES.length()) { 115 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 116 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 117 av.add(bv, vmask).intoArray(r, i); 118 } 119 } 120 121 bh.consume(r); 122 } 123 124 @Benchmark 125 public void sub(Blackhole bh) { 126 float[] a = fa.apply(SPECIES.length()); 127 float[] b = fb.apply(SPECIES.length()); 128 float[] r = fr.apply(SPECIES.length()); 129 130 for (int ic = 0; ic < INVOC_COUNT; ic++) { 131 for (int i = 0; i < a.length; i += SPECIES.length()) { 132 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 133 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 134 av.sub(bv).intoArray(r, i); 135 } 136 } 137 138 bh.consume(r); 139 } 140 141 @Benchmark 142 public void subMasked(Blackhole bh) { 143 float[] a = fa.apply(SPECIES.length()); 144 float[] b = fb.apply(SPECIES.length()); 145 float[] r = fr.apply(SPECIES.length()); 146 boolean[] mask = fm.apply(SPECIES.length()); 147 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 148 149 for (int ic = 0; ic < INVOC_COUNT; ic++) { 150 for (int i = 0; i < a.length; i += SPECIES.length()) { 151 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 152 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 153 av.sub(bv, vmask).intoArray(r, i); 154 } 155 } 156 157 bh.consume(r); 158 } 159 160 161 @Benchmark 162 public void div(Blackhole bh) { 163 float[] a = fa.apply(SPECIES.length()); 164 float[] b = fb.apply(SPECIES.length()); 165 float[] r = fr.apply(SPECIES.length()); 166 167 for (int ic = 0; ic < INVOC_COUNT; ic++) { 168 for (int i = 0; i < a.length; i += SPECIES.length()) { 169 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 170 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 171 av.div(bv).intoArray(r, i); 172 } 173 } 174 175 bh.consume(r); 176 } 177 178 179 180 @Benchmark 181 public void divMasked(Blackhole bh) { 182 float[] a = fa.apply(SPECIES.length()); 183 float[] b = fb.apply(SPECIES.length()); 184 float[] r = fr.apply(SPECIES.length()); 185 boolean[] mask = fm.apply(SPECIES.length()); 186 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 187 188 for (int ic = 0; ic < INVOC_COUNT; ic++) { 189 for (int i = 0; i < a.length; i += SPECIES.length()) { 190 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 191 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 192 av.div(bv, vmask).intoArray(r, i); 193 } 194 } 195 196 bh.consume(r); 197 } 198 199 200 @Benchmark 201 public void mul(Blackhole bh) { 202 float[] a = fa.apply(SPECIES.length()); 203 float[] b = fb.apply(SPECIES.length()); 204 float[] r = fr.apply(SPECIES.length()); 205 206 for (int ic = 0; ic < INVOC_COUNT; ic++) { 207 for (int i = 0; i < a.length; i += SPECIES.length()) { 208 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 209 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 210 av.mul(bv).intoArray(r, i); 211 } 212 } 213 214 bh.consume(r); 215 } 216 217 @Benchmark 218 public void mulMasked(Blackhole bh) { 219 float[] a = fa.apply(SPECIES.length()); 220 float[] b = fb.apply(SPECIES.length()); 221 float[] r = fr.apply(SPECIES.length()); 222 boolean[] mask = fm.apply(SPECIES.length()); 223 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 224 225 for (int ic = 0; ic < INVOC_COUNT; ic++) { 226 for (int i = 0; i < a.length; i += SPECIES.length()) { 227 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 228 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 229 av.mul(bv, vmask).intoArray(r, i); 230 } 231 } 232 233 bh.consume(r); 234 } 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 @Benchmark 267 public void max(Blackhole bh) { 268 float[] a = fa.apply(SPECIES.length()); 269 float[] b = fb.apply(SPECIES.length()); 270 float[] r = fr.apply(SPECIES.length()); 271 272 for (int ic = 0; ic < INVOC_COUNT; ic++) { 273 for (int i = 0; i < a.length; i += SPECIES.length()) { 274 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 275 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 276 av.max(bv).intoArray(r, i); 277 } 278 } 279 280 bh.consume(r); 281 } 282 283 @Benchmark 284 public void min(Blackhole bh) { 285 float[] a = fa.apply(SPECIES.length()); 286 float[] b = fb.apply(SPECIES.length()); 287 float[] r = fr.apply(SPECIES.length()); 288 289 for (int ic = 0; ic < INVOC_COUNT; ic++) { 290 for (int i = 0; i < a.length; i += SPECIES.length()) { 291 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 292 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 293 av.min(bv).intoArray(r, i); 294 } 295 } 296 297 bh.consume(r); 298 } 299 300 301 302 303 @Benchmark 304 public void addAll(Blackhole bh) { 305 float[] a = fa.apply(SPECIES.length()); 306 float[] r = fr.apply(SPECIES.length()); 307 float ra = 0; 308 309 for (int ic = 0; ic < INVOC_COUNT; ic++) { 310 for (int i = 0; i < a.length; i += SPECIES.length()) { 311 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 312 r[i] = av.addAll(); 313 } 314 } 315 316 for (int ic = 0; ic < INVOC_COUNT; ic++) { 317 ra = 0; 318 for (int i = 0; i < a.length; i += SPECIES.length()) { 319 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 320 ra += av.addAll(); 321 } 322 } 323 324 bh.consume(ra); 325 bh.consume(r); 326 } 327 328 @Benchmark 329 public void mulAll(Blackhole bh) { 330 float[] a = fa.apply(SPECIES.length()); 331 float[] r = fr.apply(SPECIES.length()); 332 float ra = 1; 333 334 for (int ic = 0; ic < INVOC_COUNT; ic++) { 335 for (int i = 0; i < a.length; i += SPECIES.length()) { 336 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 337 r[i] = av.mulAll(); 338 } 339 } 340 341 for (int ic = 0; ic < INVOC_COUNT; ic++) { 342 ra = 1; 343 for (int i = 0; i < a.length; i += SPECIES.length()) { 344 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 345 ra *= av.mulAll(); 346 } 347 } 348 349 bh.consume(ra); 350 bh.consume(r); 351 } 352 353 @Benchmark 354 public void minAll(Blackhole bh) { 355 float[] a = fa.apply(SPECIES.length()); 356 float[] r = fr.apply(SPECIES.length()); 357 float ra = Float.MAX_VALUE; 358 359 for (int ic = 0; ic < INVOC_COUNT; ic++) { 360 for (int i = 0; i < a.length; i += SPECIES.length()) { 361 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 362 r[i] = av.minAll(); 363 } 364 } 365 366 for (int ic = 0; ic < INVOC_COUNT; ic++) { 367 ra = Float.MAX_VALUE; 368 for (int i = 0; i < a.length; i += SPECIES.length()) { 369 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 370 ra = (float)Math.min(ra, av.minAll()); 371 } 372 } 373 374 bh.consume(ra); 375 bh.consume(r); 376 } 377 378 @Benchmark 379 public void maxAll(Blackhole bh) { 380 float[] a = fa.apply(SPECIES.length()); 381 float[] r = fr.apply(SPECIES.length()); 382 float ra = Float.MIN_VALUE; 383 384 for (int ic = 0; ic < INVOC_COUNT; ic++) { 385 for (int i = 0; i < a.length; i += SPECIES.length()) { 386 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 387 r[i] = av.maxAll(); 388 } 389 } 390 391 for (int ic = 0; ic < INVOC_COUNT; ic++) { 392 ra = Float.MIN_VALUE; 393 for (int i = 0; i < a.length; i += SPECIES.length()) { 394 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 395 ra = (float)Math.max(ra, av.maxAll()); 396 } 397 } 398 399 bh.consume(ra); 400 bh.consume(r); 401 } 402 403 404 405 @Benchmark 406 public void with(Blackhole bh) { 407 float[] a = fa.apply(SPECIES.length()); 408 float[] r = fr.apply(SPECIES.length()); 409 410 for (int ic = 0; ic < INVOC_COUNT; ic++) { 411 for (int i = 0; i < a.length; i += SPECIES.length()) { 412 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 413 av.with(0, (float)4).intoArray(r, i); 414 } 415 } 416 417 bh.consume(r); 418 } 419 420 @Benchmark 421 public Object lessThan() { 422 float[] a = fa.apply(size); 423 float[] b = fb.apply(size); 424 boolean[] ms = fm.apply(size); 425 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0); 426 427 for (int ic = 0; ic < INVOC_COUNT; ic++) { 428 for (int i = 0; i < a.length; i += SPECIES.length()) { 429 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 430 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 431 Vector.Mask<Float> mv = av.lessThan(bv); 432 433 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 434 } 435 } 436 return m; 437 } 438 439 440 @Benchmark 441 public Object greaterThan() { 442 float[] a = fa.apply(size); 443 float[] b = fb.apply(size); 444 boolean[] ms = fm.apply(size); 445 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0); 446 447 for (int ic = 0; ic < INVOC_COUNT; ic++) { 448 for (int i = 0; i < a.length; i += SPECIES.length()) { 449 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 450 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 451 Vector.Mask<Float> mv = av.greaterThan(bv); 452 453 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 454 } 455 } 456 return m; 457 } 458 459 460 @Benchmark 461 public Object equal() { 462 float[] a = fa.apply(size); 463 float[] b = fb.apply(size); 464 boolean[] ms = fm.apply(size); 465 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0); 466 467 for (int ic = 0; ic < INVOC_COUNT; ic++) { 468 for (int i = 0; i < a.length; i += SPECIES.length()) { 469 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 470 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 471 Vector.Mask<Float> mv = av.equal(bv); 472 473 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 474 } 475 } 476 return m; 477 } 478 479 480 @Benchmark 481 public Object notEqual() { 482 float[] a = fa.apply(size); 483 float[] b = fb.apply(size); 484 boolean[] ms = fm.apply(size); 485 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0); 486 487 for (int ic = 0; ic < INVOC_COUNT; ic++) { 488 for (int i = 0; i < a.length; i += SPECIES.length()) { 489 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 490 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 491 Vector.Mask<Float> mv = av.notEqual(bv); 492 493 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 494 } 495 } 496 return m; 497 } 498 499 500 @Benchmark 501 public Object lessThanEq() { 502 float[] a = fa.apply(size); 503 float[] b = fb.apply(size); 504 boolean[] ms = fm.apply(size); 505 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0); 506 507 for (int ic = 0; ic < INVOC_COUNT; ic++) { 508 for (int i = 0; i < a.length; i += SPECIES.length()) { 509 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 510 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 511 Vector.Mask<Float> mv = av.lessThanEq(bv); 512 513 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 514 } 515 } 516 return m; 517 } 518 519 520 @Benchmark 521 public Object greaterThanEq() { 522 float[] a = fa.apply(size); 523 float[] b = fb.apply(size); 524 boolean[] ms = fm.apply(size); 525 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0); 526 527 for (int ic = 0; ic < INVOC_COUNT; ic++) { 528 for (int i = 0; i < a.length; i += SPECIES.length()) { 529 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 530 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 531 Vector.Mask<Float> mv = av.greaterThanEq(bv); 532 533 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations 534 } 535 } 536 return m; 537 } 538 539 540 @Benchmark 541 public void blend(Blackhole bh) { 542 float[] a = fa.apply(SPECIES.length()); 543 float[] b = fb.apply(SPECIES.length()); 544 float[] r = fr.apply(SPECIES.length()); 545 boolean[] mask = fm.apply(SPECIES.length()); 546 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 547 548 for (int ic = 0; ic < INVOC_COUNT; ic++) { 549 for (int i = 0; i < a.length; i += SPECIES.length()) { 550 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 551 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 552 av.blend(bv, vmask).intoArray(r, i); 553 } 554 } 555 556 bh.consume(r); 557 } 558 559 @Benchmark 560 public void rearrange(Blackhole bh) { 561 float[] a = fa.apply(SPECIES.length()); 562 int[] order = fs.apply(a.length, SPECIES.length()); 563 float[] r = fr.apply(SPECIES.length()); 564 565 for (int ic = 0; ic < INVOC_COUNT; ic++) { 566 for (int i = 0; i < a.length; i += SPECIES.length()) { 567 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 568 av.rearrange(FloatVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i); 569 } 570 } 571 572 bh.consume(r); 573 } 574 575 @Benchmark 576 public void extract(Blackhole bh) { 577 float[] a = fa.apply(SPECIES.length()); 578 float[] r = fr.apply(SPECIES.length()); 579 580 for (int ic = 0; ic < INVOC_COUNT; ic++) { 581 for (int i = 0; i < a.length; i += SPECIES.length()) { 582 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 583 int num_lanes = SPECIES.length(); 584 // Manually unroll because full unroll happens after intrinsification. 585 // Unroll is needed because get intrinsic requires for index to be a known constant. 586 if (num_lanes == 1) { 587 r[i]=av.get(0); 588 } else if (num_lanes == 2) { 589 r[i]=av.get(0); 590 r[i+1]=av.get(1); 591 } else if (num_lanes == 4) { 592 r[i]=av.get(0); 593 r[i+1]=av.get(1); 594 r[i+2]=av.get(2); 595 r[i+3]=av.get(3); 596 } else if (num_lanes == 8) { 597 r[i]=av.get(0); 598 r[i+1]=av.get(1); 599 r[i+2]=av.get(2); 600 r[i+3]=av.get(3); 601 r[i+4]=av.get(4); 602 r[i+5]=av.get(5); 603 r[i+6]=av.get(6); 604 r[i+7]=av.get(7); 605 } else if (num_lanes == 16) { 606 r[i]=av.get(0); 607 r[i+1]=av.get(1); 608 r[i+2]=av.get(2); 609 r[i+3]=av.get(3); 610 r[i+4]=av.get(4); 611 r[i+5]=av.get(5); 612 r[i+6]=av.get(6); 613 r[i+7]=av.get(7); 614 r[i+8]=av.get(8); 615 r[i+9]=av.get(9); 616 r[i+10]=av.get(10); 617 r[i+11]=av.get(11); 618 r[i+12]=av.get(12); 619 r[i+13]=av.get(13); 620 r[i+14]=av.get(14); 621 r[i+15]=av.get(15); 622 } else if (num_lanes == 32) { 623 r[i]=av.get(0); 624 r[i+1]=av.get(1); 625 r[i+2]=av.get(2); 626 r[i+3]=av.get(3); 627 r[i+4]=av.get(4); 628 r[i+5]=av.get(5); 629 r[i+6]=av.get(6); 630 r[i+7]=av.get(7); 631 r[i+8]=av.get(8); 632 r[i+9]=av.get(9); 633 r[i+10]=av.get(10); 634 r[i+11]=av.get(11); 635 r[i+12]=av.get(12); 636 r[i+13]=av.get(13); 637 r[i+14]=av.get(14); 638 r[i+15]=av.get(15); 639 r[i+16]=av.get(16); 640 r[i+17]=av.get(17); 641 r[i+18]=av.get(18); 642 r[i+19]=av.get(19); 643 r[i+20]=av.get(20); 644 r[i+21]=av.get(21); 645 r[i+22]=av.get(22); 646 r[i+23]=av.get(23); 647 r[i+24]=av.get(24); 648 r[i+25]=av.get(25); 649 r[i+26]=av.get(26); 650 r[i+27]=av.get(27); 651 r[i+28]=av.get(28); 652 r[i+29]=av.get(29); 653 r[i+30]=av.get(30); 654 r[i+31]=av.get(31); 655 } else if (num_lanes == 64) { 656 r[i]=av.get(0); 657 r[i+1]=av.get(1); 658 r[i+2]=av.get(2); 659 r[i+3]=av.get(3); 660 r[i+4]=av.get(4); 661 r[i+5]=av.get(5); 662 r[i+6]=av.get(6); 663 r[i+7]=av.get(7); 664 r[i+8]=av.get(8); 665 r[i+9]=av.get(9); 666 r[i+10]=av.get(10); 667 r[i+11]=av.get(11); 668 r[i+12]=av.get(12); 669 r[i+13]=av.get(13); 670 r[i+14]=av.get(14); 671 r[i+15]=av.get(15); 672 r[i+16]=av.get(16); 673 r[i+17]=av.get(17); 674 r[i+18]=av.get(18); 675 r[i+19]=av.get(19); 676 r[i+20]=av.get(20); 677 r[i+21]=av.get(21); 678 r[i+22]=av.get(22); 679 r[i+23]=av.get(23); 680 r[i+24]=av.get(24); 681 r[i+25]=av.get(25); 682 r[i+26]=av.get(26); 683 r[i+27]=av.get(27); 684 r[i+28]=av.get(28); 685 r[i+29]=av.get(29); 686 r[i+30]=av.get(30); 687 r[i+31]=av.get(31); 688 r[i+32]=av.get(32); 689 r[i+33]=av.get(33); 690 r[i+34]=av.get(34); 691 r[i+35]=av.get(35); 692 r[i+36]=av.get(36); 693 r[i+37]=av.get(37); 694 r[i+38]=av.get(38); 695 r[i+39]=av.get(39); 696 r[i+40]=av.get(40); 697 r[i+41]=av.get(41); 698 r[i+42]=av.get(42); 699 r[i+43]=av.get(43); 700 r[i+44]=av.get(44); 701 r[i+45]=av.get(45); 702 r[i+46]=av.get(46); 703 r[i+47]=av.get(47); 704 r[i+48]=av.get(48); 705 r[i+49]=av.get(49); 706 r[i+50]=av.get(50); 707 r[i+51]=av.get(51); 708 r[i+52]=av.get(52); 709 r[i+53]=av.get(53); 710 r[i+54]=av.get(54); 711 r[i+55]=av.get(55); 712 r[i+56]=av.get(56); 713 r[i+57]=av.get(57); 714 r[i+58]=av.get(58); 715 r[i+59]=av.get(59); 716 r[i+60]=av.get(60); 717 r[i+61]=av.get(61); 718 r[i+62]=av.get(62); 719 r[i+63]=av.get(63); 720 } else { 721 for (int j = 0; j < SPECIES.length(); j++) { 722 r[i+j]=av.get(j); 723 } 724 } 725 } 726 } 727 728 bh.consume(r); 729 } 730 731 732 @Benchmark 733 public void sin(Blackhole bh) { 734 float[] a = fa.apply(SPECIES.length()); 735 float[] r = fr.apply(SPECIES.length()); 736 737 for (int ic = 0; ic < INVOC_COUNT; ic++) { 738 for (int i = 0; i < a.length; i += SPECIES.length()) { 739 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 740 av.sin().intoArray(r, i); 741 } 742 } 743 744 bh.consume(r); 745 } 746 747 748 749 @Benchmark 750 public void exp(Blackhole bh) { 751 float[] a = fa.apply(SPECIES.length()); 752 float[] r = fr.apply(SPECIES.length()); 753 754 for (int ic = 0; ic < INVOC_COUNT; ic++) { 755 for (int i = 0; i < a.length; i += SPECIES.length()) { 756 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 757 av.exp().intoArray(r, i); 758 } 759 } 760 761 bh.consume(r); 762 } 763 764 765 766 @Benchmark 767 public void log1p(Blackhole bh) { 768 float[] a = fa.apply(SPECIES.length()); 769 float[] r = fr.apply(SPECIES.length()); 770 771 for (int ic = 0; ic < INVOC_COUNT; ic++) { 772 for (int i = 0; i < a.length; i += SPECIES.length()) { 773 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 774 av.log1p().intoArray(r, i); 775 } 776 } 777 778 bh.consume(r); 779 } 780 781 782 783 @Benchmark 784 public void log(Blackhole bh) { 785 float[] a = fa.apply(SPECIES.length()); 786 float[] r = fr.apply(SPECIES.length()); 787 788 for (int ic = 0; ic < INVOC_COUNT; ic++) { 789 for (int i = 0; i < a.length; i += SPECIES.length()) { 790 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 791 av.log().intoArray(r, i); 792 } 793 } 794 795 bh.consume(r); 796 } 797 798 799 800 @Benchmark 801 public void log10(Blackhole bh) { 802 float[] a = fa.apply(SPECIES.length()); 803 float[] r = fr.apply(SPECIES.length()); 804 805 for (int ic = 0; ic < INVOC_COUNT; ic++) { 806 for (int i = 0; i < a.length; i += SPECIES.length()) { 807 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 808 av.log10().intoArray(r, i); 809 } 810 } 811 812 bh.consume(r); 813 } 814 815 816 817 @Benchmark 818 public void expm1(Blackhole bh) { 819 float[] a = fa.apply(SPECIES.length()); 820 float[] r = fr.apply(SPECIES.length()); 821 822 for (int ic = 0; ic < INVOC_COUNT; ic++) { 823 for (int i = 0; i < a.length; i += SPECIES.length()) { 824 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 825 av.expm1().intoArray(r, i); 826 } 827 } 828 829 bh.consume(r); 830 } 831 832 833 834 @Benchmark 835 public void cos(Blackhole bh) { 836 float[] a = fa.apply(SPECIES.length()); 837 float[] r = fr.apply(SPECIES.length()); 838 839 for (int ic = 0; ic < INVOC_COUNT; ic++) { 840 for (int i = 0; i < a.length; i += SPECIES.length()) { 841 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 842 av.cos().intoArray(r, i); 843 } 844 } 845 846 bh.consume(r); 847 } 848 849 850 851 @Benchmark 852 public void tan(Blackhole bh) { 853 float[] a = fa.apply(SPECIES.length()); 854 float[] r = fr.apply(SPECIES.length()); 855 856 for (int ic = 0; ic < INVOC_COUNT; ic++) { 857 for (int i = 0; i < a.length; i += SPECIES.length()) { 858 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 859 av.tan().intoArray(r, i); 860 } 861 } 862 863 bh.consume(r); 864 } 865 866 867 868 @Benchmark 869 public void sinh(Blackhole bh) { 870 float[] a = fa.apply(SPECIES.length()); 871 float[] r = fr.apply(SPECIES.length()); 872 873 for (int ic = 0; ic < INVOC_COUNT; ic++) { 874 for (int i = 0; i < a.length; i += SPECIES.length()) { 875 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 876 av.sinh().intoArray(r, i); 877 } 878 } 879 880 bh.consume(r); 881 } 882 883 884 885 @Benchmark 886 public void cosh(Blackhole bh) { 887 float[] a = fa.apply(SPECIES.length()); 888 float[] r = fr.apply(SPECIES.length()); 889 890 for (int ic = 0; ic < INVOC_COUNT; ic++) { 891 for (int i = 0; i < a.length; i += SPECIES.length()) { 892 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 893 av.cosh().intoArray(r, i); 894 } 895 } 896 897 bh.consume(r); 898 } 899 900 901 902 @Benchmark 903 public void tanh(Blackhole bh) { 904 float[] a = fa.apply(SPECIES.length()); 905 float[] r = fr.apply(SPECIES.length()); 906 907 for (int ic = 0; ic < INVOC_COUNT; ic++) { 908 for (int i = 0; i < a.length; i += SPECIES.length()) { 909 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 910 av.tanh().intoArray(r, i); 911 } 912 } 913 914 bh.consume(r); 915 } 916 917 918 919 @Benchmark 920 public void asin(Blackhole bh) { 921 float[] a = fa.apply(SPECIES.length()); 922 float[] r = fr.apply(SPECIES.length()); 923 924 for (int ic = 0; ic < INVOC_COUNT; ic++) { 925 for (int i = 0; i < a.length; i += SPECIES.length()) { 926 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 927 av.asin().intoArray(r, i); 928 } 929 } 930 931 bh.consume(r); 932 } 933 934 935 936 @Benchmark 937 public void acos(Blackhole bh) { 938 float[] a = fa.apply(SPECIES.length()); 939 float[] r = fr.apply(SPECIES.length()); 940 941 for (int ic = 0; ic < INVOC_COUNT; ic++) { 942 for (int i = 0; i < a.length; i += SPECIES.length()) { 943 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 944 av.acos().intoArray(r, i); 945 } 946 } 947 948 bh.consume(r); 949 } 950 951 952 953 @Benchmark 954 public void atan(Blackhole bh) { 955 float[] a = fa.apply(SPECIES.length()); 956 float[] r = fr.apply(SPECIES.length()); 957 958 for (int ic = 0; ic < INVOC_COUNT; ic++) { 959 for (int i = 0; i < a.length; i += SPECIES.length()) { 960 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 961 av.atan().intoArray(r, i); 962 } 963 } 964 965 bh.consume(r); 966 } 967 968 969 970 @Benchmark 971 public void cbrt(Blackhole bh) { 972 float[] a = fa.apply(SPECIES.length()); 973 float[] r = fr.apply(SPECIES.length()); 974 975 for (int ic = 0; ic < INVOC_COUNT; ic++) { 976 for (int i = 0; i < a.length; i += SPECIES.length()) { 977 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 978 av.cbrt().intoArray(r, i); 979 } 980 } 981 982 bh.consume(r); 983 } 984 985 986 987 @Benchmark 988 public void hypot(Blackhole bh) { 989 float[] a = fa.apply(SPECIES.length()); 990 float[] b = fb.apply(SPECIES.length()); 991 float[] r = fr.apply(SPECIES.length()); 992 993 for (int ic = 0; ic < INVOC_COUNT; ic++) { 994 for (int i = 0; i < a.length; i += SPECIES.length()) { 995 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 996 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 997 av.hypot(bv).intoArray(r, i); 998 } 999 } 1000 1001 bh.consume(r); 1002 } 1003 1004 1005 1006 @Benchmark 1007 public void pow(Blackhole bh) { 1008 float[] a = fa.apply(SPECIES.length()); 1009 float[] b = fb.apply(SPECIES.length()); 1010 float[] r = fr.apply(SPECIES.length()); 1011 1012 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1013 for (int i = 0; i < a.length; i += SPECIES.length()) { 1014 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1015 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 1016 av.pow(bv).intoArray(r, i); 1017 } 1018 } 1019 1020 bh.consume(r); 1021 } 1022 1023 1024 1025 @Benchmark 1026 public void atan2(Blackhole bh) { 1027 float[] a = fa.apply(SPECIES.length()); 1028 float[] b = fb.apply(SPECIES.length()); 1029 float[] r = fr.apply(SPECIES.length()); 1030 1031 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1032 for (int i = 0; i < a.length; i += SPECIES.length()) { 1033 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1034 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 1035 av.atan2(bv).intoArray(r, i); 1036 } 1037 } 1038 1039 bh.consume(r); 1040 } 1041 1042 1043 1044 @Benchmark 1045 public void fma(Blackhole bh) { 1046 float[] a = fa.apply(SPECIES.length()); 1047 float[] b = fb.apply(SPECIES.length()); 1048 float[] c = fc.apply(SPECIES.length()); 1049 float[] r = fr.apply(SPECIES.length()); 1050 1051 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1052 for (int i = 0; i < a.length; i += SPECIES.length()) { 1053 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1054 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 1055 FloatVector cv = FloatVector.fromArray(SPECIES, c, i); 1056 av.fma(bv, cv).intoArray(r, i); 1057 } 1058 } 1059 1060 bh.consume(r); 1061 } 1062 1063 1064 1065 @Benchmark 1066 public void fmaMasked(Blackhole bh) { 1067 float[] a = fa.apply(SPECIES.length()); 1068 float[] b = fb.apply(SPECIES.length()); 1069 float[] c = fc.apply(SPECIES.length()); 1070 float[] r = fr.apply(SPECIES.length()); 1071 boolean[] mask = fm.apply(SPECIES.length()); 1072 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 1073 1074 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1075 for (int i = 0; i < a.length; i += SPECIES.length()) { 1076 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1077 FloatVector bv = FloatVector.fromArray(SPECIES, b, i); 1078 FloatVector cv = FloatVector.fromArray(SPECIES, c, i); 1079 av.fma(bv, cv, vmask).intoArray(r, i); 1080 } 1081 } 1082 1083 bh.consume(r); 1084 } 1085 1086 1087 @Benchmark 1088 public void neg(Blackhole bh) { 1089 float[] a = fa.apply(SPECIES.length()); 1090 float[] r = fr.apply(SPECIES.length()); 1091 1092 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1093 for (int i = 0; i < a.length; i += SPECIES.length()) { 1094 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1095 av.neg().intoArray(r, i); 1096 } 1097 } 1098 1099 bh.consume(r); 1100 } 1101 1102 @Benchmark 1103 public void negMasked(Blackhole bh) { 1104 float[] a = fa.apply(SPECIES.length()); 1105 float[] r = fr.apply(SPECIES.length()); 1106 boolean[] mask = fm.apply(SPECIES.length()); 1107 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 1108 1109 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1110 for (int i = 0; i < a.length; i += SPECIES.length()) { 1111 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1112 av.neg(vmask).intoArray(r, i); 1113 } 1114 } 1115 1116 bh.consume(r); 1117 } 1118 1119 @Benchmark 1120 public void abs(Blackhole bh) { 1121 float[] a = fa.apply(SPECIES.length()); 1122 float[] r = fr.apply(SPECIES.length()); 1123 1124 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1125 for (int i = 0; i < a.length; i += SPECIES.length()) { 1126 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1127 av.abs().intoArray(r, i); 1128 } 1129 } 1130 1131 bh.consume(r); 1132 } 1133 1134 @Benchmark 1135 public void absMasked(Blackhole bh) { 1136 float[] a = fa.apply(SPECIES.length()); 1137 float[] r = fr.apply(SPECIES.length()); 1138 boolean[] mask = fm.apply(SPECIES.length()); 1139 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 1140 1141 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1142 for (int i = 0; i < a.length; i += SPECIES.length()) { 1143 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1144 av.abs(vmask).intoArray(r, i); 1145 } 1146 } 1147 1148 bh.consume(r); 1149 } 1150 1151 1152 1153 1154 @Benchmark 1155 public void sqrt(Blackhole bh) { 1156 float[] a = fa.apply(SPECIES.length()); 1157 float[] r = fr.apply(SPECIES.length()); 1158 1159 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1160 for (int i = 0; i < a.length; i += SPECIES.length()) { 1161 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1162 av.sqrt().intoArray(r, i); 1163 } 1164 } 1165 1166 bh.consume(r); 1167 } 1168 1169 1170 1171 @Benchmark 1172 public void sqrtMasked(Blackhole bh) { 1173 float[] a = fa.apply(SPECIES.length()); 1174 float[] r = fr.apply(SPECIES.length()); 1175 boolean[] mask = fm.apply(SPECIES.length()); 1176 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask); 1177 1178 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1179 for (int i = 0; i < a.length; i += SPECIES.length()) { 1180 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1181 av.sqrt(vmask).intoArray(r, i); 1182 } 1183 } 1184 1185 bh.consume(r); 1186 } 1187 1188 1189 1190 @Benchmark 1191 public void gather(Blackhole bh) { 1192 float[] a = fa.apply(SPECIES.length()); 1193 int[] b = fs.apply(a.length, SPECIES.length()); 1194 float[] r = new float[a.length]; 1195 1196 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1197 for (int i = 0; i < a.length; i += SPECIES.length()) { 1198 FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i); 1199 av.intoArray(r, i); 1200 } 1201 } 1202 1203 bh.consume(r); 1204 } 1205 1206 1207 1208 @Benchmark 1209 public void scatter(Blackhole bh) { 1210 float[] a = fa.apply(SPECIES.length()); 1211 int[] b = fs.apply(a.length, SPECIES.length()); 1212 float[] r = new float[a.length]; 1213 1214 for (int ic = 0; ic < INVOC_COUNT; ic++) { 1215 for (int i = 0; i < a.length; i += SPECIES.length()) { 1216 FloatVector av = FloatVector.fromArray(SPECIES, a, i); 1217 av.intoArray(r, i, b, i); 1218 } 1219 } 1220 1221 bh.consume(r); 1222 } 1223 1224 } 1225