/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have
 * questions.
 */

package benchmark.jdk.incubator.vector;

import jdk.incubator.vector.Vector;
import jdk.incubator.vector.Vector.Shape;
import jdk.incubator.vector.FloatVector;

import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

/**
 * JMH throughput benchmarks for {@link FloatVector} operations using the
 * 128-bit species.
 *
 * <p>Every benchmark walks the input arrays in steps of
 * {@code SPECIES.length()}, applies one vector operation per step and hands
 * the result to JMH (through a {@link Blackhole} or the method's return
 * value) so the work cannot be eliminated as dead code.
 *
 * <p>The kernels are intentionally written out one by one rather than routed
 * through a shared helper, so that each benchmark body contains a direct call
 * to the operation being measured.
 */
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class Float128Vector extends AbstractVectorBenchmark {
    static final FloatVector.FloatSpecies SPECIES = FloatVector.species(Shape.S_128_BIT);

    static final int INVOC_COUNT = 1; // get rid of outer loop

    /** Requested element count; rounded up to a whole number of vectors in {@link #init()}. */
    @Param("1024")
    int size;

    /**
     * Creates an array of {@code size} floats whose element {@code i} is
     * {@code f.apply(i)}.
     *
     * @param f generator invoked once per index
     * @return the newly populated array
     */
    float[] fill(IntFunction<Float> f) {
        float[] array = new float[size];
        for (int i = 0; i < array.length; i++) {
            array[i] = f.apply(i);
        }
        return array;
    }

    float[] a, b, c, r;
    boolean[] m, rm;
    int[] s;

    /**
     * Allocates and populates the benchmark inputs.
     *
     * <p>{@code size} is first rounded up to the next multiple of the lane
     * count, because none of the kernels has a scalar post-loop: each one
     * loads and stores a full vector at every step, so an array length that
     * is not a multiple of {@code SPECIES.length()} would make the final
     * iteration run past the end of the arrays.
     */
    @Setup
    public void init() {
        // FIXME: add post-loops
        // Round up to a whole number of vectors. The previous expression,
        // size += size % lanes, only happened to work when size was already
        // a multiple of the lane count.
        int lanes = SPECIES.length();
        size = ((size + lanes - 1) / lanes) * lanes;

        a = fill(i -> (float)(2*i));
        b = fill(i -> (float)(i+1));
        c = fill(i -> (float)(i+5));
        r = fill(i -> (float)0);

        m = fillMask(size, i -> (i % 2) == 0);
        rm = fillMask(size, i -> false);

        s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
    }

    // Accessors for the shared inputs; the argument is ignored and the
    // corresponding field is returned.
    final IntFunction<float[]> fa = vl -> a;
    final IntFunction<float[]> fb = vl -> b;
    final IntFunction<float[]> fc = vl -> c;
    final IntFunction<float[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;

    /** Benchmarks {@code FloatVector.add(Vector)}. */
    @Benchmark
    public void add(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.add(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.add(Vector, Mask)}. */
    @Benchmark
    public void addMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.add(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.sub(Vector)}. */
    @Benchmark
    public void sub(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.sub(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.sub(Vector, Mask)}. */
    @Benchmark
    public void subMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.sub(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.div(Vector)}. */
    @Benchmark
    public void div(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.div(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.div(Vector, Mask)}. */
    @Benchmark
    public void divMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.div(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.mul(Vector)}. */
    @Benchmark
    public void mul(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.mul(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.mul(Vector, Mask)}. */
    @Benchmark
    public void mulMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.mul(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.max(Vector)}. */
    @Benchmark
    public void max(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.max(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.min(Vector)}. */
    @Benchmark
    public void min(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.min(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks {@code FloatVector.addAll()}: one pass stores each vector's
     * reduction into {@code r}, a second pass sums the reductions into a
     * scalar.
     */
    @Benchmark
    public void addAll(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        float ra = 0;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                r[i] = av.addAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = 0;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                ra += av.addAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks {@code FloatVector.mulAll()}: one pass stores each vector's
     * reduction into {@code r}, a second pass multiplies the reductions into
     * a scalar.
     */
    @Benchmark
    public void mulAll(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        float ra = 1;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                r[i] = av.mulAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = 1;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                ra *= av.mulAll();
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks {@code FloatVector.minAll()}: one pass stores each vector's
     * reduction into {@code r}, a second pass folds the reductions into a
     * scalar minimum.
     */
    @Benchmark
    public void minAll(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        // +Infinity is the identity for min; Float.MAX_VALUE is not, since
        // it would beat an input consisting only of +Infinity.
        float ra = Float.POSITIVE_INFINITY;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                r[i] = av.minAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = Float.POSITIVE_INFINITY;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                ra = Math.min(ra, av.minAll());
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /**
     * Benchmarks {@code FloatVector.maxAll()}: one pass stores each vector's
     * reduction into {@code r}, a second pass folds the reductions into a
     * scalar maximum.
     */
    @Benchmark
    public void maxAll(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        // -Infinity is the identity for max. Float.MIN_VALUE must not be
        // used here: it is the smallest POSITIVE float, so it would win
        // against any all-negative input.
        float ra = Float.NEGATIVE_INFINITY;

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                r[i] = av.maxAll();
            }
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            ra = Float.NEGATIVE_INFINITY;
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                ra = Math.max(ra, av.maxAll());
            }
        }

        bh.consume(ra);
        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.with(int, float)} on lane 0. */
    @Benchmark
    public void with(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.with(0, (float)4).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.lessThan(Vector)}; returns the accumulated mask. */
    @Benchmark
    public Object lessThan() {
        float[] a = fa.apply(size);
        float[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                Vector.Mask<Float> mv = av.lessThan(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code FloatVector.greaterThan(Vector)}; returns the accumulated mask. */
    @Benchmark
    public Object greaterThan() {
        float[] a = fa.apply(size);
        float[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                Vector.Mask<Float> mv = av.greaterThan(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code FloatVector.equal(Vector)}; returns the accumulated mask. */
    @Benchmark
    public Object equal() {
        float[] a = fa.apply(size);
        float[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                Vector.Mask<Float> mv = av.equal(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code FloatVector.notEqual(Vector)}; returns the accumulated mask. */
    @Benchmark
    public Object notEqual() {
        float[] a = fa.apply(size);
        float[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                Vector.Mask<Float> mv = av.notEqual(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code FloatVector.lessThanEq(Vector)}; returns the accumulated mask. */
    @Benchmark
    public Object lessThanEq() {
        float[] a = fa.apply(size);
        float[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                Vector.Mask<Float> mv = av.lessThanEq(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code FloatVector.greaterThanEq(Vector)}; returns the accumulated mask. */
    @Benchmark
    public Object greaterThanEq() {
        float[] a = fa.apply(size);
        float[] b = fb.apply(size);
        boolean[] ms = fm.apply(size);
        Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                Vector.Mask<Float> mv = av.greaterThanEq(bv);

                m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
            }
        }
        return m;
    }

    /** Benchmarks {@code FloatVector.blend(Vector, Mask)}. */
    @Benchmark
    public void blend(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.blend(bv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.rearrange(Shuffle)} with shuffles read from the index array. */
    @Benchmark
    public void rearrange(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        int[] order = fs.apply(a.length, SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.rearrange(FloatVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks {@code FloatVector.get(int)} by copying every lane of each
     * loaded vector into {@code r}.
     */
    @Benchmark
    public void extract(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                int numLanes = SPECIES.length();
                // Manually unroll because full unroll happens after intrinsification.
                // Unroll is needed because get intrinsic requires for index to be a known constant.
                if (numLanes == 1) {
                    r[i] = av.get(0);
                } else if (numLanes == 2) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                } else if (numLanes == 4) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                } else if (numLanes == 8) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                } else if (numLanes == 16) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                    r[i + 8] = av.get(8);
                    r[i + 9] = av.get(9);
                    r[i + 10] = av.get(10);
                    r[i + 11] = av.get(11);
                    r[i + 12] = av.get(12);
                    r[i + 13] = av.get(13);
                    r[i + 14] = av.get(14);
                    r[i + 15] = av.get(15);
                } else if (numLanes == 32) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                    r[i + 8] = av.get(8);
                    r[i + 9] = av.get(9);
                    r[i + 10] = av.get(10);
                    r[i + 11] = av.get(11);
                    r[i + 12] = av.get(12);
                    r[i + 13] = av.get(13);
                    r[i + 14] = av.get(14);
                    r[i + 15] = av.get(15);
                    r[i + 16] = av.get(16);
                    r[i + 17] = av.get(17);
                    r[i + 18] = av.get(18);
                    r[i + 19] = av.get(19);
                    r[i + 20] = av.get(20);
                    r[i + 21] = av.get(21);
                    r[i + 22] = av.get(22);
                    r[i + 23] = av.get(23);
                    r[i + 24] = av.get(24);
                    r[i + 25] = av.get(25);
                    r[i + 26] = av.get(26);
                    r[i + 27] = av.get(27);
                    r[i + 28] = av.get(28);
                    r[i + 29] = av.get(29);
                    r[i + 30] = av.get(30);
                    r[i + 31] = av.get(31);
                } else if (numLanes == 64) {
                    r[i] = av.get(0);
                    r[i + 1] = av.get(1);
                    r[i + 2] = av.get(2);
                    r[i + 3] = av.get(3);
                    r[i + 4] = av.get(4);
                    r[i + 5] = av.get(5);
                    r[i + 6] = av.get(6);
                    r[i + 7] = av.get(7);
                    r[i + 8] = av.get(8);
                    r[i + 9] = av.get(9);
                    r[i + 10] = av.get(10);
                    r[i + 11] = av.get(11);
                    r[i + 12] = av.get(12);
                    r[i + 13] = av.get(13);
                    r[i + 14] = av.get(14);
                    r[i + 15] = av.get(15);
                    r[i + 16] = av.get(16);
                    r[i + 17] = av.get(17);
                    r[i + 18] = av.get(18);
                    r[i + 19] = av.get(19);
                    r[i + 20] = av.get(20);
                    r[i + 21] = av.get(21);
                    r[i + 22] = av.get(22);
                    r[i + 23] = av.get(23);
                    r[i + 24] = av.get(24);
                    r[i + 25] = av.get(25);
                    r[i + 26] = av.get(26);
                    r[i + 27] = av.get(27);
                    r[i + 28] = av.get(28);
                    r[i + 29] = av.get(29);
                    r[i + 30] = av.get(30);
                    r[i + 31] = av.get(31);
                    r[i + 32] = av.get(32);
                    r[i + 33] = av.get(33);
                    r[i + 34] = av.get(34);
                    r[i + 35] = av.get(35);
                    r[i + 36] = av.get(36);
                    r[i + 37] = av.get(37);
                    r[i + 38] = av.get(38);
                    r[i + 39] = av.get(39);
                    r[i + 40] = av.get(40);
                    r[i + 41] = av.get(41);
                    r[i + 42] = av.get(42);
                    r[i + 43] = av.get(43);
                    r[i + 44] = av.get(44);
                    r[i + 45] = av.get(45);
                    r[i + 46] = av.get(46);
                    r[i + 47] = av.get(47);
                    r[i + 48] = av.get(48);
                    r[i + 49] = av.get(49);
                    r[i + 50] = av.get(50);
                    r[i + 51] = av.get(51);
                    r[i + 52] = av.get(52);
                    r[i + 53] = av.get(53);
                    r[i + 54] = av.get(54);
                    r[i + 55] = av.get(55);
                    r[i + 56] = av.get(56);
                    r[i + 57] = av.get(57);
                    r[i + 58] = av.get(58);
                    r[i + 59] = av.get(59);
                    r[i + 60] = av.get(60);
                    r[i + 61] = av.get(61);
                    r[i + 62] = av.get(62);
                    r[i + 63] = av.get(63);
                } else {
                    for (int j = 0; j < SPECIES.length(); j++) {
                        r[i + j] = av.get(j);
                    }
                }
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.sin()}. */
    @Benchmark
    public void sin(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.sin().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.exp()}. */
    @Benchmark
    public void exp(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.exp().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.log1p()}. */
    @Benchmark
    public void log1p(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.log1p().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.log()}. */
    @Benchmark
    public void log(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.log().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.log10()}. */
    @Benchmark
    public void log10(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.log10().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.expm1()}. */
    @Benchmark
    public void expm1(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.expm1().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.cos()}. */
    @Benchmark
    public void cos(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.cos().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.tan()}. */
    @Benchmark
    public void tan(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.tan().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.sinh()}. */
    @Benchmark
    public void sinh(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.sinh().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.cosh()}. */
    @Benchmark
    public void cosh(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.cosh().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.tanh()}. */
    @Benchmark
    public void tanh(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.tanh().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.asin()}. */
    @Benchmark
    public void asin(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.asin().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.acos()}. */
    @Benchmark
    public void acos(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.acos().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.atan()}. */
    @Benchmark
    public void atan(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.atan().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.cbrt()}. */
    @Benchmark
    public void cbrt(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.cbrt().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.hypot(Vector)}. */
    @Benchmark
    public void hypot(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.hypot(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.pow(Vector)}. */
    @Benchmark
    public void pow(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.pow(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.atan2(Vector)}. */
    @Benchmark
    public void atan2(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                av.atan2(bv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.fma(Vector, Vector)}. */
    @Benchmark
    public void fma(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] c = fc.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
                av.fma(bv, cv).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.fma(Vector, Vector, Mask)}. */
    @Benchmark
    public void fmaMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] b = fb.apply(SPECIES.length());
        float[] c = fc.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
                FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
                av.fma(bv, cv, vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.neg()}. */
    @Benchmark
    public void neg(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.neg().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.neg(Mask)}. */
    @Benchmark
    public void negMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.neg(vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.abs()}. */
    @Benchmark
    public void abs(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.abs().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.abs(Mask)}. */
    @Benchmark
    public void absMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.abs(vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.sqrt()}. */
    @Benchmark
    public void sqrt(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.sqrt().intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /** Benchmarks {@code FloatVector.sqrt(Mask)}. */
    @Benchmark
    public void sqrtMasked(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        float[] r = fr.apply(SPECIES.length());
        boolean[] mask = fm.apply(SPECIES.length());
        Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.sqrt(vmask).intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks the index-map overload of {@code FloatVector.fromArray}
     * (gather), writing each loaded vector into a freshly allocated result
     * array.
     */
    @Benchmark
    public void gather(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        int[] b = fs.apply(a.length, SPECIES.length());
        float[] r = new float[a.length];

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i, b, i);
                av.intoArray(r, i);
            }
        }

        bh.consume(r);
    }

    /**
     * Benchmarks the index-map overload of {@code FloatVector.intoArray}
     * (scatter), writing into a freshly allocated result array.
     */
    @Benchmark
    public void scatter(Blackhole bh) {
        float[] a = fa.apply(SPECIES.length());
        int[] b = fs.apply(a.length, SPECIES.length());
        float[] r = new float[a.length];

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                FloatVector av = FloatVector.fromArray(SPECIES, a, i);
                av.intoArray(r, i, b, i);
            }
        }

        bh.consume(r);
    }

}