1 /* 2 * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have 23 * questions. 24 */ 25 package jdk.incubator.vector; 26 27 import java.nio.ByteBuffer; 28 import java.nio.FloatBuffer; 29 import java.nio.ByteOrder; 30 import java.util.Arrays; 31 import java.util.Objects; 32 import java.util.function.BinaryOperator; 33 import java.util.function.IntUnaryOperator; 34 import java.util.function.Function; 35 import java.util.function.UnaryOperator; 36 import java.util.concurrent.ThreadLocalRandom; 37 38 import jdk.internal.misc.Unsafe; 39 import jdk.internal.vm.annotation.ForceInline; 40 41 import static jdk.incubator.vector.VectorIntrinsics.*; 42 import static jdk.incubator.vector.VectorOperators.*; 43 44 // -- This file was mechanically generated: Do not edit! 
-- // 45 46 /** 47 * A specialized {@link Vector} representing an ordered immutable sequence of 48 * {@code float} values. 49 */ 50 @SuppressWarnings("cast") // warning: redundant cast 51 public abstract class FloatVector extends AbstractVector<Float> { 52 53 FloatVector() {} 54 55 static final int FORBID_OPCODE_KIND = VO_NOFP; 56 57 @ForceInline 58 static int opCode(Operator op) { 59 return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND); 60 } 61 @ForceInline 62 static int opCode(Operator op, int requireKind) { 63 requireKind |= VO_OPCODE_VALID; 64 return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND); 65 } 66 @ForceInline 67 static boolean opKind(Operator op, int bit) { 68 return VectorOperators.opKind(op, bit); 69 } 70 71 // Virtualized factories and operators, 72 // coded with portable definitions. 73 // These are all @ForceInline in case 74 // they need to be used performantly. 75 // The various shape-specific subclasses 76 // also specialize them by wrapping 77 // them in a call like this: 78 // return (Byte128Vector) 79 // super.bOp((Byte128Vector) o); 80 // The purpose of that is to forcibly inline 81 // the generic definition from this file 82 // into a sharply type- and size-specific 83 // wrapper in the subclass file, so that 84 // the JIT can specialize the code. 85 // The code is only inlined and expanded 86 // if it gets hot. Think of it as a cheap 87 // and lazy version of C++ templates. 88 89 // Virtualized getter 90 91 /*package-private*/ 92 abstract float[] vec(); 93 94 // Virtualized constructors 95 96 /** 97 * Build a vector directly using my own constructor. 98 * It is an error if the array is aliased elsewhere. 99 */ 100 /*package-private*/ 101 abstract FloatVector vectorFactory(float[] vec); 102 103 /** 104 * Build a mask directly using my species. 105 * It is an error if the array is aliased elsewhere. 
106 */ 107 /*package-private*/ 108 @ForceInline 109 final 110 AbstractMask<Float> maskFactory(boolean[] bits) { 111 return vspecies().maskFactory(bits); 112 } 113 114 // Constant loader (takes dummy as vector arg) 115 interface FVOp { 116 float apply(int i); 117 } 118 119 /*package-private*/ 120 @ForceInline 121 final 122 FloatVector vOp(FVOp f) { 123 float[] res = new float[length()]; 124 for (int i = 0; i < res.length; i++) { 125 res[i] = f.apply(i); 126 } 127 return vectorFactory(res); 128 } 129 130 @ForceInline 131 final 132 FloatVector vOp(VectorMask<Float> m, FVOp f) { 133 float[] res = new float[length()]; 134 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 135 for (int i = 0; i < res.length; i++) { 136 if (mbits[i]) { 137 res[i] = f.apply(i); 138 } 139 } 140 return vectorFactory(res); 141 } 142 143 // Unary operator 144 145 /*package-private*/ 146 interface FUnOp { 147 float apply(int i, float a); 148 } 149 150 /*package-private*/ 151 abstract 152 FloatVector uOp(FUnOp f); 153 @ForceInline 154 final 155 FloatVector uOpTemplate(FUnOp f) { 156 float[] vec = vec(); 157 float[] res = new float[length()]; 158 for (int i = 0; i < res.length; i++) { 159 res[i] = f.apply(i, vec[i]); 160 } 161 return vectorFactory(res); 162 } 163 164 /*package-private*/ 165 abstract 166 FloatVector uOp(VectorMask<Float> m, 167 FUnOp f); 168 @ForceInline 169 final 170 FloatVector uOpTemplate(VectorMask<Float> m, 171 FUnOp f) { 172 float[] vec = vec(); 173 float[] res = new float[length()]; 174 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 175 for (int i = 0; i < res.length; i++) { 176 res[i] = mbits[i] ? 
f.apply(i, vec[i]) : vec[i]; 177 } 178 return vectorFactory(res); 179 } 180 181 // Binary operator 182 183 /*package-private*/ 184 interface FBinOp { 185 float apply(int i, float a, float b); 186 } 187 188 /*package-private*/ 189 abstract 190 FloatVector bOp(Vector<Float> o, 191 FBinOp f); 192 @ForceInline 193 final 194 FloatVector bOpTemplate(Vector<Float> o, 195 FBinOp f) { 196 float[] res = new float[length()]; 197 float[] vec1 = this.vec(); 198 float[] vec2 = ((FloatVector)o).vec(); 199 for (int i = 0; i < res.length; i++) { 200 res[i] = f.apply(i, vec1[i], vec2[i]); 201 } 202 return vectorFactory(res); 203 } 204 205 /*package-private*/ 206 abstract 207 FloatVector bOp(Vector<Float> o, 208 VectorMask<Float> m, 209 FBinOp f); 210 @ForceInline 211 final 212 FloatVector bOpTemplate(Vector<Float> o, 213 VectorMask<Float> m, 214 FBinOp f) { 215 float[] res = new float[length()]; 216 float[] vec1 = this.vec(); 217 float[] vec2 = ((FloatVector)o).vec(); 218 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 219 for (int i = 0; i < res.length; i++) { 220 res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i]) : vec1[i]; 221 } 222 return vectorFactory(res); 223 } 224 225 // Ternary operator 226 227 /*package-private*/ 228 interface FTriOp { 229 float apply(int i, float a, float b, float c); 230 } 231 232 /*package-private*/ 233 abstract 234 FloatVector tOp(Vector<Float> o1, 235 Vector<Float> o2, 236 FTriOp f); 237 @ForceInline 238 final 239 FloatVector tOpTemplate(Vector<Float> o1, 240 Vector<Float> o2, 241 FTriOp f) { 242 float[] res = new float[length()]; 243 float[] vec1 = this.vec(); 244 float[] vec2 = ((FloatVector)o1).vec(); 245 float[] vec3 = ((FloatVector)o2).vec(); 246 for (int i = 0; i < res.length; i++) { 247 res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]); 248 } 249 return vectorFactory(res); 250 } 251 252 /*package-private*/ 253 abstract 254 FloatVector tOp(Vector<Float> o1, 255 Vector<Float> o2, 256 VectorMask<Float> m, 257 FTriOp f); 258 @ForceInline 259 final 260 FloatVector tOpTemplate(Vector<Float> o1, 261 Vector<Float> o2, 262 VectorMask<Float> m, 263 FTriOp f) { 264 float[] res = new float[length()]; 265 float[] vec1 = this.vec(); 266 float[] vec2 = ((FloatVector)o1).vec(); 267 float[] vec3 = ((FloatVector)o2).vec(); 268 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 269 for (int i = 0; i < res.length; i++) { 270 res[i] = mbits[i] ? 
f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i]; 271 } 272 return vectorFactory(res); 273 } 274 275 // Reduction operator 276 277 /*package-private*/ 278 abstract 279 float rOp(float v, FBinOp f); 280 @ForceInline 281 final 282 float rOpTemplate(float v, FBinOp f) { 283 float[] vec = vec(); 284 for (int i = 0; i < vec.length; i++) { 285 v = f.apply(i, v, vec[i]); 286 } 287 return v; 288 } 289 290 // Memory reference 291 292 /*package-private*/ 293 interface FLdOp<M> { 294 float apply(M memory, int offset, int i); 295 } 296 297 /*package-private*/ 298 @ForceInline 299 final 300 <M> FloatVector ldOp(M memory, int offset, 301 FLdOp<M> f) { 302 //dummy; no vec = vec(); 303 float[] res = new float[length()]; 304 for (int i = 0; i < res.length; i++) { 305 res[i] = f.apply(memory, offset, i); 306 } 307 return vectorFactory(res); 308 } 309 310 /*package-private*/ 311 @ForceInline 312 final 313 <M> FloatVector ldOp(M memory, int offset, 314 VectorMask<Float> m, 315 FLdOp<M> f) { 316 //float[] vec = vec(); 317 float[] res = new float[length()]; 318 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 319 for (int i = 0; i < res.length; i++) { 320 if (mbits[i]) { 321 res[i] = f.apply(memory, offset, i); 322 } 323 } 324 return vectorFactory(res); 325 } 326 327 interface FStOp<M> { 328 void apply(M memory, int offset, int i, float a); 329 } 330 331 /*package-private*/ 332 @ForceInline 333 final 334 <M> void stOp(M memory, int offset, 335 FStOp<M> f) { 336 float[] vec = vec(); 337 for (int i = 0; i < vec.length; i++) { 338 f.apply(memory, offset, i, vec[i]); 339 } 340 } 341 342 /*package-private*/ 343 @ForceInline 344 final 345 <M> void stOp(M memory, int offset, 346 VectorMask<Float> m, 347 FStOp<M> f) { 348 float[] vec = vec(); 349 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 350 for (int i = 0; i < vec.length; i++) { 351 if (mbits[i]) { 352 f.apply(memory, offset, i, vec[i]); 353 } 354 } 355 } 356 357 // Binary test 358 359 /*package-private*/ 360 interface FBinTest 
{ 361 boolean apply(int cond, int i, float a, float b); 362 } 363 364 /*package-private*/ 365 @ForceInline 366 final 367 AbstractMask<Float> bTest(int cond, 368 Vector<Float> o, 369 FBinTest f) { 370 float[] vec1 = vec(); 371 float[] vec2 = ((FloatVector)o).vec(); 372 boolean[] bits = new boolean[length()]; 373 for (int i = 0; i < length(); i++){ 374 bits[i] = f.apply(cond, i, vec1[i], vec2[i]); 375 } 376 return maskFactory(bits); 377 } 378 379 /*package-private*/ 380 @ForceInline 381 static boolean doBinTest(int cond, float a, float b) { 382 switch (cond) { 383 case BT_eq: return a == b; 384 case BT_ne: return a != b; 385 case BT_lt: return a < b; 386 case BT_le: return a <= b; 387 case BT_gt: return a > b; 388 case BT_ge: return a >= b; 389 } 390 throw new AssertionError(Integer.toHexString(cond)); 391 } 392 393 /*package-private*/ 394 @Override 395 abstract FloatSpecies vspecies(); 396 397 /*package-private*/ 398 @ForceInline 399 static long toBits(float e) { 400 return Float.floatToIntBits(e); 401 } 402 403 /*package-private*/ 404 @ForceInline 405 static float fromBits(long bits) { 406 return Float.intBitsToFloat((int)bits); 407 } 408 409 // Static factories (other than memory operations) 410 411 // Note: A surprising behavior in javadoc 412 // sometimes makes a lone /** {@inheritDoc} */ 413 // comment drop the method altogether, 414 // apparently if the method mentions an 415 // parameter or return type of Vector<Float> 416 // instead of Vector<E> as originally specified. 417 // Adding an empty HTML fragment appears to 418 // nudge javadoc into providing the desired 419 // inherited documentation. We use the HTML 420 // comment <!--workaround--> for this. 
421 422 /** 423 * {@inheritDoc} <!--workaround--> 424 */ 425 @ForceInline 426 public static FloatVector zero(VectorSpecies<Float> species) { 427 FloatSpecies vsp = (FloatSpecies) species; 428 return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), float.class, species.length(), 429 toBits(0.0f), vsp, 430 ((bits_, s_) -> s_.rvOp(i -> bits_))); 431 } 432 433 /** 434 * Returns a vector of the same species as this one 435 * where all lane elements are set to 436 * the primitive value {@code e}. 437 * 438 * The contents of the current vector are discarded; 439 * only the species is relevant to this operation. 440 * 441 * <p> This method returns the value of this expression: 442 * {@code FloatVector.broadcast(this.species(), e)}. 443 * 444 * @apiNote 445 * Unlike the similar method named {@code broadcast()} 446 * in the supertype {@code Vector}, this method does not 447 * need to validate its argument, and cannot throw 448 * {@code IllegalArgumentException}. This method is 449 * therefore preferable to the supertype method. 450 * 451 * @param e the value to broadcast 452 * @return a vector where all lane elements are set to 453 * the primitive value {@code e} 454 * @see #broadcast(VectorSpecies,long) 455 * @see Vector#broadcast(long) 456 * @see VectorSpecies#broadcast(long) 457 */ 458 public abstract FloatVector broadcast(float e); 459 460 /** 461 * Returns a vector of the given species 462 * where all lane elements are set to 463 * the primitive value {@code e}. 
464 * 465 * @param species species of the desired vector 466 * @param e the value to broadcast 467 * @return a vector where all lane elements are set to 468 * the primitive value {@code e} 469 * @see #broadcast(long) 470 * @see Vector#broadcast(long) 471 * @see VectorSpecies#broadcast(long) 472 */ 473 public static FloatVector broadcast(VectorSpecies<Float> species, float e) { 474 FloatSpecies vsp = (FloatSpecies) species; 475 return vsp.broadcast(e); 476 } 477 478 /*package-private*/ 479 @ForceInline 480 final FloatVector broadcastTemplate(float e) { 481 FloatSpecies vsp = vspecies(); 482 return vsp.broadcast(e); 483 } 484 485 /** 486 * {@inheritDoc} <!--workaround--> 487 * @apiNote 488 * When working with vector subtypes like {@code FloatVector}, 489 * {@linkplain #broadcast(float) the more strongly typed method} 490 * is typically selected. It can be explicitly selected 491 * using a cast: {@code v.broadcast((float)e)}. 492 * The two expressions will produce numerically identical results. 493 */ 494 @Override 495 public abstract FloatVector broadcast(long e); 496 497 /** 498 * Returns a vector of the given species 499 * where all lane elements are set to 500 * the primitive value {@code e}. 501 * 502 * The {@code long} value must be accurately representable 503 * by the {@code ETYPE} of the vector species, so that 504 * {@code e==(long)(ETYPE)e}. 
505 * 506 * @param species species of the desired vector 507 * @param e the value to broadcast 508 * @return a vector where all lane elements are set to 509 * the primitive value {@code e} 510 * @throws IllegalArgumentException 511 * if the given {@code long} value cannot 512 * be represented by the vector's {@code ETYPE} 513 * @see #broadcast(VectorSpecies,float) 514 * @see VectorSpecies#checkValue(long) 515 */ 516 public static FloatVector broadcast(VectorSpecies<Float> species, long e) { 517 FloatSpecies vsp = (FloatSpecies) species; 518 return vsp.broadcast(e); 519 } 520 521 /*package-private*/ 522 @ForceInline 523 final FloatVector broadcastTemplate(long e) { 524 return vspecies().broadcast(e); 525 } 526 527 /** 528 * Returns a vector where each lane element is set to given 529 * primitive values. 530 * <p> 531 * For each vector lane, where {@code N} is the vector lane index, the 532 * the primitive value at index {@code N} is placed into the resulting 533 * vector at lane index {@code N}. 534 * 535 * @param species species of the desired vector 536 * @param es the given primitive values 537 * @return a vector where each lane element is set to given primitive 538 * values 539 * @throws IllegalArgumentException 540 * if {@code es.length != species.length()} 541 */ 542 @ForceInline 543 @SuppressWarnings("unchecked") 544 public static FloatVector fromValues(VectorSpecies<Float> species, float... es) { 545 FloatSpecies vsp = (FloatSpecies) species; 546 int vlength = vsp.laneCount(); 547 VectorIntrinsics.requireLength(es.length, vlength); 548 // Get an unaliased copy and use it directly: 549 return vsp.vectorFactory(Arrays.copyOf(es, vlength)); 550 } 551 552 /** 553 * Returns a vector where the first lane element is set to the primtive 554 * value {@code e}, all other lane elements are set to the default 555 * value(positive zero). 
556 * 557 * @param species species of the desired vector 558 * @param e the value 559 * @return a vector where the first lane element is set to the primitive 560 * value {@code e} 561 */ 562 // FIXME: Does this carry its weight? 563 @ForceInline 564 public static FloatVector single(VectorSpecies<Float> species, float e) { 565 return zero(species).withLane(0, e); 566 } 567 568 /** 569 * Returns a vector where each lane element is set to a randomly 570 * generated primitive value. 571 * 572 * The semantics are equivalent to calling 573 * {@link ThreadLocalRandom#nextFloat()} 574 * for each lane, from first to last. 575 * 576 * @param species species of the desired vector 577 * @return a vector where each lane elements is set to a randomly 578 * generated primitive value 579 */ 580 public static FloatVector random(VectorSpecies<Float> species) { 581 FloatSpecies vsp = (FloatSpecies) species; 582 ThreadLocalRandom r = ThreadLocalRandom.current(); 583 return vsp.vOp(i -> nextRandom(r)); 584 } 585 private static float nextRandom(ThreadLocalRandom r) { 586 return r.nextFloat(); 587 } 588 589 // Unary lanewise support 590 591 /** 592 * {@inheritDoc} <!--workaround--> 593 */ 594 public abstract 595 FloatVector lanewise(VectorOperators.Unary op); 596 597 @ForceInline 598 final 599 FloatVector lanewiseTemplate(VectorOperators.Unary op) { 600 if (opKind(op, VO_SPECIAL)) { 601 if (op == ZOMO) { 602 return blend(broadcast(-1), compare(NE, 0)); 603 } 604 } 605 int opc = opCode(op); 606 return VectorIntrinsics.unaryOp( 607 opc, getClass(), float.class, length(), 608 this, 609 UN_IMPL.find(op, opc, (opc_) -> { 610 switch (opc_) { 611 case VECTOR_OP_NEG: return v0 -> 612 v0.uOp((i, a) -> (float) -a); 613 case VECTOR_OP_ABS: return v0 -> 614 v0.uOp((i, a) -> (float) Math.abs(a)); 615 case VECTOR_OP_SIN: return v0 -> 616 v0.uOp((i, a) -> (float) Math.sin(a)); 617 case VECTOR_OP_COS: return v0 -> 618 v0.uOp((i, a) -> (float) Math.cos(a)); 619 case VECTOR_OP_TAN: return v0 -> 620 
v0.uOp((i, a) -> (float) Math.tan(a)); 621 case VECTOR_OP_ASIN: return v0 -> 622 v0.uOp((i, a) -> (float) Math.asin(a)); 623 case VECTOR_OP_ACOS: return v0 -> 624 v0.uOp((i, a) -> (float) Math.acos(a)); 625 case VECTOR_OP_ATAN: return v0 -> 626 v0.uOp((i, a) -> (float) Math.atan(a)); 627 case VECTOR_OP_EXP: return v0 -> 628 v0.uOp((i, a) -> (float) Math.exp(a)); 629 case VECTOR_OP_LOG: return v0 -> 630 v0.uOp((i, a) -> (float) Math.log(a)); 631 case VECTOR_OP_LOG10: return v0 -> 632 v0.uOp((i, a) -> (float) Math.log10(a)); 633 case VECTOR_OP_SQRT: return v0 -> 634 v0.uOp((i, a) -> (float) Math.sqrt(a)); 635 case VECTOR_OP_CBRT: return v0 -> 636 v0.uOp((i, a) -> (float) Math.cbrt(a)); 637 case VECTOR_OP_SINH: return v0 -> 638 v0.uOp((i, a) -> (float) Math.sinh(a)); 639 case VECTOR_OP_COSH: return v0 -> 640 v0.uOp((i, a) -> (float) Math.cosh(a)); 641 case VECTOR_OP_TANH: return v0 -> 642 v0.uOp((i, a) -> (float) Math.tanh(a)); 643 case VECTOR_OP_EXPM1: return v0 -> 644 v0.uOp((i, a) -> (float) Math.expm1(a)); 645 case VECTOR_OP_LOG1P: return v0 -> 646 v0.uOp((i, a) -> (float) Math.log1p(a)); 647 default: return null; 648 }})); 649 } 650 private static final 651 ImplCache<Unary,UnaryOperator<FloatVector>> UN_IMPL 652 = new ImplCache<>(Unary.class, FloatVector.class); 653 654 /** 655 * {@inheritDoc} <!--workaround--> 656 */ 657 @ForceInline 658 public final 659 FloatVector lanewise(VectorOperators.Unary op, 660 VectorMask<Float> m) { 661 return blend(lanewise(op), m); 662 } 663 664 // Binary lanewise support 665 666 /** 667 * {@inheritDoc} <!--workaround--> 668 * @see #lanewise(VectorOperators.Binary,float) 669 * @see #lanewise(VectorOperators.Binary,float,VectorMask) 670 */ 671 @Override 672 public abstract 673 FloatVector lanewise(VectorOperators.Binary op, 674 Vector<Float> v); 675 @ForceInline 676 final 677 FloatVector lanewiseTemplate(VectorOperators.Binary op, 678 Vector<Float> v) { 679 FloatVector that = (FloatVector) v; 680 that.check(this); 681 if (opKind(op, 
VO_SPECIAL )) { 682 if (op == FIRST_NONZERO) { 683 // FIXME: Support this in the JIT. 684 VectorMask<Integer> thisNZ 685 = this.viewAsIntegralLanes().compare(NE, (int) 0); 686 that = that.blend((float) 0, thisNZ.cast(vspecies())); 687 op = OR_UNCHECKED; 688 // FIXME: Support OR_UNCHECKED on float/double also! 689 return this.viewAsIntegralLanes() 690 .lanewise(op, that.viewAsIntegralLanes()) 691 .viewAsFloatingLanes(); 692 } 693 } 694 int opc = opCode(op); 695 return VectorIntrinsics.binaryOp( 696 opc, getClass(), float.class, length(), 697 this, that, 698 BIN_IMPL.find(op, opc, (opc_) -> { 699 switch (opc_) { 700 case VECTOR_OP_ADD: return (v0, v1) -> 701 v0.bOp(v1, (i, a, b) -> (float)(a + b)); 702 case VECTOR_OP_SUB: return (v0, v1) -> 703 v0.bOp(v1, (i, a, b) -> (float)(a - b)); 704 case VECTOR_OP_MUL: return (v0, v1) -> 705 v0.bOp(v1, (i, a, b) -> (float)(a * b)); 706 case VECTOR_OP_DIV: return (v0, v1) -> 707 v0.bOp(v1, (i, a, b) -> (float)(a / b)); 708 case VECTOR_OP_MAX: return (v0, v1) -> 709 v0.bOp(v1, (i, a, b) -> (float)Math.max(a, b)); 710 case VECTOR_OP_MIN: return (v0, v1) -> 711 v0.bOp(v1, (i, a, b) -> (float)Math.min(a, b)); 712 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) -> 713 v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? 
a : b); 714 case VECTOR_OP_OR: return (v0, v1) -> 715 v0.bOp(v1, (i, a, b) -> fromBits(toBits(a) | toBits(b))); 716 case VECTOR_OP_ATAN2: return (v0, v1) -> 717 v0.bOp(v1, (i, a, b) -> (float) Math.atan2(a, b)); 718 case VECTOR_OP_POW: return (v0, v1) -> 719 v0.bOp(v1, (i, a, b) -> (float) Math.pow(a, b)); 720 case VECTOR_OP_HYPOT: return (v0, v1) -> 721 v0.bOp(v1, (i, a, b) -> (float) Math.hypot(a, b)); 722 default: return null; 723 }})); 724 } 725 private static final 726 ImplCache<Binary,BinaryOperator<FloatVector>> BIN_IMPL 727 = new ImplCache<>(Binary.class, FloatVector.class); 728 729 /** 730 * {@inheritDoc} <!--workaround--> 731 * @see #lanewise(VectorOperators.Binary,float,VectorMask) 732 */ 733 @ForceInline 734 public final 735 FloatVector lanewise(VectorOperators.Binary op, 736 Vector<Float> v, 737 VectorMask<Float> m) { 738 return blend(lanewise(op, v), m); 739 } 740 // FIXME: Maybe all of the public final methods in this file (the 741 // simple ones that just call lanewise) should be pushed down to 742 // the X-VectorBits template. They can't optimize properly at 743 // this level, and must rely on inlining. Does it work? 744 // (If it works, of course keep the code here.) 745 746 /** 747 * Combines the lane values of this vector 748 * with the value of a broadcast scalar. 749 * 750 * This is a lane-wise binary operation which applies 751 * the selected operation to each lane. 752 * The return value will be equal to this expression: 753 * {@code this.lanewise(op, this.broadcast(e))}. 
754 * 755 * @param op the operation used to process lane values 756 * @param e the input scalar 757 * @return the result of applying the operation lane-wise 758 * to the two input vectors 759 * @throws UnsupportedOperationException if this vector does 760 * not support the requested operation 761 * @see #lanewise(VectorOperators.Binary,Vector) 762 * @see #lanewise(VectorOperators.Binary,float,VectorMask) 763 */ 764 @ForceInline 765 public final 766 FloatVector lanewise(VectorOperators.Binary op, 767 float e) { 768 int opc = opCode(op); 769 return lanewise(op, broadcast(e)); 770 } 771 772 /** 773 * Combines the lane values of this vector 774 * with the value of a broadcast scalar, 775 * with selection of lane elements controlled by a mask. 776 * 777 * This is a masked lane-wise binary operation which applies 778 * the selected operation to each lane. 779 * The return value will be equal to this expression: 780 * {@code this.lanewise(op, this.broadcast(e), m)}. 781 * 782 * @param op the operation used to process lane values 783 * @param e the input scalar 784 * @param m the mask controlling lane selection 785 * @return the result of applying the operation lane-wise 786 * to the input vector and the scalar 787 * @throws UnsupportedOperationException if this vector does 788 * not support the requested operation 789 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 790 * @see #lanewise(VectorOperators.Binary,float) 791 */ 792 @ForceInline 793 public final 794 FloatVector lanewise(VectorOperators.Binary op, 795 float e, 796 VectorMask<Float> m) { 797 return blend(lanewise(op, e), m); 798 } 799 800 /** 801 * {@inheritDoc} <!--workaround--> 802 * @apiNote 803 * When working with vector subtypes like {@code FloatVector}, 804 * {@linkplain #lanewise(VectorOperators.Binary,float) 805 * the more strongly typed method} 806 * is typically selected. It can be explicitly selected 807 * using a cast: {@code v.lanewise(op,(float)e)}. 
808 * The two expressions will produce numerically identical results. 809 */ 810 @ForceInline 811 public final 812 FloatVector lanewise(VectorOperators.Binary op, 813 long e) { 814 float e1 = (float) e; 815 if ((long)e1 != e 816 ) { 817 vspecies().checkValue(e); // for exception 818 } 819 return lanewise(op, e1); 820 } 821 822 /** 823 * {@inheritDoc} <!--workaround--> 824 * @apiNote 825 * When working with vector subtypes like {@code FloatVector}, 826 * {@linkplain #lanewise(VectorOperators.Binary,float,VectorMask) 827 * the more strongly typed method} 828 * is typically selected. It can be explicitly selected 829 * using a cast: {@code v.lanewise(op,(float)e,m)}. 830 * The two expressions will produce numerically identical results. 831 */ 832 @ForceInline 833 public final 834 FloatVector lanewise(VectorOperators.Binary op, 835 long e, VectorMask<Float> m) { 836 return blend(lanewise(op, e), m); 837 } 838 839 840 // Ternary lanewise support 841 842 // Ternary operators come in eight variations: 843 // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2]) 844 // lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask) 845 846 // It is annoying to support all of these variations of masking 847 // and broadcast, but it would be more surprising not to continue 848 // the obvious pattern started by unary and binary. 
849 850 /** 851 * {@inheritDoc} <!--workaround--> 852 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 853 * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) 854 * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask) 855 * @see #lanewise(VectorOperators.Ternary,float,float) 856 * @see #lanewise(VectorOperators.Ternary,Vector,float) 857 * @see #lanewise(VectorOperators.Ternary,float,Vector) 858 */ 859 @Override 860 public abstract 861 FloatVector lanewise(VectorOperators.Ternary op, 862 Vector<Float> v1, 863 Vector<Float> v2); 864 @ForceInline 865 final 866 FloatVector lanewiseTemplate(VectorOperators.Ternary op, 867 Vector<Float> v1, 868 Vector<Float> v2) { 869 FloatVector that = (FloatVector) v1; 870 FloatVector tother = (FloatVector) v2; 871 // It's a word: https://www.dictionary.com/browse/tother 872 // See also Chapter 11 of Dickens, Our Mutual Friend: 873 // "Totherest Governor," replied Mr Riderhood... 874 that.check(this); 875 tother.check(this); 876 int opc = opCode(op); 877 return VectorIntrinsics.ternaryOp( 878 opc, getClass(), float.class, length(), 879 this, that, tother, 880 TERN_IMPL.find(op, opc, (opc_) -> { 881 switch (opc_) { 882 case VECTOR_OP_FMA: return (v0, v1_, v2_) -> 883 v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c)); 884 default: return null; 885 }})); 886 } 887 private static final 888 ImplCache<Ternary,TernaryOperation<FloatVector>> TERN_IMPL 889 = new ImplCache<>(Ternary.class, FloatVector.class); 890 891 /** 892 * {@inheritDoc} <!--workaround--> 893 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 894 * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) 895 * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask) 896 */ 897 @ForceInline 898 public final 899 FloatVector lanewise(VectorOperators.Ternary op, 900 Vector<Float> v1, 901 Vector<Float> v2, 902 VectorMask<Float> m) { 903 return blend(lanewise(op, v1, v2), m); 904 } 905 906 /** 907 * Combines the 
lane values of this vector 908 * with the values of two broadcast scalars. 909 * 910 * This is a lane-wise ternary operation which applies 911 * the selected operation to each lane. 912 * The return value will be equal to this expression: 913 * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}. 914 * 915 * @param op the operation used to combine lane values 916 * @param e1 the first input scalar 917 * @param e2 the second input scalar 918 * @return the result of applying the operation lane-wise 919 * to the input vector and the scalars 920 * @throws UnsupportedOperationException if this vector does 921 * not support the requested operation 922 * @see #lanewise(VectorOperators.Ternary,Vector,Vector) 923 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 924 */ 925 @ForceInline 926 public final 927 FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2) 928 float e1, 929 float e2) { 930 return lanewise(op, broadcast(e1), broadcast(e2)); 931 } 932 933 /** 934 * Combines the lane values of this vector 935 * with the values of two broadcast scalars, 936 * with selection of lane elements controlled by a mask. 937 * 938 * This is a masked lane-wise ternary operation which applies 939 * the selected operation to each lane. 940 * The return value will be equal to this expression: 941 * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}. 
942 * 943 * @param op the operation used to combine lane values 944 * @param e1 the first input scalar 945 * @param e2 the second input scalar 946 * @param m the mask controlling lane selection 947 * @return the result of applying the operation lane-wise 948 * to the input vector and the scalars 949 * @throws UnsupportedOperationException if this vector does 950 * not support the requested operation 951 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 952 * @see #lanewise(VectorOperators.Ternary,float,float) 953 */ 954 @ForceInline 955 public final 956 FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m) 957 float e1, 958 float e2, 959 VectorMask<Float> m) { 960 return blend(lanewise(op, e1, e2), m); 961 } 962 963 /** 964 * Combines the lane values of this vector 965 * with the values of another vector and a broadcast scalar. 966 * 967 * This is a lane-wise ternary operation which applies 968 * the selected operation to each lane. 969 * The return value will be equal to this expression: 970 * {@code this.lanewise(op, v1, this.broadcast(e2))}. 971 * 972 * @param op the operation used to combine lane values 973 * @param v1 the other input vector 974 * @param e2 the input scalar 975 * @return the result of applying the operation lane-wise 976 * to the input vectors and the scalar 977 * @throws UnsupportedOperationException if this vector does 978 * not support the requested operation 979 * @see #lanewise(VectorOperators.Ternary,float,float) 980 * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) 981 */ 982 @ForceInline 983 public final 984 FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2) 985 Vector<Float> v1, 986 float e2) { 987 return lanewise(op, v1, broadcast(e2)); 988 } 989 990 /** 991 * Combines the lane values of this vector 992 * with the values of another vector and a broadcast scalar, 993 * with selection of lane elements controlled by a mask. 
994 * 995 * This is a masked lane-wise ternary operation which applies 996 * the selected operation to each lane. 997 * The return value will be equal to this expression: 998 * {@code this.lanewise(op, v1, this.broadcast(e2), m)}. 999 * 1000 * @param op the operation used to combine lane values 1001 * @param v1 the other input vector 1002 * @param e2 the input scalar 1003 * @param m the mask controlling lane selection 1004 * @return the result of applying the operation lane-wise 1005 * to the input vectors and the scalar 1006 * @throws UnsupportedOperationException if this vector does 1007 * not support the requested operation 1008 * @see #lanewise(VectorOperators.Ternary,Vector,Vector) 1009 * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) 1010 * @see #lanewise(VectorOperators.Ternary,Vector,float) 1011 */ 1012 @ForceInline 1013 public final 1014 FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m) 1015 Vector<Float> v1, 1016 float e2, 1017 VectorMask<Float> m) { 1018 return blend(lanewise(op, v1, e2), m); 1019 } 1020 1021 /** 1022 * Combines the lane values of this vector 1023 * with the values of another vector and a broadcast scalar. 1024 * 1025 * This is a lane-wise ternary operation which applies 1026 * the selected operation to each lane. 1027 * The return value will be equal to this expression: 1028 * {@code this.lanewise(op, this.broadcast(e1), v2)}. 
     *
     * @param op the operation used to combine lane values
     * @param e1 the input scalar
     * @param v2 the other input vector
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
     */
    @ForceInline
    public final
    FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
                         float e1,
                         Vector<Float> v2) {
        // Only the scalar needs widening; v2 is passed through unchanged.
        return lanewise(op, broadcast(e1), v2);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the input scalar
     * @param v2 the other input vector
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,Vector)
     */
    @ForceInline
    public final
    FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
                         float e1,
                         Vector<Float> v2,
                         VectorMask<Float> m) {
        // Evaluate the unmasked overload for all lanes, then let blend
        // apply the mask to choose between this vector and that result.
        return blend(lanewise(op, e1, v2), m);
    }

    // (Thus endeth the Great and Mighty Ternary Ogdoad.)
    // https://en.wikipedia.org/wiki/Ogdoad

    /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
    //
    // These include masked and non-masked versions.
    // This subclass adds broadcast (masked or not).

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #add(float)
     */
    @Override
    @ForceInline
    public final FloatVector add(Vector<Float> v) {
        return lanewise(ADD, v);
    }

    /**
     * Adds this vector to the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive addition operation ({@code +}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of adding each lane of this vector to the scalar
     * @see #add(Vector)
     * @see #broadcast(float)
     * @see #add(float,VectorMask)
     * @see VectorOperators#ADD
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final
    FloatVector add(float e) {
        return lanewise(ADD, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #add(float,VectorMask)
     */
    @Override
    @ForceInline
    public final FloatVector add(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(ADD, v, m);
    }

    /**
     * Adds this vector to the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive addition operation ({@code +}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of adding each lane of this vector to the scalar
     * @see #add(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #add(float)
     * @see VectorOperators#ADD
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector add(float e,
                                 VectorMask<Float> m) {
        return lanewise(ADD, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #sub(float)
     */
    @Override
    @ForceInline
    public final FloatVector sub(Vector<Float> v) {
        return lanewise(SUB, v);
    }

    /**
     * Subtracts an input scalar from this vector.
     *
     * This is a lane-wise binary operation which applies
     * the primitive subtraction operation ({@code -}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of subtracting the scalar from each lane of this vector
     * @see #sub(Vector)
     * @see #broadcast(float)
     * @see #sub(float,VectorMask)
     * @see VectorOperators#SUB
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector sub(float e) {
        return lanewise(SUB, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #sub(float,VectorMask)
     */
    @Override
    @ForceInline
    public final FloatVector sub(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(SUB, v, m);
    }

    /**
     * Subtracts an input scalar from this vector
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive subtraction operation ({@code -}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of subtracting the scalar from each lane of this vector
     * @see #sub(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #sub(float)
     * @see VectorOperators#SUB
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector sub(float e,
                                 VectorMask<Float> m) {
        return lanewise(SUB, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #mul(float)
     */
    @Override
    @ForceInline
    public final FloatVector mul(Vector<Float> v) {
        return lanewise(MUL, v);
    }

    /**
     * Multiplies this vector by the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive multiplication operation ({@code *}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e)}.
     *
     * @param e the input scalar
     * @return the result of multiplying this vector by the given scalar
     * @see #mul(Vector)
     * @see #broadcast(float)
     * @see #mul(float,VectorMask)
     * @see VectorOperators#MUL
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector mul(float e) {
        return lanewise(MUL, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #mul(float,VectorMask)
     */
    @Override
    @ForceInline
    public final FloatVector mul(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(MUL, v, m);
    }

    /**
     * Multiplies this vector by the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive multiplication operation ({@code *}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e, m)}.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of multiplying each lane of this vector by the scalar
     * @see #mul(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #mul(float)
     * @see VectorOperators#MUL
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector mul(float e,
                                 VectorMask<Float> m) {
        return lanewise(MUL, e, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
     */
    @Override
    @ForceInline
    public final FloatVector div(Vector<Float> v) {
        return lanewise(DIV, v);
    }

    /**
     * Divides this vector by the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies
     * the primitive division operation ({@code /}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e)}.
     *
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
     *
     * @param e the input scalar
     * @return the result of dividing each lane of this vector by the scalar
     * @see #div(Vector)
     * @see #broadcast(float)
     * @see #div(float,VectorMask)
     * @see VectorOperators#DIV
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector div(float e) {
        return lanewise(DIV, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #div(float,VectorMask)
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
     */
    @Override
    @ForceInline
    public final FloatVector div(Vector<Float> v,
                                 VectorMask<Float> m) {
        return lanewise(DIV, v, m);
    }

    /**
     * Divides this vector by the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the primitive division operation ({@code /}) to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
     *    lanewise}{@code (}{@link VectorOperators#DIV
     *    DIV}{@code , e, m)}.
     *
     * @apiNote Because the underlying scalar operator is an IEEE
     * floating point number, division by zero in fact will
     * not throw an exception, but will yield a signed
     * infinity or NaN.
     *
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of dividing each lane of this vector by the scalar
     * @see #div(Vector,VectorMask)
     * @see #broadcast(float)
     * @see #div(float)
     * @see VectorOperators#DIV
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,float)
     */
    @ForceInline
    public final FloatVector div(float e,
                                 VectorMask<Float> m) {
        return lanewise(DIV, e, m);
    }

    /// END OF FULL-SERVICE BINARY METHODS

    /// SECOND-TIER BINARY METHODS
    //
    // There are no masked versions.

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
     */
    @Override
    @ForceInline
    public final FloatVector min(Vector<Float> v) {
        return lanewise(MIN, v);
    }

    // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
    /**
     * Computes the smaller of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.min()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#MIN
     *    MIN}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise smaller of this vector and the given scalar
     * @see #min(Vector)
     * @see #broadcast(float)
     * @see VectorOperators#MIN
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     * @apiNote
     * For this method, floating point negative
     * zero {@code -0.0} is treated as a value distinct from, and less
     * than the default value (positive zero).
     */
    @ForceInline
    public final FloatVector min(float e) {
        return lanewise(MIN, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * For this method, negative floating-point zero compares
     * less than the default value, positive zero.
     */
    @Override
    @ForceInline
    public final FloatVector max(Vector<Float> v) {
        return lanewise(MAX, v);
    }

    /**
     * Computes the larger of this vector and the broadcast of an input scalar.
     *
     * This is a lane-wise binary operation which applies the
     * operation {@code Math.max()} to each pair of
     * corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#MAX
     *    MAX}{@code , e)}.
     *
     * @param e the input scalar
     * @return the lane-wise larger of this vector and the given scalar
     * @see #max(Vector)
     * @see #broadcast(float)
     * @see VectorOperators#MAX
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     * @apiNote
     * For this method, negative floating-point zero compares
     * less than the default value, positive zero.
     */
    @ForceInline
    public final FloatVector max(float e) {
        return lanewise(MAX, e);
    }


    // common FP operator: pow
    /**
     * Raises this vector to the power of a second input vector.
     *
     * This is a lane-wise binary operation which applies the
     * method {@code Math.pow()}
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,Vector)
     *    lanewise}{@code (}{@link VectorOperators#POW
     *    POW}{@code , n)}.
     *
     * <p>
     * This is not a full-service named operation like
     * {@link #add(Vector) add}.
     * A masked version of
     * this operation is not directly available
     * but may be obtained via the masked version of
     * {@code lanewise}.
     *
     * @param n a vector exponent by which to raise this vector
     * @return the {@code n}-th power of this vector
     * @see #pow(float)
     * @see VectorOperators#POW
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     */
    @ForceInline
    public final FloatVector pow(Vector<Float> n) {
        return lanewise(POW, n);
    }

    /**
     * Raises this vector to a scalar power.
     *
     * This is a lane-wise binary operation which applies the
     * method {@code Math.pow()}
     * to each pair of corresponding lane values.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Binary,float)
     *    lanewise}{@code (}{@link VectorOperators#POW
     *    POW}{@code , n)}.
     *
     * @param n a scalar exponent by which to raise this vector
     * @return the {@code n}-th power of this vector
     * @see #pow(Vector)
     * @see VectorOperators#POW
     * @see #lanewise(VectorOperators.Binary,float,VectorMask)
     */
    @ForceInline
    public final FloatVector pow(float n) {
        return lanewise(POW, n);
    }

    /// UNARY METHODS

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    FloatVector neg() {
        return lanewise(NEG);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    FloatVector abs() {
        return lanewise(ABS);
    }


    // sqrt
    /**
     * Computes the square root of this vector.
     *
     * This is a lane-wise unary operation which applies
     * the method {@code Math.sqrt()}
     * to each lane value.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Unary)
     *    lanewise}{@code (}{@link VectorOperators#SQRT
     *    SQRT}{@code )}.
     *
     * @return the square root of this vector
     * @see VectorOperators#SQRT
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    @ForceInline
    public final FloatVector sqrt() {
        return lanewise(SQRT);
    }

    /// COMPARISONS

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Float> eq(Vector<Float> v) {
        return compare(EQ, v);
    }

    /**
     * Tests if this vector is equal to an input scalar.
     *
     * This is a lane-wise binary test operation which applies
     * the primitive equals operation ({@code ==}) to each lane.
     * The result is the same as {@code compare(VectorOperators.EQ, e)}.
     *
     * @param e the input scalar
     * @return the result mask of testing if this vector
     *         is equal to {@code e}
     * @see #compare(VectorOperators.Comparison,float)
     */
    @ForceInline
    public final
    VectorMask<Float> eq(float e) {
        return compare(EQ, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Float> lt(Vector<Float> v) {
        return compare(LT, v);
    }

    /**
     * Tests if this vector is less than an input scalar.
     *
     * This is a lane-wise binary test operation which applies
     * the primitive less than operation ({@code <}) to each lane.
     * The result is the same as {@code compare(VectorOperators.LT, e)}.
1635 * 1636 * @param e the input scalar 1637 * @return the mask result of testing if this vector 1638 * is less than the input scalar 1639 * @see #compare(VectorOperators.Comparison,float) 1640 */ 1641 @ForceInline 1642 public final 1643 VectorMask<Float> lt(float e) { 1644 return compare(LT, e); 1645 } 1646 1647 /** 1648 * {@inheritDoc} <!--workaround--> 1649 */ 1650 @Override 1651 public abstract 1652 VectorMask<Float> test(VectorOperators.Test op); 1653 1654 /*package-private*/ 1655 @ForceInline 1656 final 1657 <M extends VectorMask<Float>> 1658 M testTemplate(Class<M> maskType, Test op) { 1659 FloatSpecies vsp = vspecies(); 1660 if (opKind(op, VO_SPECIAL)) { 1661 IntVector bits = this.viewAsIntegralLanes(); 1662 VectorMask<Integer> m; 1663 if (op == IS_DEFAULT) { 1664 m = bits.compare(EQ, (int) 0); 1665 } else if (op == IS_NEGATIVE) { 1666 m = bits.compare(LT, (int) 0); 1667 } 1668 else if (op == IS_FINITE || 1669 op == IS_NAN || 1670 op == IS_INFINITE) { 1671 // first kill the sign: 1672 bits = bits.and(Integer.MAX_VALUE); 1673 // next find the bit pattern for infinity: 1674 int infbits = (int) toBits(Float.POSITIVE_INFINITY); 1675 // now compare: 1676 if (op == IS_FINITE) { 1677 m = bits.compare(LT, infbits); 1678 } else if (op == IS_NAN) { 1679 m = bits.compare(GT, infbits); 1680 } else { 1681 m = bits.compare(EQ, infbits); 1682 } 1683 } 1684 else { 1685 throw new AssertionError(op); 1686 } 1687 return maskType.cast(m.cast(this.vspecies())); 1688 } 1689 int opc = opCode(op); 1690 throw new AssertionError(op); 1691 } 1692 1693 /** 1694 * {@inheritDoc} <!--workaround--> 1695 */ 1696 @Override 1697 @ForceInline 1698 public final 1699 VectorMask<Float> test(VectorOperators.Test op, 1700 VectorMask<Float> m) { 1701 return test(op).and(m); 1702 } 1703 1704 /** 1705 * {@inheritDoc} <!--workaround--> 1706 */ 1707 @Override 1708 public abstract 1709 VectorMask<Float> compare(VectorOperators.Comparison op, Vector<Float> v); 1710 1711 /*package-private*/ 1712 
@ForceInline 1713 final 1714 <M extends VectorMask<Float>> 1715 M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) { 1716 Objects.requireNonNull(v); 1717 FloatSpecies vsp = vspecies(); 1718 FloatVector that = (FloatVector) v; 1719 that.check(this); 1720 int opc = opCode(op); 1721 return VectorIntrinsics.compare( 1722 opc, getClass(), maskType, float.class, length(), 1723 this, that, 1724 (cond, v0, v1) -> { 1725 AbstractMask<Float> m 1726 = v0.bTest(cond, v1, (cond_, i, a, b) 1727 -> compareWithOp(cond, a, b)); 1728 @SuppressWarnings("unchecked") 1729 M m2 = (M) m; 1730 return m2; 1731 }); 1732 } 1733 1734 @ForceInline 1735 private static 1736 boolean compareWithOp(int cond, float a, float b) { 1737 switch (cond) { 1738 case VectorIntrinsics.BT_eq: return a == b; 1739 case VectorIntrinsics.BT_ne: return a != b; 1740 case VectorIntrinsics.BT_lt: return a < b; 1741 case VectorIntrinsics.BT_le: return a <= b; 1742 case VectorIntrinsics.BT_gt: return a > b; 1743 case VectorIntrinsics.BT_ge: return a >= b; 1744 } 1745 throw new AssertionError(); 1746 } 1747 1748 /** 1749 * {@inheritDoc} <!--workaround--> 1750 */ 1751 @Override 1752 @ForceInline 1753 public final 1754 VectorMask<Float> compare(VectorOperators.Comparison op, 1755 Vector<Float> v, 1756 VectorMask<Float> m) { 1757 return compare(op, v).and(m); 1758 } 1759 1760 /** 1761 * Tests this vector by comparing it with an input scalar, 1762 * according to the given comparison operation. 1763 * 1764 * This is a lane-wise binary test operation which applies 1765 * the comparison operation to each lane. 1766 * <p> 1767 * The result is the same as 1768 * {@code compare(op, broadcast(species(), e))}. 1769 * That is, the scalar may be regarded as broadcast to 1770 * a vector of the same species, and then compared 1771 * against the original vector, using the selected 1772 * comparison operation. 
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator
     * @see FloatVector#compare(VectorOperators.Comparison,Vector)
     * @see #eq(float)
     * @see #lt(float)
     */
    public abstract
    VectorMask<Float> compare(Comparison op, float e);

    /*package-private*/
    // Scalar form of compareTemplate: broadcasts e and defers to the
    // vector-vector template.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, float e) {
        return compareTemplate(maskType, op, broadcast(e));
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation,
     * in lanes selected by a mask.
     *
     * This is a masked lane-wise binary test operation which applies
     * the comparison operation to each pair of corresponding lane values.
     *
     * The returned result is equal to the expression
     * {@code compare(op,e).and(m)}.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator,
     *         and only in the lanes selected by the mask
     * @see FloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
     */
    @ForceInline
    public final VectorMask<Float> compare(VectorOperators.Comparison op,
                                           float e,
                                           VectorMask<Float> m) {
        return compare(op, e).and(m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(Comparison op, long e);

    /*package-private*/
    // long form of compareTemplate: broadcasts e and defers to the
    // vector-vector template.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, long e) {
        return compareTemplate(maskType, op, broadcast(e));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Float> compare(Comparison op, long e, VectorMask<Float> m) {
        return compare(op, broadcast(e), m);
    }



    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract
    FloatVector blend(Vector<Float> v, VectorMask<Float> m);

    /*package-private*/
    // Shared implementation of blend(v, m): checks v against this vector
    // and hands both vectors plus the mask to the intrinsic.  The fallback
    // lambda is a masked binary op whose lane function returns the lane
    // of v.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector
    blendTemplate(Class<M> maskType, FloatVector v, M m) {
        v.check(this);
        return VectorIntrinsics.blend(
            getClass(), maskType, float.class, length(),
            this, v, m,
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract FloatVector addIndex(int scale);

    /*package-private*/
    // Shared implementation of addIndex(scale).  The fallback lambda adds
    // the species' iota vector to this vector, multiplying iota by the
    // scale first unless the scale is 1.
    @ForceInline
    final FloatVector addIndexTemplate(int scale) {
        FloatSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorIntrinsics.indexVector(
            getClass(), float.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                FloatVector iota = s.iota();
                float sc = (float) scale_;
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
     *
     * @param e the input scalar, containing the replacement lane value
     * @param m the mask controlling lane selection of the scalar
     * @return the result of blending the lane elements of this vector with
     *         the scalar value
     */
    @ForceInline
    public final FloatVector blend(float e,
                                   VectorMask<Float> m) {
        return blend(broadcast(e), m);
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
1924 * 1925 * @param e the input scalar, containing the replacement lane value 1926 * @param m the mask controlling lane selection of the scalar 1927 * @return the result of blending the lane elements of this vector with 1928 * the scalar value 1929 */ 1930 @ForceInline 1931 public final FloatVector blend(long e, 1932 VectorMask<Float> m) { 1933 return blend(broadcast(e), m); 1934 } 1935 1936 /** 1937 * {@inheritDoc} <!--workaround--> 1938 */ 1939 @Override 1940 public abstract 1941 FloatVector slice(int origin, Vector<Float> v1); 1942 1943 /*package-private*/ 1944 final 1945 @ForceInline 1946 FloatVector sliceTemplate(int origin, Vector<Float> v1) { 1947 FloatVector that = (FloatVector) v1; 1948 that.check(this); 1949 float[] a0 = this.vec(); 1950 float[] a1 = that.vec(); 1951 float[] res = new float[a0.length]; 1952 int vlen = res.length; 1953 int firstPart = vlen - origin; 1954 System.arraycopy(a0, origin, res, 0, firstPart); 1955 System.arraycopy(a1, 0, res, firstPart, origin); 1956 return vectorFactory(res); 1957 } 1958 1959 /** 1960 * {@inheritDoc} <!--workaround--> 1961 */ 1962 @Override 1963 @ForceInline 1964 public final 1965 FloatVector slice(int origin, 1966 Vector<Float> w, 1967 VectorMask<Float> m) { 1968 return broadcast(0).blend(slice(origin, w), m); 1969 } 1970 1971 /** 1972 * {@inheritDoc} <!--workaround--> 1973 */ 1974 @Override 1975 public abstract 1976 FloatVector slice(int origin); 1977 1978 /** 1979 * {@inheritDoc} <!--workaround--> 1980 */ 1981 @Override 1982 public abstract 1983 FloatVector unslice(int origin, Vector<Float> w, int part); 1984 1985 /*package-private*/ 1986 final 1987 @ForceInline 1988 FloatVector 1989 unsliceTemplate(int origin, Vector<Float> w, int part) { 1990 FloatVector that = (FloatVector) w; 1991 that.check(this); 1992 float[] slice = this.vec(); 1993 float[] res = that.vec().clone(); 1994 int vlen = res.length; 1995 int firstPart = vlen - origin; 1996 switch (part) { 1997 case 0: 1998 System.arraycopy(slice, 0, res, 
origin, firstPart); 1999 break; 2000 case 1: 2001 System.arraycopy(slice, firstPart, res, 0, origin); 2002 break; 2003 default: 2004 throw wrongPartForSlice(part); 2005 } 2006 return vectorFactory(res); 2007 } 2008 2009 /*package-private*/ 2010 final 2011 @ForceInline 2012 <M extends VectorMask<Float>> 2013 FloatVector 2014 unsliceTemplate(Class<M> maskType, int origin, Vector<Float> w, int part, M m) { 2015 FloatVector that = (FloatVector) w; 2016 that.check(this); 2017 FloatVector slice = that.sliceTemplate(origin, that); 2018 slice = slice.blendTemplate(maskType, this, m); 2019 return slice.unsliceTemplate(origin, w, part); 2020 } 2021 2022 /** 2023 * {@inheritDoc} <!--workaround--> 2024 */ 2025 @Override 2026 public abstract 2027 FloatVector unslice(int origin, Vector<Float> w, int part, VectorMask<Float> m); 2028 2029 /** 2030 * {@inheritDoc} <!--workaround--> 2031 */ 2032 @Override 2033 public abstract 2034 FloatVector unslice(int origin); 2035 2036 private ArrayIndexOutOfBoundsException 2037 wrongPartForSlice(int part) { 2038 String msg = String.format("bad part number %d for slice operation", 2039 part); 2040 return new ArrayIndexOutOfBoundsException(msg); 2041 } 2042 2043 /** 2044 * {@inheritDoc} <!--workaround--> 2045 */ 2046 @Override 2047 public abstract 2048 FloatVector rearrange(VectorShuffle<Float> m); 2049 2050 /*package-private*/ 2051 @ForceInline 2052 final 2053 <S extends VectorShuffle<Float>> 2054 FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) { 2055 shuffle.checkIndexes(); 2056 return VectorIntrinsics.rearrangeOp( 2057 getClass(), shuffletype, float.class, length(), 2058 this, shuffle, 2059 (v1, s_) -> v1.uOp((i, a) -> { 2060 int ei = s_.laneSource(i); 2061 return v1.lane(ei); 2062 })); 2063 } 2064 2065 /** 2066 * {@inheritDoc} <!--workaround--> 2067 */ 2068 @Override 2069 public abstract 2070 FloatVector rearrange(VectorShuffle<Float> s, 2071 VectorMask<Float> m); 2072 2073 /*package-private*/ 2074 @ForceInline 2075 final 2076 
<S extends VectorShuffle<Float>> 2077 FloatVector rearrangeTemplate(Class<S> shuffletype, 2078 S shuffle, 2079 VectorMask<Float> m) { 2080 FloatVector unmasked = 2081 VectorIntrinsics.rearrangeOp( 2082 getClass(), shuffletype, float.class, length(), 2083 this, shuffle, 2084 (v1, s_) -> v1.uOp((i, a) -> { 2085 int ei = s_.laneSource(i); 2086 return ei < 0 ? 0 : v1.lane(ei); 2087 })); 2088 VectorMask<Float> valid = shuffle.laneIsValid(); 2089 if (m.andNot(valid).anyTrue()) { 2090 shuffle.checkIndexes(); 2091 throw new AssertionError(); 2092 } 2093 return broadcast((float)0).blend(unmasked, valid); 2094 } 2095 2096 /** 2097 * {@inheritDoc} <!--workaround--> 2098 */ 2099 @Override 2100 public abstract 2101 FloatVector rearrange(VectorShuffle<Float> s, 2102 Vector<Float> v); 2103 2104 /*package-private*/ 2105 @ForceInline 2106 final 2107 <S extends VectorShuffle<Float>> 2108 FloatVector rearrangeTemplate(Class<S> shuffletype, 2109 S shuffle, 2110 FloatVector v) { 2111 VectorMask<Float> valid = shuffle.laneIsValid(); 2112 S ws = shuffletype.cast(shuffle.wrapIndexes()); 2113 FloatVector r0 = 2114 VectorIntrinsics.rearrangeOp( 2115 getClass(), shuffletype, float.class, length(), 2116 this, ws, 2117 (v0, s_) -> v0.uOp((i, a) -> { 2118 int ei = s_.laneSource(i); 2119 return v0.lane(ei); 2120 })); 2121 FloatVector r1 = 2122 VectorIntrinsics.rearrangeOp( 2123 getClass(), shuffletype, float.class, length(), 2124 v, ws, 2125 (v1, s_) -> v1.uOp((i, a) -> { 2126 int ei = s_.laneSource(i); 2127 return v1.lane(ei); 2128 })); 2129 return r1.blend(r0, valid); 2130 } 2131 2132 /** 2133 * {@inheritDoc} <!--workaround--> 2134 */ 2135 @Override 2136 public abstract 2137 FloatVector selectFrom(Vector<Float> v); 2138 2139 /*package-private*/ 2140 @ForceInline 2141 final FloatVector selectFromTemplate(FloatVector v) { 2142 return v.rearrange(this.toShuffle()); 2143 } 2144 2145 /** 2146 * {@inheritDoc} <!--workaround--> 2147 */ 2148 @Override 2149 public abstract 2150 FloatVector 
selectFrom(Vector<Float> s, VectorMask<Float> m); 2151 2152 /*package-private*/ 2153 @ForceInline 2154 final FloatVector selectFromTemplate(FloatVector v, 2155 AbstractMask<Float> m) { 2156 return v.rearrange(this.toShuffle(), m); 2157 } 2158 2159 /// Ternary operations 2160 2161 2162 /** 2163 * Multiplies this vector by a second input vector, and sums 2164 * the result with a third. 2165 * 2166 * Extended precision is used for the intermediate result, 2167 * avoiding possible loss of precision from rounding once 2168 * for each of the two operations. 2169 * The result is numerically close to {@code this.mul(b).add(c)}, 2170 * and is typically closer to the true mathematical result. 2171 * 2172 * This is a lane-wise ternary operation which applies the 2173 * {@link Math#fma(float,float,float) Math#fma(a,b,c)} 2174 * operation to each lane. 2175 * 2176 * This method is also equivalent to the expression 2177 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2178 * lanewise}{@code (}{@link VectorOperators#FMA 2179 * FMA}{@code , b, c)}. 2180 * 2181 * @param b the second input vector, supplying multiplier values 2182 * @param c the third input vector, supplying addend values 2183 * @return the product of this vector and the second input vector 2184 * summed with the third input vector, using extended precision 2185 * for the intermediate result 2186 * @see #fma(float,float) 2187 * @see VectorOperators#FMA 2188 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 2189 */ 2190 @ForceInline 2191 public final 2192 FloatVector fma(Vector<Float> b, Vector<Float> c) { 2193 return lanewise(FMA, b, c); 2194 } 2195 2196 /** 2197 * Multiplies this vector by a scalar multiplier, and sums 2198 * the result with a scalar addend. 2199 * 2200 * Extended precision is used for the intermediate result, 2201 * avoiding possible loss of precision from rounding once 2202 * for each of the two operations. 
* The result is numerically close to {@code this.mul(b).add(c)},
     * and is typically closer to the true mathematical result.
     *
     * This is a lane-wise ternary operation which applies the
     * {@link Math#fma(float,float,float) Math#fma(a,b,c)}
     * operation to each lane.
     *
     * This method is also equivalent to the expression
     * {@link #lanewise(VectorOperators.Ternary,float,float)
     *    lanewise}{@code (}{@link VectorOperators#FMA
     *    FMA}{@code , b, c)}.
     *
     * @param b the scalar multiplier
     * @param c the scalar addend
     * @return the product of this vector and the scalar multiplier
     *         summed with the scalar addend, using extended precision
     *         for the intermediate result
     * @see #fma(Vector,Vector)
     * @see VectorOperators#FMA
     * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
     */
    @ForceInline
    public final
    FloatVector fma(float b, float c) {
        // Delegates to the scalar-operand lanewise overload with the FMA operator.
        return lanewise(FMA, b, c);
    }

    // Don't bother with (Vector,float) and (float,Vector) overloadings.

    // Type specific horizontal reductions

    /**
     * Returns a value accumulated from all the lanes of this vector.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to all the lane elements.
     *
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     *
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     *
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default value, positive zero.  So a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
2255 * 2256 * <li> 2257 * In the case of floating point addition and multiplication, the 2258 * precise result will reflect the choice of an arbitrary order 2259 * of operations, which may even vary over time. 2260 * 2261 * <li> 2262 * All other reduction operations are fully commutative and 2263 * associative. The implementation can choose any order of 2264 * processing, yet it will always produce the same result. 2265 * 2266 * </ul> 2267 * 2268 * @implNote 2269 * The value of a floating-point reduction may be a function 2270 * both of the input values as well as the order of scalar 2271 * operations which combine those values, specifically in the 2272 * case of {@code ADD} and {@code MUL} operations, where 2273 * details of rounding depend on operand order. 2274 * In those cases, the order of operations of this method is 2275 * intentionally not defined. This allows the JVM to generate 2276 * optimal machine code for the underlying platform at runtime. If 2277 * the platform supports a vector instruction to add or multiply 2278 * all values in the vector, or if there is some other efficient 2279 * machine code sequence, then the JVM has the option of 2280 * generating this machine code. Otherwise, the default 2281 * implementation is applied, which adds vector elements 2282 * sequentially from beginning to end. For this reason, the 2283 * output of this method may vary for the same input values, 2284 * if the selected operator is {@code ADD} or {@code MUL}. 
2285 * 2286 * 2287 * @param op the operation used to combine lane values 2288 * @return the accumulated result 2289 * @throws UnsupportedOperationException if this vector does 2290 * not support the requested operation 2291 * @see #reduceLanes(VectorOperators.Associative,VectorMask) 2292 * @see #add(Vector) 2293 * @see #mul(Vector) 2294 * @see #min(Vector) 2295 * @see #max(Vector) 2296 * @see VectorOperators#FIRST_NONZERO 2297 */ 2298 public abstract float reduceLanes(VectorOperators.Associative op); 2299 2300 /** 2301 * Returns a value accumulated from selected lanes of this vector, 2302 * controlled by a mask. 2303 * 2304 * This is an associative cross-lane reduction operation which 2305 * applies the specified operation to the selected lane elements. 2306 * <p> 2307 * If no elements are selected, an operation-specific identity 2308 * value is returned. 2309 * <ul> 2310 * <li> 2311 * If the operation is 2312 * {@code ADD} 2313 * or {@code FIRST_NONZERO}, 2314 * then the identity value is positive zero, the default {@code float} value. 2315 * <li> 2316 * If the operation is {@code MUL}, 2317 * then the identity value is one. 2318 * <li> 2319 * If the operation is {@code MAX}, 2320 * then the identity value is {@code Float.NEGATIVE_INFINITY}. 2321 * <li> 2322 * If the operation is {@code MIN}, 2323 * then the identity value is {@code Float.POSITIVE_INFINITY}. 2324 * </ul> 2325 * 2326 * @implNote 2327 * The value of a floating-point reduction may be a function 2328 * both of the input values as well as the order of scalar 2329 * operations which combine those values, specifically in the 2330 * case of {@code ADD} and {@code MUL} operations, where 2331 * details of rounding depend on operand order. 2332 * See {@linkplain #reduceLanes(VectorOperators.Associative) 2333 * the unmasked version of this method} 2334 * for a discussion. 
2335 * 2336 * 2337 * @param op the operation used to combine lane values 2338 * @param m the mask controlling lane selection 2339 * @return the reduced result accumulated from the selected lane values 2340 * @throws UnsupportedOperationException if this vector does 2341 * not support the requested operation 2342 * @see #reduceLanes(VectorOperators.Associative) 2343 */ 2344 public abstract float reduceLanes(VectorOperators.Associative op, 2345 VectorMask<Float> m); 2346 2347 /*package-private*/ 2348 @ForceInline 2349 final 2350 float reduceLanesTemplate(VectorOperators.Associative op, 2351 VectorMask<Float> m) { 2352 FloatVector v = reduceIdentityVector(op).blend(this, m); 2353 return v.reduceLanesTemplate(op); 2354 } 2355 2356 /*package-private*/ 2357 @ForceInline 2358 final 2359 float reduceLanesTemplate(VectorOperators.Associative op) { 2360 if (op == FIRST_NONZERO) { 2361 // FIXME: The JIT should handle this, and other scan ops alos. 2362 VectorMask<Integer> thisNZ 2363 = this.viewAsIntegralLanes().compare(NE, (int) 0); 2364 return this.lane(thisNZ.firstTrue()); 2365 } 2366 int opc = opCode(op); 2367 return fromBits(VectorIntrinsics.reductionCoerced( 2368 opc, getClass(), float.class, length(), 2369 this, 2370 REDUCE_IMPL.find(op, opc, (opc_) -> { 2371 switch (opc_) { 2372 case VECTOR_OP_ADD: return v -> 2373 toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b))); 2374 case VECTOR_OP_MUL: return v -> 2375 toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b))); 2376 case VECTOR_OP_MIN: return v -> 2377 toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (float) Math.min(a, b))); 2378 case VECTOR_OP_MAX: return v -> 2379 toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b))); 2380 case VECTOR_OP_FIRST_NONZERO: return v -> 2381 toBits(v.rOp((float)0, (i, a, b) -> toBits(a) != 0 ? 
a : b)); 2382 case VECTOR_OP_OR: return v -> 2383 toBits(v.rOp((float)0, (i, a, b) -> fromBits(toBits(a) | toBits(b)))); 2384 default: return null; 2385 }}))); 2386 } 2387 private static final 2388 ImplCache<Associative,Function<FloatVector,Long>> REDUCE_IMPL 2389 = new ImplCache<>(Associative.class, FloatVector.class); 2390 2391 private 2392 @ForceInline 2393 FloatVector reduceIdentityVector(VectorOperators.Associative op) { 2394 int opc = opCode(op); 2395 UnaryOperator<FloatVector> fn 2396 = REDUCE_ID_IMPL.find(op, opc, (opc_) -> { 2397 switch (opc_) { 2398 case VECTOR_OP_ADD: 2399 case VECTOR_OP_OR: 2400 case VECTOR_OP_XOR: 2401 case VECTOR_OP_FIRST_NONZERO: 2402 return v -> v.broadcast(0); 2403 case VECTOR_OP_MUL: 2404 return v -> v.broadcast(1); 2405 case VECTOR_OP_AND: 2406 return v -> v.broadcast(-1); 2407 case VECTOR_OP_MIN: 2408 return v -> v.broadcast(MAX_OR_INF); 2409 case VECTOR_OP_MAX: 2410 return v -> v.broadcast(MIN_OR_INF); 2411 default: return null; 2412 } 2413 }); 2414 return fn.apply(this); 2415 } 2416 private static final 2417 ImplCache<Associative,UnaryOperator<FloatVector>> REDUCE_ID_IMPL 2418 = new ImplCache<>(Associative.class, FloatVector.class); 2419 2420 private static final float MIN_OR_INF = Float.NEGATIVE_INFINITY; 2421 private static final float MAX_OR_INF = Float.POSITIVE_INFINITY; 2422 2423 public @Override abstract long reduceLanesToLong(VectorOperators.Associative op); 2424 public @Override abstract long reduceLanesToLong(VectorOperators.Associative op, 2425 VectorMask<Float> m); 2426 2427 // Type specific accessors 2428 2429 /** 2430 * Gets the lane element at lane index {@code i} 2431 * 2432 * @param i the lane index 2433 * @return the lane element at lane index {@code i} 2434 * @throws IllegalArgumentException if the index is is out of range 2435 * ({@code < 0 || >= length()}) 2436 */ 2437 public abstract float lane(int i); 2438 2439 /** 2440 * Replaces the lane element of this vector at lane index {@code i} with 2441 * value 
{@code e}. 2442 * 2443 * This is a cross-lane operation and behaves as if it returns the result 2444 * of blending this vector with an input vector that is the result of 2445 * broadcasting {@code e} and a mask that has only one lane set at lane 2446 * index {@code i}. 2447 * 2448 * @param i the lane index of the lane element to be replaced 2449 * @param e the value to be placed 2450 * @return the result of replacing the lane element of this vector at lane 2451 * index {@code i} with value {@code e}. 2452 * @throws IllegalArgumentException if the index is is out of range 2453 * ({@code < 0 || >= length()}) 2454 */ 2455 public abstract FloatVector withLane(int i, float e); 2456 2457 // Memory load operations 2458 2459 /** 2460 * Returns an array of type {@code float[]} 2461 * containing all the lane values. 2462 * The array length is the same as the vector length. 2463 * The array elements are stored in lane order. 2464 * <p> 2465 * This method behaves as if it stores 2466 * this vector into an allocated array 2467 * (using {@link #intoArray(float[], int) intoArray}) 2468 * and returns the array as follows: 2469 * <pre>{@code 2470 * float[] a = new float[this.length()]; 2471 * this.intoArray(a, 0); 2472 * return a; 2473 * }</pre> 2474 * 2475 * @return an array containing the lane values of this vector 2476 */ 2477 @ForceInline 2478 @Override 2479 public final float[] toArray() { 2480 float[] a = new float[vspecies().laneCount()]; 2481 intoArray(a, 0); 2482 return a; 2483 } 2484 2485 /** {@inheritDoc} <!--workaround--> 2486 */ 2487 @ForceInline 2488 @Override 2489 public final int[] toIntArray() { 2490 float[] a = toArray(); 2491 int[] res = new int[a.length]; 2492 for (int i = 0; i < a.length; i++) { 2493 float e = a[i]; 2494 res[i] = (int) FloatSpecies.toIntegralChecked(e, true); 2495 } 2496 return res; 2497 } 2498 2499 /** {@inheritDoc} <!--workaround--> 2500 */ 2501 @ForceInline 2502 @Override 2503 public final long[] toLongArray() { 2504 float[] a = toArray(); 
2505 long[] res = new long[a.length]; 2506 for (int i = 0; i < a.length; i++) { 2507 float e = a[i]; 2508 res[i] = FloatSpecies.toIntegralChecked(e, false); 2509 } 2510 return res; 2511 } 2512 2513 /** {@inheritDoc} <!--workaround--> 2514 * @implNote 2515 * When this method is used on used on vectors 2516 * of type {@code FloatVector}, 2517 * there will be no loss of precision. 2518 */ 2519 @ForceInline 2520 @Override 2521 public final double[] toDoubleArray() { 2522 float[] a = toArray(); 2523 double[] res = new double[a.length]; 2524 for (int i = 0; i < a.length; i++) { 2525 res[i] = (double) a[i]; 2526 } 2527 return res; 2528 } 2529 2530 /** 2531 * Loads a vector from a byte array starting at an offset. 2532 * Bytes are composed into primitive lane elements according 2533 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2534 * The vector is arranged into lanes according to 2535 * <a href="Vector.html#lane-order">memory ordering</a>. 2536 * <p> 2537 * This method behaves as if it returns the result of calling 2538 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2539 * fromByteBuffer()} as follows: 2540 * <pre>{@code 2541 * var bb = ByteBuffer.wrap(a); 2542 * var bo = ByteOrder.LITTLE_ENDIAN; 2543 * var m = species.maskAll(true); 2544 * return fromByteBuffer(species, bb, offset, m, bo); 2545 * }</pre> 2546 * 2547 * @param species species of desired vector 2548 * @param a the byte array 2549 * @param offset the offset into the array 2550 * @return a vector loaded from a byte array 2551 * @throws IndexOutOfBoundsException 2552 * if {@code offset+N*ESIZE < 0} 2553 * or {@code offset+(N+1)*ESIZE > a.length} 2554 * for any lane {@code N} in the vector 2555 */ 2556 @ForceInline 2557 public static 2558 FloatVector fromByteArray(VectorSpecies<Float> species, 2559 byte[] a, int offset) { 2560 return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN); 2561 } 2562 2563 /** 2564 * Loads a vector from a byte array starting 
at an offset. 2565 * Bytes are composed into primitive lane elements according 2566 * to the specified byte order. 2567 * The vector is arranged into lanes according to 2568 * <a href="Vector.html#lane-order">memory ordering</a>. 2569 * <p> 2570 * This method behaves as if it returns the result of calling 2571 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2572 * fromByteBuffer()} as follows: 2573 * <pre>{@code 2574 * var bb = ByteBuffer.wrap(a); 2575 * var m = species.maskAll(true); 2576 * return fromByteBuffer(species, bb, offset, m, bo); 2577 * }</pre> 2578 * 2579 * @param species species of desired vector 2580 * @param a the byte array 2581 * @param offset the offset into the array 2582 * @param bo the intended byte order 2583 * @return a vector loaded from a byte array 2584 * @throws IndexOutOfBoundsException 2585 * if {@code offset+N*ESIZE < 0} 2586 * or {@code offset+(N+1)*ESIZE > a.length} 2587 * for any lane {@code N} in the vector 2588 */ 2589 @ForceInline 2590 public static 2591 FloatVector fromByteArray(VectorSpecies<Float> species, 2592 byte[] a, int offset, 2593 ByteOrder bo) { 2594 FloatSpecies vsp = (FloatSpecies) species; 2595 offset = checkFromIndexSize(offset, 2596 vsp.vectorBitSize() / Byte.SIZE, 2597 a.length); 2598 return vsp.dummyVector() 2599 .fromByteArray0(a, offset).maybeSwap(bo); 2600 } 2601 2602 /** 2603 * Loads a vector from a byte array starting at an offset 2604 * and using a mask. 2605 * Lanes where the mask is unset are filled with the default 2606 * value of {@code float} (positive zero). 2607 * Bytes are composed into primitive lane elements according 2608 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2609 * The vector is arranged into lanes according to 2610 * <a href="Vector.html#lane-order">memory ordering</a>. 
* <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows:
     * <pre>{@code
     * var bb = ByteBuffer.wrap(a);
     * var bo = ByteOrder.LITTLE_ENDIAN;
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param a the byte array
     * @param offset the offset into the array
     * @param m the mask controlling lane selection
     * @return a vector loaded from a byte array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*ESIZE < 0}
     *         or {@code offset+(N+1)*ESIZE > a.length}
     *         for any lane {@code N} in the vector where
     *         the mask is set
     */
    @ForceInline
    public static
    FloatVector fromByteArray(VectorSpecies<Float> species,
                                       byte[] a, int offset,
                                       VectorMask<Float> m) {
        // Little-endian convenience overload of the masked byte-order variant.
        return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m);
    }

    /**
     * Loads a vector from a byte array starting at an offset
     * and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code float} (positive zero).
     * Bytes are composed into primitive lane elements according
     * to the specified byte order.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
2649 * <p> 2650 * This method behaves as if it returns the result of calling 2651 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2652 * fromByteBuffer()} as follows: 2653 * <pre>{@code 2654 * var bb = ByteBuffer.wrap(a); 2655 * return fromByteBuffer(species, bb, offset, m, bo); 2656 * }</pre> 2657 * 2658 * @param species species of desired vector 2659 * @param a the byte array 2660 * @param offset the offset into the array 2661 * @param bo the intended byte order 2662 * @param m the mask controlling lane selection 2663 * @return a vector loaded from a byte array 2664 * @throws IndexOutOfBoundsException 2665 * if {@code offset+N*ESIZE < 0} 2666 * or {@code offset+(N+1)*ESIZE > a.length} 2667 * for any lane {@code N} in the vector 2668 * where the mask is set 2669 */ 2670 @ForceInline 2671 public static 2672 FloatVector fromByteArray(VectorSpecies<Float> species, 2673 byte[] a, int offset, 2674 ByteOrder bo, 2675 VectorMask<Float> m) { 2676 FloatSpecies vsp = (FloatSpecies) species; 2677 FloatVector zero = vsp.zero(); 2678 2679 if (offset >= 0 && offset <= (a.length - vsp.length() * 4)) { 2680 FloatVector v = zero.fromByteArray0(a, offset); 2681 return zero.blend(v.maybeSwap(bo), m); 2682 } 2683 FloatVector iota = zero.addIndex(1); 2684 ((AbstractMask<Float>)m) 2685 .checkIndexByLane(offset, a.length, iota, 4); 2686 FloatBuffer tb = wrapper(a, offset, bo); 2687 return vsp.ldOp(tb, 0, (AbstractMask<Float>)m, 2688 (tb_, __, i) -> tb_.get(i)); 2689 } 2690 2691 /** 2692 * Loads a vector from an array of type {@code float[]} 2693 * starting at an offset. 2694 * For each vector lane, where {@code N} is the vector lane index, the 2695 * array element at index {@code offset + N} is placed into the 2696 * resulting vector at lane index {@code N}. 
2697 * 2698 * @param species species of desired vector 2699 * @param a the array 2700 * @param offset the offset into the array 2701 * @return the vector loaded from an array 2702 * @throws IndexOutOfBoundsException 2703 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2704 * for any lane {@code N} in the vector 2705 */ 2706 @ForceInline 2707 public static 2708 FloatVector fromArray(VectorSpecies<Float> species, 2709 float[] a, int offset) { 2710 FloatSpecies vsp = (FloatSpecies) species; 2711 offset = checkFromIndexSize(offset, 2712 vsp.laneCount(), 2713 a.length); 2714 return vsp.dummyVector().fromArray0(a, offset); 2715 } 2716 2717 /** 2718 * Loads a vector from an array of type {@code float[]} 2719 * starting at an offset and using a mask. 2720 * Lanes where the mask is unset are filled with the default 2721 * value of {@code float} (positive zero). 2722 * For each vector lane, where {@code N} is the vector lane index, 2723 * if the mask lane at index {@code N} is set then the array element at 2724 * index {@code offset + N} is placed into the resulting vector at lane index 2725 * {@code N}, otherwise the default element value is placed into the 2726 * resulting vector at lane index {@code N}. 
2727 * 2728 * @param species species of desired vector 2729 * @param a the array 2730 * @param offset the offset into the array 2731 * @param m the mask controlling lane selection 2732 * @return the vector loaded from an array 2733 * @throws IndexOutOfBoundsException 2734 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2735 * for any lane {@code N} in the vector 2736 * where the mask is set 2737 */ 2738 @ForceInline 2739 public static 2740 FloatVector fromArray(VectorSpecies<Float> species, 2741 float[] a, int offset, 2742 VectorMask<Float> m) { 2743 FloatSpecies vsp = (FloatSpecies) species; 2744 if (offset >= 0 && offset <= (a.length - species.length())) { 2745 FloatVector zero = vsp.zero(); 2746 return zero.blend(zero.fromArray0(a, offset), m); 2747 } 2748 FloatVector iota = vsp.iota(); 2749 ((AbstractMask<Float>)m) 2750 .checkIndexByLane(offset, a.length, iota, 1); 2751 return vsp.vOp(m, i -> a[offset + i]); 2752 } 2753 2754 /** 2755 * Gathers a new vector composed of elements from an array of type 2756 * {@code float[]}, 2757 * using indexes obtained by adding a fixed {@code offset} to a 2758 * series of secondary offsets from an <em>index map</em>. 2759 * The index map is a contiguous sequence of {@code VLENGTH} 2760 * elements in a second array of {@code int}s, starting at a given 2761 * {@code mapOffset}. 2762 * <p> 2763 * For each vector lane, where {@code N} is the vector lane index, 2764 * the lane is loaded from the array 2765 * element {@code a[f(N)]}, where {@code f(N)} is the 2766 * index mapping expression 2767 * {@code offset + indexMap[mapOffset + N]]}. 
*
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see FloatVector#toIntArray()
     */
    @ForceInline
    public static
    FloatVector fromArray(VectorSpecies<Float> species,
                                   float[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        FloatSpecies vsp = (FloatSpecies) species;
        // Fail fast on null arrays before any index arithmetic.
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        Class<? extends FloatVector> vectorType = vsp.vectorType();


        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix = IntVector.fromArray(IntVector.species(vsp.indexShape()), indexMap, mapOffset).add(offset);

        // Range-check every computed index against the array length.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // The trailing lambda is the scalar form of the gather: lane n
        // reads c[idx + iMap[idy + n]].
        // NOTE(review): presumably used as the fallback when the
        // intrinsic is not applied -- confirm against VectorIntrinsics.
        return VectorIntrinsics.loadWithMap(
            vectorType, float.class, vsp.laneCount(),
            IntVector.species(vsp.indexShape()).vectorType(),
            a, ARRAY_BASE, vix,
            a, offset, indexMap, mapOffset, vsp,
            (float[] c, int idx, int[] iMap, int idy, FloatSpecies s) ->
            s.vOp(n -> c[idx + iMap[idy+n]]));
    }

    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code float[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
2816 * The index map is a contiguous sequence of {@code VLENGTH} 2817 * elements in a second array of {@code int}s, starting at a given 2818 * {@code mapOffset}. 2819 * <p> 2820 * For each vector lane, where {@code N} is the vector lane index, 2821 * if the lane is set in the mask, 2822 * the lane is loaded from the array 2823 * element {@code a[f(N)]}, where {@code f(N)} is the 2824 * index mapping expression 2825 * {@code offset + indexMap[mapOffset + N]]}. 2826 * Unset lanes in the resulting vector are set to zero. 2827 * 2828 * @param species species of desired vector 2829 * @param a the array 2830 * @param offset the offset into the array, may be negative if relative 2831 * indexes in the index map compensate to produce a value within the 2832 * array bounds 2833 * @param indexMap the index map 2834 * @param mapOffset the offset into the index map 2835 * @param m the mask controlling lane selection 2836 * @return the vector loaded from the indexed elements of the array 2837 * @throws IndexOutOfBoundsException 2838 * if {@code mapOffset+N < 0} 2839 * or if {@code mapOffset+N >= indexMap.length}, 2840 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 2841 * is an invalid index into {@code a}, 2842 * for any lane {@code N} in the vector 2843 * where the mask is set 2844 * @see FloatVector#toIntArray() 2845 */ 2846 @ForceInline 2847 public static 2848 FloatVector fromArray(VectorSpecies<Float> species, 2849 float[] a, int offset, 2850 int[] indexMap, int mapOffset, 2851 VectorMask<Float> m) { 2852 FloatSpecies vsp = (FloatSpecies) species; 2853 2854 // FIXME This can result in out of bounds errors for unset mask lanes 2855 // FIX = Use a scatter instruction which routes the unwanted lanes 2856 // into a bit-bucket variable (private to implementation). 2857 // This requires a 2-D scatter in order to set a second base address. 
2858 // See notes in https://bugs.openjdk.java.net/browse/JDK-8223367 2859 assert(m.allTrue()); 2860 return (FloatVector) 2861 zero(species).blend(fromArray(species, a, offset, indexMap, mapOffset), m); 2862 2863 } 2864 2865 /** 2866 * Loads a vector from a {@linkplain ByteBuffer byte buffer} 2867 * starting at an offset into the byte buffer. 2868 * <p> 2869 * Bytes are composed into primitive lane elements according to 2870 * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order. 2871 * To avoid errors, the 2872 * {@linkplain ByteBuffer#order() intrinsic byte order} 2873 * of the buffer must be little-endian. 2874 * <p> 2875 * This method behaves as if it returns the result of calling 2876 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2877 * fromByteBuffer()} as follows: 2878 * <pre>{@code 2879 * var bb = ByteBuffer.wrap(a); 2880 * var bo = ByteOrder.LITTLE_ENDIAN; 2881 * var m = species.maskAll(true); 2882 * return fromByteBuffer(species, bb, offset, m, bo); 2883 * }</pre> 2884 * 2885 * @param species species of desired vector 2886 * @param bb the byte buffer 2887 * @param offset the offset into the byte buffer 2888 * @param bo the intended byte order 2889 * @return a vector loaded from a byte buffer 2890 * @throws IllegalArgumentException if byte order of bb 2891 * is not {@link ByteOrder#LITTLE_ENDIAN} 2892 * @throws IndexOutOfBoundsException 2893 * if {@code offset+N*4 < 0} 2894 * or {@code offset+N*4 >= bb.limit()} 2895 * for any lane {@code N} in the vector 2896 */ 2897 @ForceInline 2898 public static 2899 FloatVector fromByteBuffer(VectorSpecies<Float> species, 2900 ByteBuffer bb, int offset, 2901 ByteOrder bo) { 2902 FloatSpecies vsp = (FloatSpecies) species; 2903 offset = checkFromIndexSize(offset, 2904 vsp.laneCount(), 2905 bb.limit()); 2906 return vsp.dummyVector() 2907 .fromByteBuffer0(bb, offset).maybeSwap(bo); 2908 } 2909 2910 /** 2911 * Loads a vector from a {@linkplain ByteBuffer byte buffer} 2912 * starting at an 
offset into the byte buffer 2913 * and using a mask. 2914 * <p> 2915 * Bytes are composed into primitive lane elements according to 2916 * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order. 2917 * To avoid errors, the 2918 * {@linkplain ByteBuffer#order() intrinsic byte order} 2919 * of the buffer must be little-endian. 2920 * <p> 2921 * This method behaves as if it returns the result of calling 2922 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2923 * fromByteBuffer()} as follows: 2924 * <pre>{@code 2925 * var bb = ByteBuffer.wrap(a); 2926 * var bo = ByteOrder.LITTLE_ENDIAN; 2927 * var m = species.maskAll(true); 2928 * return fromByteBuffer(species, bb, offset, m, bo); 2929 * }</pre> 2930 * 2931 * @param species species of desired vector 2932 * @param bb the byte buffer 2933 * @param offset the offset into the byte buffer 2934 * @param bo the intended byte order 2935 * @param m the mask controlling lane selection 2936 * @return a vector loaded from a byte buffer 2937 * @throws IllegalArgumentException if byte order of bb 2938 * is not {@link ByteOrder#LITTLE_ENDIAN} 2939 * @throws IndexOutOfBoundsException 2940 * if {@code offset+N*4 < 0} 2941 * or {@code offset+N*4 >= bb.limit()} 2942 * for any lane {@code N} in the vector 2943 * where the mask is set 2944 */ 2945 @ForceInline 2946 public static 2947 FloatVector fromByteBuffer(VectorSpecies<Float> species, 2948 ByteBuffer bb, int offset, 2949 ByteOrder bo, 2950 VectorMask<Float> m) { 2951 if (m.allTrue()) { 2952 return fromByteBuffer(species, bb, offset, bo); 2953 } 2954 FloatSpecies vsp = (FloatSpecies) species; 2955 checkMaskFromIndexSize(offset, 2956 vsp, m, 1, 2957 bb.limit()); 2958 FloatVector zero = zero(vsp); 2959 FloatVector v = zero.fromByteBuffer0(bb, offset); 2960 return zero.blend(v.maybeSwap(bo), m); 2961 } 2962 2963 // Memory store operations 2964 2965 /** 2966 * Stores this vector into an array of type {@code float[]} 2967 * starting at an offset. 
* <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     *
     * @param a the array, of type {@code float[]}
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoArray(float[] a, int offset) {
        FloatSpecies vsp = vspecies();
        // The range check covers one array element per lane.
        offset = checkFromIndexSize(offset,
                                    vsp.laneCount(),
                                    a.length);
        // The trailing lambda is the scalar form of the store: lane i
        // is written to arr_[off_ + i].
        // NOTE(review): presumably used as the fallback when the
        // intrinsic is not applied -- confirm against VectorIntrinsics.
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }

    /**
     * Stores this vector into an array of {@code float}
     * starting at offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     * If the mask lane at {@code N} is unset then the corresponding
     * array element {@code a[offset+N]} is left unchanged.
     * <p>
     * Array range checking is done for lanes where the mask is set.
     * Lanes where the mask is unset are not stored and do not need
     * to correspond to legitimate elements of {@code a}.
     * That is, unset lanes may correspond to array indexes less than
     * zero or beyond the end of the array.
3011 * 3012 * @param a the array, of type {@code float[]} 3013 * @param offset the offset into the array 3014 * @param m the mask controlling lane storage 3015 * @throws IndexOutOfBoundsException 3016 * if {@code offset+N < 0} or {@code offset+N >= a.length} 3017 * for any lane {@code N} in the vector 3018 * where the mask is set 3019 */ 3020 @ForceInline 3021 public final 3022 void intoArray(float[] a, int offset, 3023 VectorMask<Float> m) { 3024 if (m.allTrue()) { 3025 intoArray(a, offset); 3026 } else { 3027 // FIXME: Cannot vectorize yet, if there's a mask. 3028 stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v); 3029 } 3030 } 3031 3032 /** 3033 * Scatters this vector into an array of type {@code float[]} 3034 * using indexes obtained by adding a fixed {@code offset} to a 3035 * series of secondary offsets from an <em>index map</em>. 3036 * The index map is a contiguous sequence of {@code VLENGTH} 3037 * elements in a second array of {@code int}s, starting at a given 3038 * {@code mapOffset}. 3039 * <p> 3040 * For each vector lane, where {@code N} is the vector lane index, 3041 * the lane element at index {@code N} is stored into the array 3042 * element {@code a[f(N)]}, where {@code f(N)} is the 3043 * index mapping expression 3044 * {@code offset + indexMap[mapOffset + N]]}. 3045 * 3046 * @param a the array 3047 * @param offset an offset to combine with the index map offsets 3048 * @param indexMap the index map 3049 * @param mapOffset the offset into the index map 3050 * @returns a vector of the values {@code a[f(N)]}, where 3051 * {@code f(N) = offset + indexMap[mapOffset + N]]}. 
3052 * @throws IndexOutOfBoundsException 3053 * if {@code mapOffset+N < 0} 3054 * or if {@code mapOffset+N >= indexMap.length}, 3055 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 3056 * is an invalid index into {@code a}, 3057 * for any lane {@code N} in the vector 3058 * @see FloatVector#toIntArray() 3059 */ 3060 @ForceInline 3061 public final 3062 void intoArray(float[] a, int offset, 3063 int[] indexMap, int mapOffset) { 3064 FloatSpecies vsp = vspecies(); 3065 if (length() == 1) { 3066 intoArray(a, offset + indexMap[mapOffset]); 3067 return; 3068 } 3069 IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies(); 3070 if (isp.laneCount() != vsp.laneCount()) { 3071 stOp(a, offset, 3072 (arr, off, i, e) -> { 3073 int j = indexMap[mapOffset + i]; 3074 arr[off + j] = e; 3075 }); 3076 return; 3077 } 3078 3079 // Index vector: vix[0:n] = i -> offset + indexMap[mo + i] 3080 IntVector vix = IntVector 3081 .fromArray(isp, indexMap, mapOffset) 3082 .add(offset); 3083 3084 vix = VectorIntrinsics.checkIndex(vix, a.length); 3085 3086 VectorIntrinsics.storeWithMap( 3087 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3088 isp.vectorType(), 3089 a, arrayAddress(a, 0), vix, 3090 this, 3091 a, offset, indexMap, mapOffset, 3092 (arr, off, v, map, mo) 3093 -> v.stOp(arr, off, 3094 (arr_, off_, i, e) -> { 3095 int j = map[mo + i]; 3096 arr[off + j] = e; 3097 })); 3098 } 3099 3100 /** 3101 * Scatters this vector into an array of type {@code float[]}, 3102 * under the control of a mask, and 3103 * using indexes obtained by adding a fixed {@code offset} to a 3104 * series of secondary offsets from an <em>index map</em>. 3105 * The index map is a contiguous sequence of {@code VLENGTH} 3106 * elements in a second array of {@code int}s, starting at a given 3107 * {@code mapOffset}. 
3108 * <p> 3109 * For each vector lane, where {@code N} is the vector lane index, 3110 * if the mask lane at index {@code N} is set then 3111 * the lane element at index {@code N} is stored into the array 3112 * element {@code a[f(N)]}, where {@code f(N)} is the 3113 * index mapping expression 3114 * {@code offset + indexMap[mapOffset + N]]}. 3115 * 3116 * @param a the array 3117 * @param offset an offset to combine with the index map offsets 3118 * @param indexMap the index map 3119 * @param mapOffset the offset into the index map 3120 * @param m the mask 3121 * @returns a vector of the values {@code m ? a[f(N)] : 0}, 3122 * {@code f(N) = offset + indexMap[mapOffset + N]]}. 3123 * @throws IndexOutOfBoundsException 3124 * if {@code mapOffset+N < 0} 3125 * or if {@code mapOffset+N >= indexMap.length}, 3126 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 3127 * is an invalid index into {@code a}, 3128 * for any lane {@code N} in the vector 3129 * where the mask is set 3130 * @see FloatVector#toIntArray() 3131 */ 3132 @ForceInline 3133 public final 3134 void intoArray(float[] a, int offset, 3135 int[] indexMap, int mapOffset, 3136 VectorMask<Float> m) { 3137 FloatSpecies vsp = vspecies(); 3138 if (m.allTrue()) { 3139 intoArray(a, offset, indexMap, mapOffset); 3140 return; 3141 } 3142 throw new AssertionError("fixme"); 3143 } 3144 3145 /** 3146 * {@inheritDoc} <!--workaround--> 3147 */ 3148 @Override 3149 @ForceInline 3150 public final 3151 void intoByteArray(byte[] a, int offset) { 3152 offset = checkFromIndexSize(offset, 3153 bitSize() / Byte.SIZE, 3154 a.length); 3155 this.maybeSwap(ByteOrder.LITTLE_ENDIAN) 3156 .intoByteArray0(a, offset); 3157 } 3158 3159 /** 3160 * {@inheritDoc} <!--workaround--> 3161 */ 3162 @Override 3163 @ForceInline 3164 public final 3165 void intoByteArray(byte[] a, int offset, 3166 VectorMask<Float> m) { 3167 if (m.allTrue()) { 3168 intoByteArray(a, offset); 3169 return; 3170 } 3171 FloatSpecies vsp = vspecies(); 3172 if (offset >= 0 && 
offset <= (a.length - vsp.length() * 4)) { 3173 var oldVal = fromByteArray0(a, offset); 3174 var newVal = oldVal.blend(this, m); 3175 newVal.intoByteArray0(a, offset); 3176 } else { 3177 checkMaskFromIndexSize(offset, vsp, m, 4, a.length); 3178 FloatBuffer tb = wrapper(a, offset, NATIVE_ENDIAN); 3179 this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e)); 3180 } 3181 } 3182 3183 /** 3184 * {@inheritDoc} <!--workaround--> 3185 */ 3186 @Override 3187 @ForceInline 3188 public final 3189 void intoByteArray(byte[] a, int offset, 3190 ByteOrder bo, 3191 VectorMask<Float> m) { 3192 maybeSwap(bo).intoByteArray(a, offset, m); 3193 } 3194 3195 /** 3196 * {@inheritDoc} <!--workaround--> 3197 */ 3198 @Override 3199 @ForceInline 3200 public final 3201 void intoByteBuffer(ByteBuffer bb, int offset, 3202 ByteOrder bo) { 3203 maybeSwap(bo).intoByteBuffer0(bb, offset); 3204 } 3205 3206 /** 3207 * {@inheritDoc} <!--workaround--> 3208 */ 3209 @Override 3210 @ForceInline 3211 public final 3212 void intoByteBuffer(ByteBuffer bb, int offset, 3213 ByteOrder bo, 3214 VectorMask<Float> m) { 3215 if (m.allTrue()) { 3216 intoByteBuffer(bb, offset, bo); 3217 return; 3218 } 3219 FloatSpecies vsp = vspecies(); 3220 checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit()); 3221 conditionalStoreNYI(offset, vsp, m, 4, bb.limit()); 3222 var oldVal = fromByteBuffer0(bb, offset); 3223 var newVal = oldVal.blend(this.maybeSwap(bo), m); 3224 newVal.intoByteBuffer0(bb, offset); 3225 } 3226 3227 // ================================================ 3228 3229 // Low-level memory operations. 3230 // 3231 // Note that all of these operations *must* inline into a context 3232 // where the exact species of the involved vector is a 3233 // compile-time constant. Otherwise, the intrinsic generation 3234 // will fail and performance will suffer. 3235 // 3236 // In many cases this is achieved by re-deriving a version of the 3237 // method in each concrete subclass (per species). 
The re-derived 3238 // method simply calls one of these generic methods, with exact 3239 // parameters for the controlling metadata, which is either a 3240 // typed vector or constant species instance. 3241 3242 // Unchecked loading operations in native byte order. 3243 // Caller is reponsible for applying index checks, masking, and 3244 // byte swapping. 3245 3246 /*package-private*/ 3247 abstract 3248 FloatVector fromArray0(float[] a, int offset); 3249 @ForceInline 3250 final 3251 FloatVector fromArray0Template(float[] a, int offset) { 3252 FloatSpecies vsp = vspecies(); 3253 return VectorIntrinsics.load( 3254 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3255 a, arrayAddress(a, offset), 3256 a, offset, vsp, 3257 (arr, off, s) -> s.ldOp(arr, off, 3258 (arr_, off_, i) -> arr_[off_ + i])); 3259 } 3260 3261 @Override 3262 abstract 3263 FloatVector fromByteArray0(byte[] a, int offset); 3264 @ForceInline 3265 final 3266 FloatVector fromByteArray0Template(byte[] a, int offset) { 3267 FloatSpecies vsp = vspecies(); 3268 return VectorIntrinsics.load( 3269 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3270 a, byteArrayAddress(a, offset), 3271 a, offset, vsp, 3272 (arr, off, s) -> { 3273 FloatBuffer tb = wrapper(arr, off, NATIVE_ENDIAN); 3274 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i)); 3275 }); 3276 } 3277 3278 abstract 3279 FloatVector fromByteBuffer0(ByteBuffer bb, int offset); 3280 @ForceInline 3281 final 3282 FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) { 3283 FloatSpecies vsp = vspecies(); 3284 return VectorIntrinsics.load( 3285 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3286 bufferBase(bb), bufferAddress(bb, offset), 3287 bb, offset, vsp, 3288 (buf, off, s) -> { 3289 FloatBuffer tb = wrapper(buf, off, NATIVE_ENDIAN); 3290 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i)); 3291 }); 3292 } 3293 3294 // Unchecked storing operations in native byte order. 
3295 // Caller is reponsible for applying index checks, masking, and 3296 // byte swapping. 3297 3298 abstract 3299 void intoArray0(float[] a, int offset); 3300 @ForceInline 3301 final 3302 void intoArray0Template(float[] a, int offset) { 3303 FloatSpecies vsp = vspecies(); 3304 VectorIntrinsics.store( 3305 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3306 a, arrayAddress(a, offset), 3307 this, a, offset, 3308 (arr, off, v) 3309 -> v.stOp(arr, off, 3310 (arr_, off_, i, e) -> arr_[off_+i] = e)); 3311 } 3312 3313 abstract 3314 void intoByteArray0(byte[] a, int offset); 3315 @ForceInline 3316 final 3317 void intoByteArray0Template(byte[] a, int offset) { 3318 FloatSpecies vsp = vspecies(); 3319 VectorIntrinsics.store( 3320 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3321 a, byteArrayAddress(a, offset), 3322 this, a, offset, 3323 (arr, off, v) -> { 3324 FloatBuffer tb = wrapper(arr, off, NATIVE_ENDIAN); 3325 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e)); 3326 }); 3327 } 3328 3329 @ForceInline 3330 final 3331 void intoByteBuffer0(ByteBuffer bb, int offset) { 3332 FloatSpecies vsp = vspecies(); 3333 VectorIntrinsics.store( 3334 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3335 bufferBase(bb), bufferAddress(bb, offset), 3336 this, bb, offset, 3337 (buf, off, v) -> { 3338 FloatBuffer tb = wrapper(buf, off, NATIVE_ENDIAN); 3339 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e)); 3340 }); 3341 } 3342 3343 // End of low-level memory operations. 
3344 3345 private static 3346 void checkMaskFromIndexSize(int offset, 3347 FloatSpecies vsp, 3348 VectorMask<Float> m, 3349 int scale, 3350 int limit) { 3351 ((AbstractMask<Float>)m) 3352 .checkIndexByLane(offset, limit, vsp.iota(), scale); 3353 } 3354 3355 @ForceInline 3356 private void conditionalStoreNYI(int offset, 3357 FloatSpecies vsp, 3358 VectorMask<Float> m, 3359 int scale, 3360 int limit) { 3361 if (offset < 0 || offset + vsp.laneCount() * scale > limit) { 3362 String msg = 3363 String.format("unimplemented: store @%d in [0..%d), %s in %s", 3364 offset, limit, m, vsp); 3365 throw new AssertionError(msg); 3366 } 3367 } 3368 3369 /*package-private*/ 3370 @Override 3371 @ForceInline 3372 final 3373 FloatVector maybeSwap(ByteOrder bo) { 3374 if (bo != NATIVE_ENDIAN) { 3375 return this.reinterpretAsBytes() 3376 .rearrange(swapBytesShuffle()) 3377 .reinterpretAsFloats(); 3378 } 3379 return this; 3380 } 3381 3382 static final int ARRAY_SHIFT = 3383 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE); 3384 static final long ARRAY_BASE = 3385 Unsafe.ARRAY_FLOAT_BASE_OFFSET; 3386 3387 @ForceInline 3388 static long arrayAddress(float[] a, int index) { 3389 return ARRAY_BASE + (((long)index) << ARRAY_SHIFT); 3390 } 3391 3392 @ForceInline 3393 static long byteArrayAddress(byte[] a, int index) { 3394 return Unsafe.ARRAY_BYTE_BASE_OFFSET + index; 3395 } 3396 3397 // Byte buffer wrappers. 
3398 private static FloatBuffer wrapper(ByteBuffer bb, int offset, 3399 ByteOrder bo) { 3400 return bb.duplicate().position(offset).slice() 3401 .order(bo).asFloatBuffer(); 3402 } 3403 private static FloatBuffer wrapper(byte[] a, int offset, 3404 ByteOrder bo) { 3405 return ByteBuffer.wrap(a, offset, a.length - offset) 3406 .order(bo).asFloatBuffer(); 3407 } 3408 3409 // ================================================ 3410 3411 /// Reinterpreting view methods: 3412 // lanewise reinterpret: viewAsXVector() 3413 // keep shape, redraw lanes: reinterpretAsEs() 3414 3415 /** 3416 * {@inheritDoc} <!--workaround--> 3417 */ 3418 @ForceInline 3419 @Override 3420 public final ByteVector reinterpretAsBytes() { 3421 // Going to ByteVector, pay close attention to byte order. 3422 assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN); 3423 return asByteVectorRaw(); 3424 //return asByteVectorRaw().rearrange(swapBytesShuffle()); 3425 } 3426 3427 /** 3428 * {@inheritDoc} <!--workaround--> 3429 */ 3430 @ForceInline 3431 @Override 3432 public final IntVector viewAsIntegralLanes() { 3433 LaneType ilt = LaneType.FLOAT.asIntegral(); 3434 return (IntVector) asVectorRaw(ilt); 3435 } 3436 3437 /** 3438 * {@inheritDoc} <!--workaround--> 3439 */ 3440 @ForceInline 3441 @Override 3442 public final 3443 FloatVector 3444 viewAsFloatingLanes() { 3445 return this; 3446 } 3447 3448 // ================================================ 3449 3450 /// Object methods: toString, equals, hashCode 3451 // 3452 // Object methods are defined as if via Arrays.toString, etc., 3453 // is applied to the array of elements. Two equal vectors 3454 // are required to have equal species and equal lane values. 3455 3456 /** 3457 * Returns a string representation of this vector, of the form 3458 * {@code "[0,1,2...]"}, reporting the lane values of this vector, 3459 * in lane order. 
3460 * 3461 * The string is produced as if by a call to {@link 3462 * java.util.Arrays#toString(float[]) Arrays.toString()}, 3463 * as appropriate to the {@code float} array returned by 3464 * {@link #toArray this.toArray()}. 3465 * 3466 * @return a string of the form {@code "[0,1,2...]"} 3467 * reporting the lane values of this vector 3468 */ 3469 @Override 3470 @ForceInline 3471 public final 3472 String toString() { 3473 // now that toArray is strongly typed, we can define this 3474 return Arrays.toString(toArray()); 3475 } 3476 3477 /** 3478 * {@inheritDoc} <!--workaround--> 3479 */ 3480 @Override 3481 @ForceInline 3482 public final 3483 boolean equals(Object obj) { 3484 if (obj instanceof Vector) { 3485 Vector<?> that = (Vector<?>) obj; 3486 if (this.species().equals(that.species())) { 3487 return this.eq(that.check(this.species())).allTrue(); 3488 } 3489 } 3490 return false; 3491 } 3492 3493 /** 3494 * {@inheritDoc} <!--workaround--> 3495 */ 3496 @Override 3497 @ForceInline 3498 public final 3499 int hashCode() { 3500 // now that toArray is strongly typed, we can define this 3501 return Objects.hash(species(), Arrays.hashCode(toArray())); 3502 } 3503 3504 // ================================================ 3505 3506 // Species 3507 3508 /** 3509 * Class representing {@link FloatVector}'s of the same {@link VectorShape VectorShape}. 3510 */ 3511 /*package-private*/ 3512 static final class FloatSpecies extends AbstractSpecies<Float> { 3513 private FloatSpecies(VectorShape shape, 3514 Class<? extends FloatVector> vectorType, 3515 Class<? 
extends AbstractMask<Float>> maskType, 3516 Function<Object, FloatVector> vectorFactory) { 3517 super(shape, LaneType.of(float.class), 3518 vectorType, maskType, 3519 vectorFactory); 3520 assert(this.elementSize() == Float.SIZE); 3521 } 3522 3523 // Specializing overrides: 3524 3525 @Override 3526 @ForceInline 3527 public final Class<Float> elementType() { 3528 return float.class; 3529 } 3530 3531 @Override 3532 @ForceInline 3533 public final Class<Float> genericElementType() { 3534 return Float.class; 3535 } 3536 3537 @Override 3538 @ForceInline 3539 public final Class<float[]> arrayType() { 3540 return float[].class; 3541 } 3542 3543 @SuppressWarnings("unchecked") 3544 @Override 3545 @ForceInline 3546 public final Class<? extends FloatVector> vectorType() { 3547 return (Class<? extends FloatVector>) vectorType; 3548 } 3549 3550 @Override 3551 @ForceInline 3552 public final long checkValue(long e) { 3553 longToElementBits(e); // only for exception 3554 return e; 3555 } 3556 3557 /*package-private*/ 3558 @Override 3559 @ForceInline 3560 final FloatVector broadcastBits(long bits) { 3561 return (FloatVector) 3562 VectorIntrinsics.broadcastCoerced( 3563 vectorType, float.class, laneCount, 3564 bits, this, 3565 (bits_, s_) -> s_.rvOp(i -> bits_)); 3566 } 3567 3568 /*package-private*/ 3569 @ForceInline 3570 3571 final FloatVector broadcast(float e) { 3572 return broadcastBits(toBits(e)); 3573 } 3574 3575 @Override 3576 @ForceInline 3577 public final FloatVector broadcast(long e) { 3578 return broadcastBits(longToElementBits(e)); 3579 } 3580 3581 /*package-private*/ 3582 final @Override 3583 @ForceInline 3584 long longToElementBits(long value) { 3585 // Do the conversion, and then test it for failure. 
3586 float e = (float) value; 3587 if ((long) e != value) { 3588 throw badElementBits(value, e); 3589 } 3590 return toBits(e); 3591 } 3592 3593 /*package-private*/ 3594 @ForceInline 3595 static long toIntegralChecked(float e, boolean convertToInt) { 3596 long value = convertToInt ? (int) e : (long) e; 3597 if ((float) value != e) { 3598 throw badArrayBits(e, convertToInt, value); 3599 } 3600 return value; 3601 } 3602 3603 @Override 3604 @ForceInline 3605 public final FloatVector fromValues(long... values) { 3606 VectorIntrinsics.requireLength(values.length, laneCount); 3607 float[] va = new float[laneCount()]; 3608 for (int i = 0; i < va.length; i++) { 3609 long lv = values[i]; 3610 float v = (float) lv; 3611 va[i] = v; 3612 if ((long)v != lv) { 3613 throw badElementBits(lv, v); 3614 } 3615 } 3616 return dummyVector().fromArray0(va, 0); 3617 } 3618 3619 /* this non-public one is for internal conversions */ 3620 @Override 3621 @ForceInline 3622 final FloatVector fromIntValues(int[] values) { 3623 VectorIntrinsics.requireLength(values.length, laneCount); 3624 float[] va = new float[laneCount()]; 3625 for (int i = 0; i < va.length; i++) { 3626 int lv = values[i]; 3627 float v = (float) lv; 3628 va[i] = v; 3629 if ((int)v != lv) { 3630 throw badElementBits(lv, v); 3631 } 3632 } 3633 return dummyVector().fromArray0(va, 0); 3634 } 3635 3636 // Virtual constructors 3637 3638 @ForceInline 3639 @Override final 3640 public FloatVector fromArray(Object a, int offset) { 3641 // User entry point: Be careful with inputs. 3642 return FloatVector 3643 .fromArray(this, (float[]) a, offset); 3644 } 3645 3646 @Override final 3647 FloatVector dummyVector() { 3648 return (FloatVector) super.dummyVector(); 3649 } 3650 3651 final 3652 FloatVector vectorFactory(float[] vec) { 3653 // Species delegates all factory requests to its dummy 3654 // vector. The dummy knows all about it. 
3655 return dummyVector().vectorFactory(vec); 3656 } 3657 3658 /*package-private*/ 3659 final @Override 3660 @ForceInline 3661 FloatVector rvOp(RVOp f) { 3662 float[] res = new float[laneCount()]; 3663 for (int i = 0; i < res.length; i++) { 3664 int bits = (int) f.apply(i); 3665 res[i] = fromBits(bits); 3666 } 3667 return dummyVector().vectorFactory(res); 3668 } 3669 3670 FloatVector vOp(FVOp f) { 3671 float[] res = new float[laneCount()]; 3672 for (int i = 0; i < res.length; i++) { 3673 res[i] = f.apply(i); 3674 } 3675 return dummyVector().vectorFactory(res); 3676 } 3677 3678 FloatVector vOp(VectorMask<Float> m, FVOp f) { 3679 float[] res = new float[laneCount()]; 3680 boolean[] mbits = ((AbstractMask<Float>)m).getBits(); 3681 for (int i = 0; i < res.length; i++) { 3682 if (mbits[i]) { 3683 res[i] = f.apply(i); 3684 } 3685 } 3686 return dummyVector().vectorFactory(res); 3687 } 3688 3689 /*package-private*/ 3690 @ForceInline 3691 <M> FloatVector ldOp(M memory, int offset, 3692 FLdOp<M> f) { 3693 return dummyVector().ldOp(memory, offset, f); 3694 } 3695 3696 /*package-private*/ 3697 @ForceInline 3698 <M> FloatVector ldOp(M memory, int offset, 3699 AbstractMask<Float> m, 3700 FLdOp<M> f) { 3701 return dummyVector().ldOp(memory, offset, m, f); 3702 } 3703 3704 /*package-private*/ 3705 @ForceInline 3706 <M> void stOp(M memory, int offset, FStOp<M> f) { 3707 dummyVector().stOp(memory, offset, f); 3708 } 3709 3710 /*package-private*/ 3711 @ForceInline 3712 <M> void stOp(M memory, int offset, 3713 AbstractMask<Float> m, 3714 FStOp<M> f) { 3715 dummyVector().stOp(memory, offset, m, f); 3716 } 3717 3718 // N.B. Make sure these constant vectors and 3719 // masks load up correctly into registers. 3720 // 3721 // Also, see if we can avoid all that switching. 3722 // Could we cache both vectors and both masks in 3723 // this species object? 
3724 3725 // Zero and iota vector access 3726 @Override 3727 @ForceInline 3728 public final FloatVector zero() { 3729 if ((Class<?>) vectorType() == FloatMaxVector.class) 3730 return FloatMaxVector.ZERO; 3731 switch (vectorBitSize()) { 3732 case 64: return Float64Vector.ZERO; 3733 case 128: return Float128Vector.ZERO; 3734 case 256: return Float256Vector.ZERO; 3735 case 512: return Float512Vector.ZERO; 3736 } 3737 throw new AssertionError(); 3738 } 3739 3740 @Override 3741 @ForceInline 3742 public final FloatVector iota() { 3743 if ((Class<?>) vectorType() == FloatMaxVector.class) 3744 return FloatMaxVector.IOTA; 3745 switch (vectorBitSize()) { 3746 case 64: return Float64Vector.IOTA; 3747 case 128: return Float128Vector.IOTA; 3748 case 256: return Float256Vector.IOTA; 3749 case 512: return Float512Vector.IOTA; 3750 } 3751 throw new AssertionError(); 3752 } 3753 3754 // Mask access 3755 @Override 3756 @ForceInline 3757 public final VectorMask<Float> maskAll(boolean bit) { 3758 if ((Class<?>) vectorType() == FloatMaxVector.class) 3759 return FloatMaxVector.FloatMaxMask.maskAll(bit); 3760 switch (vectorBitSize()) { 3761 case 64: return Float64Vector.Float64Mask.maskAll(bit); 3762 case 128: return Float128Vector.Float128Mask.maskAll(bit); 3763 case 256: return Float256Vector.Float256Mask.maskAll(bit); 3764 case 512: return Float512Vector.Float512Mask.maskAll(bit); 3765 } 3766 throw new AssertionError(); 3767 } 3768 } 3769 3770 /** 3771 * Finds a species for an element type of {@code float} and shape. 
3772 * 3773 * @param s the shape 3774 * @return a species for an element type of {@code float} and shape 3775 * @throws IllegalArgumentException if no such species exists for the shape 3776 */ 3777 static FloatSpecies species(VectorShape s) { 3778 Objects.requireNonNull(s); 3779 switch (s) { 3780 case S_64_BIT: return (FloatSpecies) SPECIES_64; 3781 case S_128_BIT: return (FloatSpecies) SPECIES_128; 3782 case S_256_BIT: return (FloatSpecies) SPECIES_256; 3783 case S_512_BIT: return (FloatSpecies) SPECIES_512; 3784 case S_Max_BIT: return (FloatSpecies) SPECIES_MAX; 3785 default: throw new IllegalArgumentException("Bad shape: " + s); 3786 } 3787 } 3788 3789 /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */ 3790 public static final VectorSpecies<Float> SPECIES_64 3791 = new FloatSpecies(VectorShape.S_64_BIT, 3792 Float64Vector.class, 3793 Float64Vector.Float64Mask.class, 3794 Float64Vector::new); 3795 3796 /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ 3797 public static final VectorSpecies<Float> SPECIES_128 3798 = new FloatSpecies(VectorShape.S_128_BIT, 3799 Float128Vector.class, 3800 Float128Vector.Float128Mask.class, 3801 Float128Vector::new); 3802 3803 /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ 3804 public static final VectorSpecies<Float> SPECIES_256 3805 = new FloatSpecies(VectorShape.S_256_BIT, 3806 Float256Vector.class, 3807 Float256Vector.Float256Mask.class, 3808 Float256Vector::new); 3809 3810 /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. 
*/ 3811 public static final VectorSpecies<Float> SPECIES_512 3812 = new FloatSpecies(VectorShape.S_512_BIT, 3813 Float512Vector.class, 3814 Float512Vector.Float512Mask.class, 3815 Float512Vector::new); 3816 3817 /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ 3818 public static final VectorSpecies<Float> SPECIES_MAX 3819 = new FloatSpecies(VectorShape.S_Max_BIT, 3820 FloatMaxVector.class, 3821 FloatMaxVector.FloatMaxMask.class, 3822 FloatMaxVector::new); 3823 3824 /** 3825 * Preferred species for {@link FloatVector}s. 3826 * A preferred species is a species of maximal bit-size for the platform. 3827 */ 3828 public static final VectorSpecies<Float> SPECIES_PREFERRED 3829 = (FloatSpecies) VectorSpecies.ofPreferred(float.class); 3830 }