1 /*
   2  * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.FloatBuffer;
  29 import java.nio.ByteOrder;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.IntUnaryOperator;
  34 import java.util.function.Function;
  35 import java.util.function.UnaryOperator;
  36 import java.util.concurrent.ThreadLocalRandom;
  37 
  38 import jdk.internal.misc.Unsafe;
  39 import jdk.internal.vm.annotation.ForceInline;
  40 
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 import static jdk.incubator.vector.VectorOperators.*;
  43 
  44 // -- This file was mechanically generated: Do not edit! -- //
  45 
  46 /**
  47  * A specialized {@link Vector} representing an ordered immutable sequence of
  48  * {@code float} values.
  49  */
  50 @SuppressWarnings("cast")  // warning: redundant cast
  51 public abstract class FloatVector extends AbstractVector<Float> {
  52 
  53     FloatVector() {}
  54 
  55     static final int FORBID_OPCODE_KIND = VO_NOFP;
  56 
  57     @ForceInline
  58     static int opCode(Operator op) {
  59         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  60     }
  61     @ForceInline
  62     static int opCode(Operator op, int requireKind) {
  63         requireKind |= VO_OPCODE_VALID;
  64         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  65     }
  66     @ForceInline
  67     static boolean opKind(Operator op, int bit) {
  68         return VectorOperators.opKind(op, bit);
  69     }
  70 
  71     // Virtualized factories and operators,
  72     // coded with portable definitions.
  73     // These are all @ForceInline in case
  74     // they need to be used performantly.
  75     // The various shape-specific subclasses
  76     // also specialize them by wrapping
  77     // them in a call like this:
  78     //    return (Byte128Vector)
  79     //       super.bOp((Byte128Vector) o);
  80     // The purpose of that is to forcibly inline
  81     // the generic definition from this file
  82     // into a sharply type- and size-specific
  83     // wrapper in the subclass file, so that
  84     // the JIT can specialize the code.
  85     // The code is only inlined and expanded
  86     // if it gets hot.  Think of it as a cheap
  87     // and lazy version of C++ templates.
  88 
  89     // Virtualized getter
  90 
  91     /*package-private*/
  92     abstract float[] getElements();
  93 
  94     // Virtualized constructors
  95 
  96     /**
  97      * Build a vector directly using my own constructor.
  98      * It is an error if the array is aliased elsewhere.
  99      */
 100     /*package-private*/
 101     abstract FloatVector vectorFactory(float[] vec);
 102 
 103     /**
 104      * Build a mask directly using my species.
 105      * It is an error if the array is aliased elsewhere.
 106      */
 107     /*package-private*/
 108     @ForceInline
 109     final
 110     AbstractMask<Float> maskFactory(boolean[] bits) {
 111         return vspecies().maskFactory(bits);
 112     }
 113 
 114     // Constant loader (takes dummy as vector arg)
 115     interface FVOp {
 116         float apply(int i);
 117     }
 118 
 119     /*package-private*/
 120     @ForceInline
 121     final
 122     FloatVector vOp(FVOp f) {
 123         float[] res = new float[length()];
 124         for (int i = 0; i < res.length; i++) {
 125             res[i] = f.apply(i);
 126         }
 127         return vectorFactory(res);
 128     }
 129 
 130     @ForceInline
 131     final
 132     FloatVector vOp(VectorMask<Float> m, FVOp f) {
 133         float[] res = new float[length()];
 134         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 135         for (int i = 0; i < res.length; i++) {
 136             if (mbits[i]) {
 137                 res[i] = f.apply(i);
 138             }
 139         }
 140         return vectorFactory(res);
 141     }
 142 
 143     // Unary operator
 144 
 145     /*package-private*/
 146     interface FUnOp {
 147         float apply(int i, float a);
 148     }
 149 
 150     /*package-private*/
 151     abstract
 152     FloatVector uOp(FUnOp f);
 153     @ForceInline
 154     final
 155     FloatVector uOpTemplate(FUnOp f) {
 156         float[] vec = getElements();
 157         float[] res = new float[length()];
 158         for (int i = 0; i < res.length; i++) {
 159             res[i] = f.apply(i, vec[i]);
 160         }
 161         return vectorFactory(res);
 162     }
 163 
 164     /*package-private*/
 165     abstract
 166     FloatVector uOp(VectorMask<Float> m,
 167                              FUnOp f);
 168     @ForceInline
 169     final
 170     FloatVector uOpTemplate(VectorMask<Float> m,
 171                                      FUnOp f) {
 172         float[] vec = getElements();
 173         float[] res = new float[length()];
 174         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 175         for (int i = 0; i < res.length; i++) {
 176             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 177         }
 178         return vectorFactory(res);
 179     }
 180 
 181     // Binary operator
 182 
 183     /*package-private*/
 184     interface FBinOp {
 185         float apply(int i, float a, float b);
 186     }
 187 
 188     /*package-private*/
 189     abstract
 190     FloatVector bOp(Vector<Float> o,
 191                              FBinOp f);
 192     @ForceInline
 193     final
 194     FloatVector bOpTemplate(Vector<Float> o,
 195                                      FBinOp f) {
 196         float[] res = new float[length()];
 197         float[] vec1 = this.getElements();
 198         float[] vec2 = ((FloatVector)o).getElements();
 199         for (int i = 0; i < res.length; i++) {
 200             res[i] = f.apply(i, vec1[i], vec2[i]);
 201         }
 202         return vectorFactory(res);
 203     }
 204 
 205     /*package-private*/
 206     abstract
 207     FloatVector bOp(Vector<Float> o,
 208                              VectorMask<Float> m,
 209                              FBinOp f);
 210     @ForceInline
 211     final
 212     FloatVector bOpTemplate(Vector<Float> o,
 213                                      VectorMask<Float> m,
 214                                      FBinOp f) {
 215         float[] res = new float[length()];
 216         float[] vec1 = this.getElements();
 217         float[] vec2 = ((FloatVector)o).getElements();
 218         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 219         for (int i = 0; i < res.length; i++) {
 220             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 221         }
 222         return vectorFactory(res);
 223     }
 224 
 225     // Ternary operator
 226 
 227     /*package-private*/
 228     interface FTriOp {
 229         float apply(int i, float a, float b, float c);
 230     }
 231 
 232     /*package-private*/
 233     abstract
 234     FloatVector tOp(Vector<Float> o1,
 235                              Vector<Float> o2,
 236                              FTriOp f);
 237     @ForceInline
 238     final
 239     FloatVector tOpTemplate(Vector<Float> o1,
 240                                      Vector<Float> o2,
 241                                      FTriOp f) {
 242         float[] res = new float[length()];
 243         float[] vec1 = this.getElements();
 244         float[] vec2 = ((FloatVector)o1).getElements();
 245         float[] vec3 = ((FloatVector)o2).getElements();
 246         for (int i = 0; i < res.length; i++) {
 247             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 248         }
 249         return vectorFactory(res);
 250     }
 251 
 252     /*package-private*/
 253     abstract
 254     FloatVector tOp(Vector<Float> o1,
 255                              Vector<Float> o2,
 256                              VectorMask<Float> m,
 257                              FTriOp f);
 258     @ForceInline
 259     final
 260     FloatVector tOpTemplate(Vector<Float> o1,
 261                                      Vector<Float> o2,
 262                                      VectorMask<Float> m,
 263                                      FTriOp f) {
 264         float[] res = new float[length()];
 265         float[] vec1 = this.getElements();
 266         float[] vec2 = ((FloatVector)o1).getElements();
 267         float[] vec3 = ((FloatVector)o2).getElements();
 268         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 269         for (int i = 0; i < res.length; i++) {
 270             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 271         }
 272         return vectorFactory(res);
 273     }
 274 
 275     // Reduction operator
 276 
 277     /*package-private*/
 278     abstract
 279     float rOp(float v, FBinOp f);
 280     @ForceInline
 281     final
 282     float rOpTemplate(float v, FBinOp f) {
 283         float[] vec = getElements();
 284         for (int i = 0; i < vec.length; i++) {
 285             v = f.apply(i, v, vec[i]);
 286         }
 287         return v;
 288     }
 289 
 290     // Memory reference
 291 
 292     /*package-private*/
 293     interface FLdOp<M> {
 294         float apply(M memory, int offset, int i);
 295     }
 296 
 297     /*package-private*/
 298     @ForceInline
 299     final
 300     <M> FloatVector ldOp(M memory, int offset,
 301                                   FLdOp<M> f) {
 302         //dummy; no vec = getElements();
 303         float[] res = new float[length()];
 304         for (int i = 0; i < res.length; i++) {
 305             res[i] = f.apply(memory, offset, i);
 306         }
 307         return vectorFactory(res);
 308     }
 309 
 310     /*package-private*/
 311     @ForceInline
 312     final
 313     <M> FloatVector ldOp(M memory, int offset,
 314                                   VectorMask<Float> m,
 315                                   FLdOp<M> f) {
 316         //float[] vec = getElements();
 317         float[] res = new float[length()];
 318         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 319         for (int i = 0; i < res.length; i++) {
 320             if (mbits[i]) {
 321                 res[i] = f.apply(memory, offset, i);
 322             }
 323         }
 324         return vectorFactory(res);
 325     }
 326 
 327     interface FStOp<M> {
 328         void apply(M memory, int offset, int i, float a);
 329     }
 330 
 331     /*package-private*/
 332     @ForceInline
 333     final
 334     <M> void stOp(M memory, int offset,
 335                   FStOp<M> f) {
 336         float[] vec = getElements();
 337         for (int i = 0; i < vec.length; i++) {
 338             f.apply(memory, offset, i, vec[i]);
 339         }
 340     }
 341 
 342     /*package-private*/
 343     @ForceInline
 344     final
 345     <M> void stOp(M memory, int offset,
 346                   VectorMask<Float> m,
 347                   FStOp<M> f) {
 348         float[] vec = getElements();
 349         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 350         for (int i = 0; i < vec.length; i++) {
 351             if (mbits[i]) {
 352                 f.apply(memory, offset, i, vec[i]);
 353             }
 354         }
 355     }
 356 
 357     // Binary test
 358 
 359     /*package-private*/
 360     interface FBinTest {
 361         boolean apply(int cond, int i, float a, float b);
 362     }
 363 
 364     /*package-private*/
 365     @ForceInline
 366     final
 367     AbstractMask<Float> bTest(int cond,
 368                                   Vector<Float> o,
 369                                   FBinTest f) {
 370         float[] vec1 = getElements();
 371         float[] vec2 = ((FloatVector)o).getElements();
 372         boolean[] bits = new boolean[length()];
 373         for (int i = 0; i < length(); i++){
 374             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 375         }
 376         return maskFactory(bits);
 377     }
 378 
 379     /*package-private*/
 380     @ForceInline
 381     static boolean doBinTest(int cond, float a, float b) {
 382         switch (cond) {
 383         case BT_eq:  return a == b;
 384         case BT_ne:  return a != b;
 385         case BT_lt:  return a < b;
 386         case BT_le:  return a <= b;
 387         case BT_gt:  return a > b;
 388         case BT_ge:  return a >= b;
 389         }
 390         throw new AssertionError(Integer.toHexString(cond));
 391     }
 392 
 393     /*package-private*/
 394     @Override
 395     abstract FloatSpecies vspecies();
 396 
 397     /*package-private*/
 398     @ForceInline
 399     static long toBits(float e) {
 400         return  Float.floatToIntBits(e);
 401     }
 402 
 403     /*package-private*/
 404     @ForceInline
 405     static float fromBits(long bits) {
 406         return Float.intBitsToFloat((int)bits);
 407     }
 408 
 409     // Static factories (other than memory operations)
 410 
 411     // Note: A surprising behavior in javadoc
 412     // sometimes makes a lone /** {@inheritDoc} */
 413     // comment drop the method altogether,
    // apparently if the method mentions a
 415     // parameter or return type of Vector<Float>
 416     // instead of Vector<E> as originally specified.
 417     // Adding an empty HTML fragment appears to
 418     // nudge javadoc into providing the desired
 419     // inherited documentation.  We use the HTML
 420     // comment <!--workaround--> for this.
 421 
 422     /**
 423      * {@inheritDoc} <!--workaround-->
 424      */
 425     @ForceInline
 426     public static FloatVector zero(VectorSpecies<Float> species) {
 427         FloatSpecies vsp = (FloatSpecies) species;
 428         return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), float.class, species.length(),
 429                         toBits(0.0f), vsp,
 430                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
 431     }
 432 
 433     /**
 434      * Returns a vector of the same species as this one
 435      * where all lane elements are set to
 436      * the primitive value {@code e}.
 437      *
 438      * The contents of the current vector are discarded;
 439      * only the species is relevant to this operation.
 440      *
 441      * <p> This method returns the value of this expression:
 442      * {@code FloatVector.broadcast(this.species(), e)}.
 443      *
 444      * @apiNote
 445      * Unlike the similar method named {@code broadcast()}
 446      * in the supertype {@code Vector}, this method does not
 447      * need to validate its argument, and cannot throw
 448      * {@code IllegalArgumentException}.  This method is
 449      * therefore preferable to the supertype method.
 450      *
 451      * @param e the value to broadcast
 452      * @return a vector where all lane elements are set to
 453      *         the primitive value {@code e}
 454      * @see #broadcast(VectorSpecies,long)
 455      * @see Vector#broadcast(long)
 456      * @see VectorSpecies#broadcast(long)
 457      */
 458     public abstract FloatVector broadcast(float e);
 459 
 460     /**
 461      * Returns a vector of the given species
 462      * where all lane elements are set to
 463      * the primitive value {@code e}.
 464      *
 465      * @param species species of the desired vector
 466      * @param e the value to broadcast
 467      * @return a vector where all lane elements are set to
 468      *         the primitive value {@code e}
 469      * @see #broadcast(long)
 470      * @see Vector#broadcast(long)
 471      * @see VectorSpecies#broadcast(long)
 472      */
 473     public static FloatVector broadcast(VectorSpecies<Float> species, float e) {
 474         FloatSpecies vsp = (FloatSpecies) species;
 475         return vsp.broadcast(e);
 476     }
 477 
 478     /*package-private*/
 479     @ForceInline
 480     final FloatVector broadcastTemplate(float e) {
 481         FloatSpecies vsp = vspecies();
 482         return vsp.broadcast(e);
 483     }
 484 
 485     /**
 486      * {@inheritDoc} <!--workaround-->
 487      * @apiNote
 488      * When working with vector subtypes like {@code FloatVector},
 489      * {@linkplain #broadcast(float) the more strongly typed method}
 490      * is typically selected.  It can be explicitly selected
 491      * using a cast: {@code v.broadcast((float)e)}.
 492      * The two expressions will produce numerically identical results.
 493      */
 494     @Override
 495     public abstract FloatVector broadcast(long e);
 496 
 497     /**
 498      * Returns a vector of the given species
 499      * where all lane elements are set to
 500      * the primitive value {@code e}.
 501      *
 502      * The {@code long} value must be accurately representable
 503      * by the {@code ETYPE} of the vector species, so that
 504      * {@code e==(long)(ETYPE)e}.
 505      *
 506      * @param species species of the desired vector
 507      * @param e the value to broadcast
 508      * @return a vector where all lane elements are set to
 509      *         the primitive value {@code e}
 510      * @throws IllegalArgumentException
 511      *         if the given {@code long} value cannot
 512      *         be represented by the vector's {@code ETYPE}
 513      * @see #broadcast(VectorSpecies,float)
 514      * @see VectorSpecies#checkValue(long)
 515      */
 516     public static FloatVector broadcast(VectorSpecies<Float> species, long e) {
 517         FloatSpecies vsp = (FloatSpecies) species;
 518         return vsp.broadcast(e);
 519     }
 520 
 521     /*package-private*/
 522     @ForceInline
 523     final FloatVector broadcastTemplate(long e) {
 524         return vspecies().broadcast(e);
 525     }
 526 
 527     /**
 528      * Returns a vector where each lane element is set to given
 529      * primitive values.
 530      * <p>
 531      * For each vector lane, where {@code N} is the vector lane index, the
 532      * the primitive value at index {@code N} is placed into the resulting
 533      * vector at lane index {@code N}.
 534      *
 535      * @param species species of the desired vector
 536      * @param es the given primitive values
 537      * @return a vector where each lane element is set to given primitive
 538      * values
 539      * @throws IllegalArgumentException
 540      *         if {@code es.length != species.length()}
 541      */
 542     @ForceInline
 543     @SuppressWarnings("unchecked")
 544     public static FloatVector fromValues(VectorSpecies<Float> species, float... es) {
 545         FloatSpecies vsp = (FloatSpecies) species;
 546         int vlength = vsp.laneCount();
 547         VectorIntrinsics.requireLength(es.length, vlength);
 548         // Get an unaliased copy and use it directly:
 549         return vsp.vectorFactory(Arrays.copyOf(es, vlength));
 550     }
 551 
 552     /**
 553      * Returns a vector where the first lane element is set to the primtive
 554      * value {@code e}, all other lane elements are set to the default
 555      * value(positive zero).
 556      *
 557      * @param species species of the desired vector
 558      * @param e the value
 559      * @return a vector where the first lane element is set to the primitive
 560      * value {@code e}
 561      */
 562     // FIXME: Does this carry its weight?
 563     @ForceInline
 564     public static FloatVector single(VectorSpecies<Float> species, float e) {
 565         return zero(species).withLane(0, e);
 566     }
 567 
 568     /**
 569      * Returns a vector where each lane element is set to a randomly
 570      * generated primitive value.
 571      *
 572      * The semantics are equivalent to calling
 573      * {@link ThreadLocalRandom#nextFloat()}
 574      * for each lane, from first to last.
 575      *
 576      * @param species species of the desired vector
 577      * @return a vector where each lane elements is set to a randomly
 578      * generated primitive value
 579      */
 580     public static FloatVector random(VectorSpecies<Float> species) {
 581         FloatSpecies vsp = (FloatSpecies) species;
 582         ThreadLocalRandom r = ThreadLocalRandom.current();
 583         return vsp.vOp(i -> nextRandom(r));
 584     }
 585     private static float nextRandom(ThreadLocalRandom r) {
 586         return r.nextFloat();
 587     }
 588 
 589     // Unary lanewise support
 590 
 591     /**
 592      * {@inheritDoc} <!--workaround-->
 593      */
 594     public abstract
 595     FloatVector lanewise(VectorOperators.Unary op);
 596 
 597     @ForceInline
 598     final
 599     FloatVector lanewiseTemplate(VectorOperators.Unary op) {
 600         if (opKind(op, VO_SPECIAL)) {
 601             if (op == ZOMO) {
 602                 return blend(broadcast(-1), compare(NE, 0));
 603             }
 604         }
 605         int opc = opCode(op);
 606         return VectorIntrinsics.unaryOp(
 607             opc, getClass(), float.class, length(),
 608             this,
 609             UN_IMPL.find(op, opc, (opc_) -> {
 610               switch (opc_) {
 611                 case VECTOR_OP_NEG: return v0 ->
 612                         v0.uOp((i, a) -> (float) -a);
 613                 case VECTOR_OP_ABS: return v0 ->
 614                         v0.uOp((i, a) -> (float) Math.abs(a));
 615                 case VECTOR_OP_SIN: return v0 ->
 616                         v0.uOp((i, a) -> (float) Math.sin(a));
 617                 case VECTOR_OP_COS: return v0 ->
 618                         v0.uOp((i, a) -> (float) Math.cos(a));
 619                 case VECTOR_OP_TAN: return v0 ->
 620                         v0.uOp((i, a) -> (float) Math.tan(a));
 621                 case VECTOR_OP_ASIN: return v0 ->
 622                         v0.uOp((i, a) -> (float) Math.asin(a));
 623                 case VECTOR_OP_ACOS: return v0 ->
 624                         v0.uOp((i, a) -> (float) Math.acos(a));
 625                 case VECTOR_OP_ATAN: return v0 ->
 626                         v0.uOp((i, a) -> (float) Math.atan(a));
 627                 case VECTOR_OP_EXP: return v0 ->
 628                         v0.uOp((i, a) -> (float) Math.exp(a));
 629                 case VECTOR_OP_LOG: return v0 ->
 630                         v0.uOp((i, a) -> (float) Math.log(a));
 631                 case VECTOR_OP_LOG10: return v0 ->
 632                         v0.uOp((i, a) -> (float) Math.log10(a));
 633                 case VECTOR_OP_SQRT: return v0 ->
 634                         v0.uOp((i, a) -> (float) Math.sqrt(a));
 635                 case VECTOR_OP_CBRT: return v0 ->
 636                         v0.uOp((i, a) -> (float) Math.cbrt(a));
 637                 case VECTOR_OP_SINH: return v0 ->
 638                         v0.uOp((i, a) -> (float) Math.sinh(a));
 639                 case VECTOR_OP_COSH: return v0 ->
 640                         v0.uOp((i, a) -> (float) Math.cosh(a));
 641                 case VECTOR_OP_TANH: return v0 ->
 642                         v0.uOp((i, a) -> (float) Math.tanh(a));
 643                 case VECTOR_OP_EXPM1: return v0 ->
 644                         v0.uOp((i, a) -> (float) Math.expm1(a));
 645                 case VECTOR_OP_LOG1P: return v0 ->
 646                         v0.uOp((i, a) -> (float) Math.log1p(a));
 647                 default: return null;
 648               }}));
 649     }
 650     private static final
 651     ImplCache<Unary,UnaryOperator<FloatVector>> UN_IMPL
 652         = new ImplCache<>(Unary.class, FloatVector.class);
 653 
 654     /**
 655      * {@inheritDoc} <!--workaround-->
 656      */
 657     @ForceInline
 658     public final
 659     FloatVector lanewise(VectorOperators.Unary op,
 660                                   VectorMask<Float> m) {
 661         return blend(lanewise(op), m);
 662     }
 663 
 664     // Binary lanewise support
 665 
 666     /**
 667      * {@inheritDoc} <!--workaround-->
 668      * @see #lanewise(VectorOperators.Binary,float)
 669      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 670      */
 671     @Override
 672     public abstract
 673     FloatVector lanewise(VectorOperators.Binary op,
 674                                   Vector<Float> v);
 675     @ForceInline
 676     final
 677     FloatVector lanewiseTemplate(VectorOperators.Binary op,
 678                                           Vector<Float> v) {
 679         FloatVector that = (FloatVector) v;
 680         that.check(this);
 681         if (opKind(op, VO_SPECIAL )) {
 682             if (op == FIRST_NONZERO) {
 683                 // FIXME: Support this in the JIT.
 684                 VectorMask<Integer> thisNZ
 685                     = this.viewAsIntegralLanes().compare(NE, (int) 0);
 686                 that = that.blend((float) 0, thisNZ.cast(vspecies()));
 687                 op = OR_UNCHECKED;
 688                 // FIXME: Support OR_UNCHECKED on float/double also!
 689                 return this.viewAsIntegralLanes()
 690                     .lanewise(op, that.viewAsIntegralLanes())
 691                     .viewAsFloatingLanes();
 692             }
 693         }
 694         int opc = opCode(op);
 695         return VectorIntrinsics.binaryOp(
 696             opc, getClass(), float.class, length(),
 697             this, that,
 698             BIN_IMPL.find(op, opc, (opc_) -> {
 699               switch (opc_) {
 700                 case VECTOR_OP_ADD: return (v0, v1) ->
 701                         v0.bOp(v1, (i, a, b) -> (float)(a + b));
 702                 case VECTOR_OP_SUB: return (v0, v1) ->
 703                         v0.bOp(v1, (i, a, b) -> (float)(a - b));
 704                 case VECTOR_OP_MUL: return (v0, v1) ->
 705                         v0.bOp(v1, (i, a, b) -> (float)(a * b));
 706                 case VECTOR_OP_DIV: return (v0, v1) ->
 707                         v0.bOp(v1, (i, a, b) -> (float)(a / b));
 708                 case VECTOR_OP_MAX: return (v0, v1) ->
 709                         v0.bOp(v1, (i, a, b) -> (float)Math.max(a, b));
 710                 case VECTOR_OP_MIN: return (v0, v1) ->
 711                         v0.bOp(v1, (i, a, b) -> (float)Math.min(a, b));
 712                 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) ->
 713                         v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? a : b);
 714                 case VECTOR_OP_OR: return (v0, v1) ->
 715                         v0.bOp(v1, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
 716                 case VECTOR_OP_ATAN2: return (v0, v1) ->
 717                         v0.bOp(v1, (i, a, b) -> (float) Math.atan2(a, b));
 718                 case VECTOR_OP_POW: return (v0, v1) ->
 719                         v0.bOp(v1, (i, a, b) -> (float) Math.pow(a, b));
 720                 case VECTOR_OP_HYPOT: return (v0, v1) ->
 721                         v0.bOp(v1, (i, a, b) -> (float) Math.hypot(a, b));
 722                 default: return null;
 723                 }}));
 724     }
 725     private static final
 726     ImplCache<Binary,BinaryOperator<FloatVector>> BIN_IMPL
 727         = new ImplCache<>(Binary.class, FloatVector.class);
 728 
 729     /**
 730      * {@inheritDoc} <!--workaround-->
 731      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 732      */
 733     @ForceInline
 734     public final
 735     FloatVector lanewise(VectorOperators.Binary op,
 736                                   Vector<Float> v,
 737                                   VectorMask<Float> m) {
 738         return blend(lanewise(op, v), m);
 739     }
 740     // FIXME: Maybe all of the public final methods in this file (the
 741     // simple ones that just call lanewise) should be pushed down to
 742     // the X-VectorBits template.  They can't optimize properly at
 743     // this level, and must rely on inlining.  Does it work?
 744     // (If it works, of course keep the code here.)
 745 
 746     /**
 747      * Combines the lane values of this vector
 748      * with the value of a broadcast scalar.
 749      *
 750      * This is a lane-wise binary operation which applies
 751      * the selected operation to each lane.
 752      * The return value will be equal to this expression:
 753      * {@code this.lanewise(op, this.broadcast(e))}.
 754      *
 755      * @param op the operation used to process lane values
 756      * @param e the input scalar
 757      * @return the result of applying the operation lane-wise
 758      *         to the two input vectors
 759      * @throws UnsupportedOperationException if this vector does
 760      *         not support the requested operation
 761      * @see #lanewise(VectorOperators.Binary,Vector)
 762      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 763      */
 764     @ForceInline
 765     public final
 766     FloatVector lanewise(VectorOperators.Binary op,
 767                                   float e) {
 768         int opc = opCode(op);
 769         return lanewise(op, broadcast(e));
 770     }
 771 
 772     /**
 773      * Combines the lane values of this vector
 774      * with the value of a broadcast scalar,
 775      * with selection of lane elements controlled by a mask.
 776      *
 777      * This is a masked lane-wise binary operation which applies
 778      * the selected operation to each lane.
 779      * The return value will be equal to this expression:
 780      * {@code this.lanewise(op, this.broadcast(e), m)}.
 781      *
 782      * @param op the operation used to process lane values
 783      * @param e the input scalar
 784      * @param m the mask controlling lane selection
 785      * @return the result of applying the operation lane-wise
 786      *         to the input vector and the scalar
 787      * @throws UnsupportedOperationException if this vector does
 788      *         not support the requested operation
 789      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 790      * @see #lanewise(VectorOperators.Binary,float)
 791      */
 792     @ForceInline
 793     public final
 794     FloatVector lanewise(VectorOperators.Binary op,
 795                                   float e,
 796                                   VectorMask<Float> m) {
 797         return blend(lanewise(op, e), m);
 798     }
 799 
 800     /**
 801      * {@inheritDoc} <!--workaround-->
 802      * @apiNote
 803      * When working with vector subtypes like {@code FloatVector},
 804      * {@linkplain #lanewise(VectorOperators.Binary,float)
 805      * the more strongly typed method}
 806      * is typically selected.  It can be explicitly selected
 807      * using a cast: {@code v.lanewise(op,(float)e)}.
 808      * The two expressions will produce numerically identical results.
 809      */
 810     @ForceInline
 811     public final
 812     FloatVector lanewise(VectorOperators.Binary op,
 813                                   long e) {
 814         float e1 = (float) e;
 815         if ((long)e1 != e
 816             ) {
 817             vspecies().checkValue(e);  // for exception
 818         }
 819         return lanewise(op, e1);
 820     }
 821 
 822     /**
 823      * {@inheritDoc} <!--workaround-->
 824      * @apiNote
 825      * When working with vector subtypes like {@code FloatVector},
 826      * {@linkplain #lanewise(VectorOperators.Binary,float,VectorMask)
 827      * the more strongly typed method}
 828      * is typically selected.  It can be explicitly selected
 829      * using a cast: {@code v.lanewise(op,(float)e,m)}.
 830      * The two expressions will produce numerically identical results.
 831      */
 832     @ForceInline
 833     public final
 834     FloatVector lanewise(VectorOperators.Binary op,
 835                                   long e, VectorMask<Float> m) {
 836         return blend(lanewise(op, e), m);
 837     }
 838 
 839 
 840     // Ternary lanewise support
 841 
 842     // Ternary operators come in eight variations:
 843     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 844     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 845 
 846     // It is annoying to support all of these variations of masking
 847     // and broadcast, but it would be more surprising not to continue
 848     // the obvious pattern started by unary and binary.
 849 
   /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,float)
     * @see #lanewise(VectorOperators.Ternary,Vector,float)
     * @see #lanewise(VectorOperators.Ternary,float,Vector)
     */
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Ternary op,
                                                  Vector<Float> v1,
                                                  Vector<Float> v2);
    // Shared implementation of the unmasked ternary lanewise operation.
    // Both operands are cast to FloatVector and checked against this vector,
    // the operator is translated to an opcode, and the work is handed to
    // VectorIntrinsics.ternaryOp together with a Java fallback obtained
    // from TERN_IMPL.
    // NOTE(review): presumably each concrete subclass calls this from its
    // override of the abstract ternary lanewise -- confirm.
    @ForceInline
    final
    FloatVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Float> v1,
                                          Vector<Float> v2) {
        // The casts fail with ClassCastException if an operand is not a FloatVector.
        FloatVector that = (FloatVector) v1;
        FloatVector tother = (FloatVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        // NOTE(review): check(this) presumably verifies that the operand is
        // compatible with this vector -- confirm against its definition.
        that.check(this);
        tother.check(this);
        int opc = opCode(op);
        return VectorIntrinsics.ternaryOp(
            opc, getClass(), float.class, length(),
            this, that, tother,
            // Fallback lookup: only FMA has a Java implementation here
            // (Math.fma applied per lane through tOp); any other opcode
            // makes the factory return null.
            TERN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_FMA: return (v0, v1_, v2_) ->
                        v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c));
                default: return null;
                }}));
    }
    // Holder for the Java fallback implementations of ternary operations;
    // lanewiseTemplate(Ternary, ...) queries it through find(op, opc, factory).
    // NOTE(review): presumably find() memoizes the factory result per
    // operator, as the ImplCache name suggests -- confirm.
    private static final
    ImplCache<Ternary,TernaryOperation<FloatVector>> TERN_IMPL
        = new ImplCache<>(Ternary.class, FloatVector.class);
 890 
 891     /**
 892      * {@inheritDoc} <!--workaround-->
 893      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 894      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 895      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
 896      */
 897     @ForceInline
 898     public final
 899     FloatVector lanewise(VectorOperators.Ternary op,
 900                                   Vector<Float> v1,
 901                                   Vector<Float> v2,
 902                                   VectorMask<Float> m) {
 903         return blend(lanewise(op, v1, v2), m);
 904     }
 905 
 906     /**
 907      * Combines the lane values of this vector
 908      * with the values of two broadcast scalars.
 909      *
 910      * This is a lane-wise ternary operation which applies
 911      * the selected operation to each lane.
 912      * The return value will be equal to this expression:
 913      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 914      *
 915      * @param op the operation used to combine lane values
 916      * @param e1 the first input scalar
 917      * @param e2 the second input scalar
 918      * @return the result of applying the operation lane-wise
 919      *         to the input vector and the scalars
 920      * @throws UnsupportedOperationException if this vector does
 921      *         not support the requested operation
 922      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 923      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 924      */
 925     @ForceInline
 926     public final
 927     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 928                                   float e1,
 929                                   float e2) {
 930         return lanewise(op, broadcast(e1), broadcast(e2));
 931     }
 932 
 933     /**
 934      * Combines the lane values of this vector
 935      * with the values of two broadcast scalars,
 936      * with selection of lane elements controlled by a mask.
 937      *
 938      * This is a masked lane-wise ternary operation which applies
 939      * the selected operation to each lane.
 940      * The return value will be equal to this expression:
 941      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 942      *
 943      * @param op the operation used to combine lane values
 944      * @param e1 the first input scalar
 945      * @param e2 the second input scalar
 946      * @param m the mask controlling lane selection
 947      * @return the result of applying the operation lane-wise
 948      *         to the input vector and the scalars
 949      * @throws UnsupportedOperationException if this vector does
 950      *         not support the requested operation
 951      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 952      * @see #lanewise(VectorOperators.Ternary,float,float)
 953      */
 954     @ForceInline
 955     public final
 956     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 957                                   float e1,
 958                                   float e2,
 959                                   VectorMask<Float> m) {
 960         return blend(lanewise(op, e1, e2), m);
 961     }
 962 
 963     /**
 964      * Combines the lane values of this vector
 965      * with the values of another vector and a broadcast scalar.
 966      *
 967      * This is a lane-wise ternary operation which applies
 968      * the selected operation to each lane.
 969      * The return value will be equal to this expression:
 970      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 971      *
 972      * @param op the operation used to combine lane values
 973      * @param v1 the other input vector
 974      * @param e2 the input scalar
 975      * @return the result of applying the operation lane-wise
 976      *         to the input vectors and the scalar
 977      * @throws UnsupportedOperationException if this vector does
 978      *         not support the requested operation
 979      * @see #lanewise(VectorOperators.Ternary,float,float)
 980      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 981      */
 982     @ForceInline
 983     public final
 984     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
 985                                   Vector<Float> v1,
 986                                   float e2) {
 987         return lanewise(op, v1, broadcast(e2));
 988     }
 989 
 990     /**
 991      * Combines the lane values of this vector
 992      * with the values of another vector and a broadcast scalar,
 993      * with selection of lane elements controlled by a mask.
 994      *
 995      * This is a masked lane-wise ternary operation which applies
 996      * the selected operation to each lane.
 997      * The return value will be equal to this expression:
 998      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
 999      *
1000      * @param op the operation used to combine lane values
1001      * @param v1 the other input vector
1002      * @param e2 the input scalar
1003      * @param m the mask controlling lane selection
1004      * @return the result of applying the operation lane-wise
1005      *         to the input vectors and the scalar
1006      * @throws UnsupportedOperationException if this vector does
1007      *         not support the requested operation
1008      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1009      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
1010      * @see #lanewise(VectorOperators.Ternary,Vector,float)
1011      */
1012     @ForceInline
1013     public final
1014     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1015                                   Vector<Float> v1,
1016                                   float e2,
1017                                   VectorMask<Float> m) {
1018         return blend(lanewise(op, v1, e2), m);
1019     }
1020 
1021     /**
1022      * Combines the lane values of this vector
1023      * with the values of another vector and a broadcast scalar.
1024      *
1025      * This is a lane-wise ternary operation which applies
1026      * the selected operation to each lane.
1027      * The return value will be equal to this expression:
1028      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1029      *
1030      * @param op the operation used to combine lane values
1031      * @param e1 the input scalar
1032      * @param v2 the other input vector
1033      * @return the result of applying the operation lane-wise
1034      *         to the input vectors and the scalar
1035      * @throws UnsupportedOperationException if this vector does
1036      *         not support the requested operation
1037      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1038      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
1039      */
1040     @ForceInline
1041     public final
1042     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1043                                   float e1,
1044                                   Vector<Float> v2) {
1045         return lanewise(op, broadcast(e1), v2);
1046     }
1047 
1048     /**
1049      * Combines the lane values of this vector
1050      * with the values of another vector and a broadcast scalar,
1051      * with selection of lane elements controlled by a mask.
1052      *
1053      * This is a masked lane-wise ternary operation which applies
1054      * the selected operation to each lane.
1055      * The return value will be equal to this expression:
1056      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1057      *
1058      * @param op the operation used to combine lane values
1059      * @param e1 the input scalar
1060      * @param v2 the other input vector
1061      * @param m the mask controlling lane selection
1062      * @return the result of applying the operation lane-wise
1063      *         to the input vectors and the scalar
1064      * @throws UnsupportedOperationException if this vector does
1065      *         not support the requested operation
1066      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1067      * @see #lanewise(VectorOperators.Ternary,float,Vector)
1068      */
1069     @ForceInline
1070     public final
1071     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1072                                   float e1,
1073                                   Vector<Float> v2,
1074                                   VectorMask<Float> m) {
1075         return blend(lanewise(op, e1, v2), m);
1076     }
1077 
1078     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1079     // https://en.wikipedia.org/wiki/Ogdoad
1080 
1081     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1082     //
1083     // These include masked and non-masked versions.
1084     // This subclass adds broadcast (masked or not).
1085 
1086     /**
1087      * {@inheritDoc} <!--workaround-->
1088      * @see #add(float)
1089      */
1090     @Override
1091     @ForceInline
1092     public final FloatVector add(Vector<Float> v) {
1093         return lanewise(ADD, v);
1094     }
1095 
1096     /**
1097      * Adds this vector to the broadcast of an input scalar.
1098      *
1099      * This is a lane-wise binary operation which applies
1100      * the primitive addition operation ({@code +}) to each lane.
1101      *
1102      * This method is also equivalent to the expression
1103      * {@link #lanewise(VectorOperators.Binary,float)
1104      *    lanewise}{@code (}{@link VectorOperators#ADD
1105      *    ADD}{@code , e)}.
1106      *
1107      * @param e the input scalar
1108      * @return the result of adding each lane of this vector to the scalar
1109      * @see #add(Vector)
1110      * @see #broadcast(float)
1111      * @see #add(float,VectorMask)
1112      * @see VectorOperators#ADD
1113      * @see #lanewise(VectorOperators.Binary,Vector)
1114      * @see #lanewise(VectorOperators.Binary,float)
1115      */
1116     @ForceInline
1117     public final
1118     FloatVector add(float e) {
1119         return lanewise(ADD, e);
1120     }
1121 
1122     /**
1123      * {@inheritDoc} <!--workaround-->
1124      * @see #add(float,VectorMask)
1125      */
1126     @Override
1127     @ForceInline
1128     public final FloatVector add(Vector<Float> v,
1129                                           VectorMask<Float> m) {
1130         return lanewise(ADD, v, m);
1131     }
1132 
1133     /**
1134      * Adds this vector to the broadcast of an input scalar,
1135      * selecting lane elements controlled by a mask.
1136      *
1137      * This is a masked lane-wise binary operation which applies
1138      * the primitive addition operation ({@code +}) to each lane.
1139      *
1140      * This method is also equivalent to the expression
1141      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1142      *    lanewise}{@code (}{@link VectorOperators#ADD
1143      *    ADD}{@code , s, m)}.
1144      *
1145      * @param e the input scalar
1146      * @param m the mask controlling lane selection
1147      * @return the result of adding each lane of this vector to the scalar
1148      * @see #add(Vector,VectorMask)
1149      * @see #broadcast(float)
1150      * @see #add(float)
1151      * @see VectorOperators#ADD
1152      * @see #lanewise(VectorOperators.Binary,Vector)
1153      * @see #lanewise(VectorOperators.Binary,float)
1154      */
1155     @ForceInline
1156     public final FloatVector add(float e,
1157                                           VectorMask<Float> m) {
1158         return lanewise(ADD, e, m);
1159     }
1160 
1161     /**
1162      * {@inheritDoc} <!--workaround-->
1163      * @see #sub(float)
1164      */
1165     @Override
1166     @ForceInline
1167     public final FloatVector sub(Vector<Float> v) {
1168         return lanewise(SUB, v);
1169     }
1170 
1171     /**
1172      * Subtracts an input scalar from this vector.
1173      *
1174      * This is a masked lane-wise binary operation which applies
1175      * the primitive subtraction operation ({@code -}) to each lane.
1176      *
1177      * This method is also equivalent to the expression
1178      * {@link #lanewise(VectorOperators.Binary,float)
1179      *    lanewise}{@code (}{@link VectorOperators#SUB
1180      *    SUB}{@code , e)}.
1181      *
1182      * @param e the input scalar
1183      * @return the result of subtracting the scalar from each lane of this vector
1184      * @see #sub(Vector)
1185      * @see #broadcast(float)
1186      * @see #sub(float,VectorMask)
1187      * @see VectorOperators#SUB
1188      * @see #lanewise(VectorOperators.Binary,Vector)
1189      * @see #lanewise(VectorOperators.Binary,float)
1190      */
1191     @ForceInline
1192     public final FloatVector sub(float e) {
1193         return lanewise(SUB, e);
1194     }
1195 
1196     /**
1197      * {@inheritDoc} <!--workaround-->
1198      * @see #sub(float,VectorMask)
1199      */
1200     @Override
1201     @ForceInline
1202     public final FloatVector sub(Vector<Float> v,
1203                                           VectorMask<Float> m) {
1204         return lanewise(SUB, v, m);
1205     }
1206 
1207     /**
1208      * Subtracts an input scalar from this vector
1209      * under the control of a mask.
1210      *
1211      * This is a masked lane-wise binary operation which applies
1212      * the primitive subtraction operation ({@code -}) to each lane.
1213      *
1214      * This method is also equivalent to the expression
1215      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1216      *    lanewise}{@code (}{@link VectorOperators#SUB
1217      *    SUB}{@code , s, m)}.
1218      *
1219      * @param e the input scalar
1220      * @param m the mask controlling lane selection
1221      * @return the result of subtracting the scalar from each lane of this vector
1222      * @see #sub(Vector,VectorMask)
1223      * @see #broadcast(float)
1224      * @see #sub(float)
1225      * @see VectorOperators#SUB
1226      * @see #lanewise(VectorOperators.Binary,Vector)
1227      * @see #lanewise(VectorOperators.Binary,float)
1228      */
1229     @ForceInline
1230     public final FloatVector sub(float e,
1231                                           VectorMask<Float> m) {
1232         return lanewise(SUB, e, m);
1233     }
1234 
1235     /**
1236      * {@inheritDoc} <!--workaround-->
1237      * @see #mul(float)
1238      */
1239     @Override
1240     @ForceInline
1241     public final FloatVector mul(Vector<Float> v) {
1242         return lanewise(MUL, v);
1243     }
1244 
1245     /**
1246      * Multiplies this vector by the broadcast of an input scalar.
1247      *
1248      * This is a lane-wise binary operation which applies
1249      * the primitive multiplication operation ({@code *}) to each lane.
1250      *
1251      * This method is also equivalent to the expression
1252      * {@link #lanewise(VectorOperators.Binary,float)
1253      *    lanewise}{@code (}{@link VectorOperators#MUL
1254      *    MUL}{@code , e)}.
1255      *
1256      * @param e the input scalar
1257      * @return the result of multiplying this vector by the given scalar
1258      * @see #mul(Vector)
1259      * @see #broadcast(float)
1260      * @see #mul(float,VectorMask)
1261      * @see VectorOperators#MUL
1262      * @see #lanewise(VectorOperators.Binary,Vector)
1263      * @see #lanewise(VectorOperators.Binary,float)
1264      */
1265     @ForceInline
1266     public final FloatVector mul(float e) {
1267         return lanewise(MUL, e);
1268     }
1269 
1270     /**
1271      * {@inheritDoc} <!--workaround-->
1272      * @see #mul(float,VectorMask)
1273      */
1274     @Override
1275     @ForceInline
1276     public final FloatVector mul(Vector<Float> v,
1277                                           VectorMask<Float> m) {
1278         return lanewise(MUL, v, m);
1279     }
1280 
1281     /**
1282      * Multiplies this vector by the broadcast of an input scalar,
1283      * selecting lane elements controlled by a mask.
1284      *
1285      * This is a masked lane-wise binary operation which applies
1286      * the primitive multiplication operation ({@code *}) to each lane.
1287      *
1288      * This method is also equivalent to the expression
1289      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1290      *    lanewise}{@code (}{@link VectorOperators#MUL
1291      *    MUL}{@code , s, m)}.
1292      *
1293      * @param e the input scalar
1294      * @param m the mask controlling lane selection
1295      * @return the result of muling each lane of this vector to the scalar
1296      * @see #mul(Vector,VectorMask)
1297      * @see #broadcast(float)
1298      * @see #mul(float)
1299      * @see VectorOperators#MUL
1300      * @see #lanewise(VectorOperators.Binary,Vector)
1301      * @see #lanewise(VectorOperators.Binary,float)
1302      */
1303     @ForceInline
1304     public final FloatVector mul(float e,
1305                                           VectorMask<Float> m) {
1306         return lanewise(MUL, e, m);
1307     }
1308 
1309     /**
1310      * {@inheritDoc} <!--workaround-->
1311      * @apiNote Because the underlying scalar operator is an IEEE
1312      * floating point number, division by zero in fact will
1313      * not throw an exception, but will yield a signed
1314      * infinity or NaN.
1315      */
1316     @Override
1317     @ForceInline
1318     public final FloatVector div(Vector<Float> v) {
1319         return lanewise(DIV, v);
1320     }
1321 
1322     /**
1323      * Divides this vector by the broadcast of an input scalar.
1324      *
1325      * This is a lane-wise binary operation which applies
1326      * the primitive division operation ({@code /}) to each lane.
1327      *
1328      * This method is also equivalent to the expression
1329      * {@link #lanewise(VectorOperators.Binary,float)
1330      *    lanewise}{@code (}{@link VectorOperators#DIV
1331      *    DIV}{@code , e)}.
1332      *
1333      * @apiNote Because the underlying scalar operator is an IEEE
1334      * floating point number, division by zero in fact will
1335      * not throw an exception, but will yield a signed
1336      * infinity or NaN.
1337      * @see #div(float)
1338 
1339      *
1340      * @param e the input scalar
1341      * @return the result of dividing each lane of this vector by the scalar
1342      * @see #div(Vector)
1343      * @see #broadcast(float)
1344      * @see #div(float,VectorMask)
1345      * @see VectorOperators#DIV
1346      * @see #lanewise(VectorOperators.Binary,Vector)
1347      * @see #lanewise(VectorOperators.Binary,float)
1348      */
1349     @ForceInline
1350     public final FloatVector div(float e) {
1351         return lanewise(DIV, e);
1352     }
1353 
1354     /**
1355      * {@inheritDoc} <!--workaround-->
1356      * @see #div(float,VectorMask)
1357      * @apiNote Because the underlying scalar operator is an IEEE
1358      * floating point number, division by zero in fact will
1359      * not throw an exception, but will yield a signed
1360      * infinity or NaN.
1361      */
1362     @Override
1363     @ForceInline
1364     public final FloatVector div(Vector<Float> v,
1365                                           VectorMask<Float> m) {
1366         return lanewise(DIV, v, m);
1367     }
1368 
1369     /**
1370      * Divides this vector by the broadcast of an input scalar,
1371      * selecting lane elements controlled by a mask.
1372      *
1373      * This is a masked lane-wise binary operation which applies
1374      * the primitive division operation ({@code /}) to each lane.
1375      *
1376      * This method is also equivalent to the expression
1377      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1378      *    lanewise}{@code (}{@link VectorOperators#DIV
1379      *    DIV}{@code , s, m)}.
1380      *
1381      * @apiNote Because the underlying scalar operator is an IEEE
1382      * floating point number, division by zero in fact will
1383      * not throw an exception, but will yield a signed
1384      * infinity or NaN.
1385      *
1386      * @param e the input scalar
1387      * @param m the mask controlling lane selection
1388      * @return the result of dividing each lane of this vector by the scalar
1389      * @see #div(Vector,VectorMask)
1390      * @see #broadcast(float)
1391      * @see #div(float)
1392      * @see VectorOperators#DIV
1393      * @see #lanewise(VectorOperators.Binary,Vector)
1394      * @see #lanewise(VectorOperators.Binary,float)
1395      */
1396     @ForceInline
1397     public final FloatVector div(float e,
1398                                           VectorMask<Float> m) {
1399         return lanewise(DIV, e, m);
1400     }
1401 
1402     /// END OF FULL-SERVICE BINARY METHODS
1403 
1404     /// SECOND-TIER BINARY METHODS
1405     //
1406     // There are no masked versions.
1407 
1408     /**
1409      * {@inheritDoc} <!--workaround-->
1410      * @apiNote
1411      * For this method, floating point negative
1412      * zero {@code -0.0} is treated as a value distinct from, and less
1413      * than the default value(positive zero).
1414      */
1415     @Override
1416     @ForceInline
1417     public final FloatVector min(Vector<Float> v) {
1418         return lanewise(MIN, v);
1419     }
1420 
1421     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1422     /**
1423      * Computes the smaller of this vector and the broadcast of an input scalar.
1424      *
1425      * This is a lane-wise binary operation which applies the
1426      * operation {@code Math.min()} to each pair of
1427      * corresponding lane values.
1428      *
1429      * This method is also equivalent to the expression
1430      * {@link #lanewise(VectorOperators.Binary,float)
1431      *    lanewise}{@code (}{@link VectorOperators#MIN
1432      *    MIN}{@code , e)}.
1433      *
1434      * @param e the input scalar
1435      * @return the result of multiplying this vector by the given scalar
1436      * @see #min(Vector)
1437      * @see #broadcast(float)
1438      * @see VectorOperators#MIN
1439      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1440      * @apiNote
1441      * For this method, floating point negative
1442      * zero {@code -0.0} is treated as a value distinct from, and less
1443      * than the default value(positive zero).
1444      */
1445     @ForceInline
1446     public final FloatVector min(float e) {
1447         return lanewise(MIN, e);
1448     }
1449 
1450     /**
1451      * {@inheritDoc} <!--workaround-->
1452      * @apiNote
1453      * For this method, negative floating-point zero compares
1454      * less than the default value, positive zero.
1455      */
1456     @Override
1457     @ForceInline
1458     public final FloatVector max(Vector<Float> v) {
1459         return lanewise(MAX, v);
1460     }
1461 
1462     /**
1463      * Computes the larger of this vector and the broadcast of an input scalar.
1464      *
1465      * This is a lane-wise binary operation which applies the
1466      * operation {@code Math.max()} to each pair of
1467      * corresponding lane values.
1468      *
1469      * This method is also equivalent to the expression
1470      * {@link #lanewise(VectorOperators.Binary,float)
1471      *    lanewise}{@code (}{@link VectorOperators#MAX
1472      *    MAX}{@code , e)}.
1473      *
1474      * @param e the input scalar
1475      * @return the result of multiplying this vector by the given scalar
1476      * @see #max(Vector)
1477      * @see #broadcast(float)
1478      * @see VectorOperators#MAX
1479      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1480      * @apiNote
1481      * For this method, negative floating-point zero compares
1482      * less than the default value, positive zero.
1483      */
1484     @ForceInline
1485     public final FloatVector max(float e) {
1486         return lanewise(MAX, e);
1487     }
1488 
1489 
1490     // common FP operator: pow
1491     /**
1492      * Raises this vector to the power of a second input vector.
1493      *
1494      * This is a lane-wise binary operation which applies the
1495      * method {@code Math.pow()}
1496      * to each pair of corresponding lane values.
1497      *
1498      * This method is also equivalent to the expression
1499      * {@link #lanewise(VectorOperators.Binary,Vector)
1500      *    lanewise}{@code (}{@link VectorOperators#POW
1501      *    POW}{@code , n)}.
1502      *
1503      * <p>
1504      * This is not a full-service named operation like
1505      * {@link #add(Vector) add}.  A masked version of
1506      * version of this operation is not directly available
1507      * but may be obtained via the masked version of
1508      * {@code lanewise}.
1509      *
1510      * @param n a vector exponent by which to raise this vector
1511      * @return the {@code n}-th power of this vector
1512      * @see #pow(float)
1513      * @see VectorOperators#POW
1514      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1515      */
1516     @ForceInline
1517     public final FloatVector pow(Vector<Float> n) {
1518         return lanewise(POW, n);
1519     }
1520 
1521     /**
1522      * Raises this vector to a scalar power.
1523      *
1524      * This is a lane-wise binary operation which applies the
1525      * method {@code Math.pow()}
1526      * to each pair of corresponding lane values.
1527      *
1528      * This method is also equivalent to the expression
1529      * {@link #lanewise(VectorOperators.Binary,Vector)
1530      *    lanewise}{@code (}{@link VectorOperators#POW
1531      *    POW}{@code , n)}.
1532      *
1533      * @param n a scalar exponent by which to raise this vector
1534      * @return the {@code n}-th power of this vector
1535      * @see #pow(Vector)
1536      * @see VectorOperators#POW
1537      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1538      */
1539     @ForceInline
1540     public final FloatVector pow(float n) {
1541         return lanewise(POW, n);
1542     }
1543 
1544     /// UNARY METHODS
1545 
1546     /**
1547      * {@inheritDoc} <!--workaround-->
1548      */
1549     @Override
1550     @ForceInline
1551     public final
1552     FloatVector neg() {
1553         return lanewise(NEG);
1554     }
1555 
1556     /**
1557      * {@inheritDoc} <!--workaround-->
1558      */
1559     @Override
1560     @ForceInline
1561     public final
1562     FloatVector abs() {
1563         return lanewise(ABS);
1564     }
1565 
1566 
1567     // sqrt
1568     /**
1569      * Computes the square root of this vector.
1570      *
1571      * This is a lane-wise unary operation which applies the
1572      * the method {@code Math.sqrt()}
1573      * to each lane value.
1574      *
1575      * This method is also equivalent to the expression
1576      * {@link #lanewise(VectorOperators.Unary)
1577      *    lanewise}{@code (}{@link VectorOperators#SQRT
1578      *    SQRT}{@code )}.
1579      *
1580      * @return the square root of this vector
1581      * @see VectorOperators#SQRT
1582      * @see #lanewise(VectorOperators.Unary,VectorMask)
1583      */
1584     @ForceInline
1585     public final FloatVector sqrt() {
1586         return lanewise(SQRT);
1587     }
1588 
1589     /// COMPARISONS
1590 
1591     /**
1592      * {@inheritDoc} <!--workaround-->
1593      */
1594     @Override
1595     @ForceInline
1596     public final
1597     VectorMask<Float> eq(Vector<Float> v) {
1598         return compare(EQ, v);
1599     }
1600 
1601     /**
1602      * Tests if this vector is equal to an input scalar.
1603      *
1604      * This is a lane-wise binary test operation which applies
1605      * the primitive equals operation ({@code ==}) to each lane.
1606      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1607      *
1608      * @param e the input scalar
1609      * @return the result mask of testing if this vector
1610      *         is equal to {@code e}
1611      * @see #compare(VectorOperators.Comparison,float)
1612      */
1613     @ForceInline
1614     public final
1615     VectorMask<Float> eq(float e) {
1616         return compare(EQ, e);
1617     }
1618 
1619     /**
1620      * {@inheritDoc} <!--workaround-->
1621      */
1622     @Override
1623     @ForceInline
1624     public final
1625     VectorMask<Float> lt(Vector<Float> v) {
1626         return compare(LT, v);
1627     }
1628 
1629     /**
1630      * Tests if this vector is less than an input scalar.
1631      *
1632      * This is a lane-wise binary test operation which applies
1633      * the primitive less than operation ({@code <}) to each lane.
1634      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1635      *
1636      * @param e the input scalar
1637      * @return the mask result of testing if this vector
1638      *         is less than the input scalar
1639      * @see #compare(VectorOperators.Comparison,float)
1640      */
1641     @ForceInline
1642     public final
1643     VectorMask<Float> lt(float e) {
1644         return compare(LT, e);
1645     }
1646 
    // Declared abstract here; the shared logic for the special tests is in
    // testTemplate (declared next), which is parameterized by the mask class.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> test(VectorOperators.Test op);
1653 
1654     /*package-private*/
1655     @ForceInline
1656     final
1657     <M extends VectorMask<Float>>
1658     M testTemplate(Class<M> maskType, Test op) {
1659         FloatSpecies vsp = vspecies();
1660         if (opKind(op, VO_SPECIAL)) {
1661             IntVector bits = this.viewAsIntegralLanes();
1662             VectorMask<Integer> m;
1663             if (op == IS_DEFAULT) {
1664                 m = bits.compare(EQ, (int) 0);
1665             } else if (op == IS_NEGATIVE) {
1666                 m = bits.compare(LT, (int) 0);
1667             }
1668             else if (op == IS_FINITE ||
1669                      op == IS_NAN ||
1670                      op == IS_INFINITE) {
1671                 // first kill the sign:
1672                 bits = bits.and(Integer.MAX_VALUE);
1673                 // next find the bit pattern for infinity:
1674                 int infbits = (int) toBits(Float.POSITIVE_INFINITY);
1675                 // now compare:
1676                 if (op == IS_FINITE) {
1677                     m = bits.compare(LT, infbits);
1678                 } else if (op == IS_NAN) {
1679                     m = bits.compare(GT, infbits);
1680                 } else {
1681                     m = bits.compare(EQ, infbits);
1682                 }
1683             }
1684             else {
1685                 throw new AssertionError(op);
1686             }
1687             return maskType.cast(m.cast(this.vspecies()));
1688         }
1689         int opc = opCode(op);
1690         throw new AssertionError(op);
1691     }
1692 
1693     /**
1694      * {@inheritDoc} <!--workaround-->
1695      */
1696     @Override
1697     @ForceInline
1698     public final
1699     VectorMask<Float> test(VectorOperators.Test op,
1700                                   VectorMask<Float> m) {
1701         return test(op).and(m);
1702     }
1703 
    // Declared abstract here; the shared logic is in
    // compareTemplate(Class, Comparison, Vector), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(VectorOperators.Comparison op, Vector<Float> v);
1710 
1711     /*package-private*/
1712     @ForceInline
1713     final
1714     <M extends VectorMask<Float>>
1715     M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) {
1716         Objects.requireNonNull(v);
1717         FloatSpecies vsp = vspecies();
1718         FloatVector that = (FloatVector) v;
1719         that.check(this);
1720         int opc = opCode(op);
1721         return VectorIntrinsics.compare(
1722             opc, getClass(), maskType, float.class, length(),
1723             this, that,
1724             (cond, v0, v1) -> {
1725                 AbstractMask<Float> m
1726                     = v0.bTest(cond, v1, (cond_, i, a, b)
1727                                -> compareWithOp(cond, a, b));
1728                 @SuppressWarnings("unchecked")
1729                 M m2 = (M) m;
1730                 return m2;
1731             });
1732     }
1733 
1734     @ForceInline
1735     private static
1736     boolean compareWithOp(int cond, float a, float b) {
1737         switch (cond) {
1738         case VectorIntrinsics.BT_eq:  return a == b;
1739         case VectorIntrinsics.BT_ne:  return a != b;
1740         case VectorIntrinsics.BT_lt:  return a <  b;
1741         case VectorIntrinsics.BT_le:  return a <= b;
1742         case VectorIntrinsics.BT_gt:  return a >  b;
1743         case VectorIntrinsics.BT_ge:  return a >= b;
1744         }
1745         throw new AssertionError();
1746     }
1747 
1748     /**
1749      * {@inheritDoc} <!--workaround-->
1750      */
1751     @Override
1752     @ForceInline
1753     public final
1754     VectorMask<Float> compare(VectorOperators.Comparison op,
1755                                   Vector<Float> v,
1756                                   VectorMask<Float> m) {
1757         return compare(op, v).and(m);
1758     }
1759 
1760     /**
1761      * Tests this vector by comparing it with an input scalar,
1762      * according to the given comparison operation.
1763      *
1764      * This is a lane-wise binary test operation which applies
1765      * the comparison operation to each lane.
1766      * <p>
1767      * The result is the same as
1768      * {@code compare(op, broadcast(species(), e))}.
1769      * That is, the scalar may be regarded as broadcast to
1770      * a vector of the same species, and then compared
1771      * against the original vector, using the selected
1772      * comparison operation.
1773      *
1774      * @param op the operation used to compare lane values
1775      * @param e the input scalar
1776      * @return the mask result of testing lane-wise if this vector
1777      *         compares to the input, according to the selected
1778      *         comparison operator
1779      * @see FloatVector#compare(VectorOperators.Comparison,Vector)
1780      * @see #eq(float)
1781      * @see #lt(float)
1782      */
    // Declared abstract here; compareTemplate(Class, Comparison, float),
    // declared next, broadcasts the scalar and reuses the vector comparison.
    public abstract
    VectorMask<Float> compare(Comparison op, float e);
1785 
1786     /*package-private*/
1787     @ForceInline
1788     final
1789     <M extends VectorMask<Float>>
1790     M compareTemplate(Class<M> maskType, Comparison op, float e) {
1791         return compareTemplate(maskType, op, broadcast(e));
1792     }
1793 
1794     /**
1795      * Tests this vector by comparing it with an input scalar,
1796      * according to the given comparison operation,
1797      * in lanes selected by a mask.
1798      *
1799      * This is a masked lane-wise binary test operation which applies
1800      * to each pair of corresponding lane values.
1801      *
1802      * The returned result is equal to the expression
1803      * {@code compare(op,s).and(m)}.
1804      *
1805      * @param op the operation used to compare lane values
1806      * @param e the input scalar
1807      * @param m the mask controlling lane selection
1808      * @return the mask result of testing lane-wise if this vector
1809      *         compares to the input, according to the selected
1810      *         comparison operator,
1811      *         and only in the lanes selected by the mask
1812      * @see FloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1813      */
1814     @ForceInline
1815     public final VectorMask<Float> compare(VectorOperators.Comparison op,
1816                                                float e,
1817                                                VectorMask<Float> m) {
1818         return compare(op, e).and(m);
1819     }
1820 
    // Declared abstract here; compareTemplate(Class, Comparison, long),
    // declared next, broadcasts the long scalar and reuses the vector
    // comparison.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(Comparison op, long e);
1827 
1828     /*package-private*/
1829     @ForceInline
1830     final
1831     <M extends VectorMask<Float>>
1832     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1833         return compareTemplate(maskType, op, broadcast(e));
1834     }
1835 
1836     /**
1837      * {@inheritDoc} <!--workaround-->
1838      */
1839     @Override
1840     @ForceInline
1841     public final
1842     VectorMask<Float> compare(Comparison op, long e, VectorMask<Float> m) {
1843         return compare(op, broadcast(e), m);
1844     }
1845 
1846 
1847 
    // Declared abstract here; the shared logic is in blendTemplate,
    // declared next, which is parameterized by the mask class.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract
    FloatVector blend(Vector<Float> v, VectorMask<Float> m);
1853 
1854     /*package-private*/
1855     @ForceInline
1856     final
1857     <M extends VectorMask<Float>>
1858     FloatVector
1859     blendTemplate(Class<M> maskType, FloatVector v, M m) {
1860         v.check(this);
1861         return VectorIntrinsics.blend(
1862             getClass(), maskType, float.class, length(),
1863             this, v, m,
1864             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
1865     }
1866 
    // Declared abstract here; the shared logic is in addIndexTemplate,
    // declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract FloatVector addIndex(int scale);
1871 
1872     /*package-private*/
1873     @ForceInline
1874     final FloatVector addIndexTemplate(int scale) {
1875         FloatSpecies vsp = vspecies();
1876         // make sure VLENGTH*scale doesn't overflow:
1877         vsp.checkScale(scale);
1878         return VectorIntrinsics.indexVector(
1879             getClass(), float.class, length(),
1880             this, scale, vsp,
1881             (v, scale_, s)
1882             -> {
1883                 // If the platform doesn't support an INDEX
1884                 // instruction directly, load IOTA from memory
1885                 // and multiply.
1886                 FloatVector iota = s.iota();
1887                 float sc = (float) scale_;
1888                 return v.add(sc == 1 ? iota : iota.mul(sc));
1889             });
1890     }
1891 
1892     /**
1893      * Replaces selected lanes of this vector with
1894      * a scalar value
1895      * under the control of a mask.
1896      *
1897      * This is a masked lane-wise binary operation which
1898      * selects each lane value from one or the other input.
1899      *
1900      * The returned result is equal to the expression
1901      * {@code blend(broadcast(e),m)}.
1902      *
1903      * @param e the input scalar, containing the replacement lane value
1904      * @param m the mask controlling lane selection of the scalar
1905      * @return the result of blending the lane elements of this vector with
1906      *         the scalar value
1907      */
1908     @ForceInline
1909     public final FloatVector blend(float e,
1910                                             VectorMask<Float> m) {
1911         return blend(broadcast(e), m);
1912     }
1913 
1914     /**
1915      * Replaces selected lanes of this vector with
1916      * a scalar value
1917      * under the control of a mask.
1918      *
1919      * This is a masked lane-wise binary operation which
1920      * selects each lane value from one or the other input.
1921      *
1922      * The returned result is equal to the expression
1923      * {@code blend(broadcast(e),m)}.
1924      *
1925      * @param e the input scalar, containing the replacement lane value
1926      * @param m the mask controlling lane selection of the scalar
1927      * @return the result of blending the lane elements of this vector with
1928      *         the scalar value
1929      */
1930     @ForceInline
1931     public final FloatVector blend(long e,
1932                                             VectorMask<Float> m) {
1933         return blend(broadcast(e), m);
1934     }
1935 
    // Declared abstract here; the shared logic is in sliceTemplate,
    // declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector slice(int origin, Vector<Float> v1);
1942 
1943     /*package-private*/
1944     final
1945     @ForceInline
1946     FloatVector sliceTemplate(int origin, Vector<Float> v1) {
1947         FloatVector that = (FloatVector) v1;
1948         that.check(this);
1949         float[] a0 = this.getElements();
1950         float[] a1 = that.getElements();
1951         float[] res = new float[a0.length];
1952         int vlen = res.length;
1953         int firstPart = vlen - origin;
1954         System.arraycopy(a0, origin, res, 0, firstPart);
1955         System.arraycopy(a1, 0, res, firstPart, origin);
1956         return vectorFactory(res);
1957     }
1958 
1959     /**
1960      * {@inheritDoc} <!--workaround-->
1961      */
1962     @Override
1963     @ForceInline
1964     public final
1965     FloatVector slice(int origin,
1966                                Vector<Float> w,
1967                                VectorMask<Float> m) {
1968         return broadcast(0).blend(slice(origin, w), m);
1969     }
1970 
    // Declared abstract here; no shared template for this single-argument
    // overload is declared in this part of the file.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector slice(int origin);
1977 
    // Declared abstract here; the shared logic is in
    // unsliceTemplate(int, Vector, int), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part);
1984 
1985     /*package-private*/
1986     final
1987     @ForceInline
1988     FloatVector
1989     unsliceTemplate(int origin, Vector<Float> w, int part) {
1990         FloatVector that = (FloatVector) w;
1991         that.check(this);
1992         float[] slice = this.getElements();
1993         float[] res = that.getElements();
1994         int vlen = res.length;
1995         int firstPart = vlen - origin;
1996         switch (part) {
1997         case 0:
1998             System.arraycopy(slice, 0, res, origin, firstPart);
1999             break;
2000         case 1:
2001             System.arraycopy(slice, firstPart, res, 0, origin);
2002             break;
2003         default:
2004             throw wrongPartForSlice(part);
2005         }
2006         return vectorFactory(res);
2007     }
2008 
2009     /*package-private*/
2010     final
2011     @ForceInline
2012     <M extends VectorMask<Float>>
2013     FloatVector
2014     unsliceTemplate(Class<M> maskType, int origin, Vector<Float> w, int part, M m) {
2015         FloatVector that = (FloatVector) w;
2016         that.check(this);
2017         FloatVector slice = that.sliceTemplate(origin, that);
2018         slice = slice.blendTemplate(maskType, this, m);
2019         return slice.unsliceTemplate(origin, w, part);
2020     }
2021 
    // Declared abstract here; the shared logic is in the masked
    // unsliceTemplate overload declared just above.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part, VectorMask<Float> m);
2028 
    // Declared abstract here; no shared template for this single-argument
    // overload is declared in this part of the file.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin);
2035 
2036     private ArrayIndexOutOfBoundsException
2037     wrongPartForSlice(int part) {
2038         String msg = String.format("bad part number %d for slice operation",
2039                                    part);
2040         return new ArrayIndexOutOfBoundsException(msg);
2041     }
2042 
    // Declared abstract here; the shared logic is in
    // rearrangeTemplate(Class, S), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> m);
2049 
2050     /*package-private*/
2051     @ForceInline
2052     final
2053     <S extends VectorShuffle<Float>>
2054     FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2055         shuffle.checkIndexes();
2056         return VectorIntrinsics.rearrangeOp(
2057             getClass(), shuffletype, float.class, length(),
2058             this, shuffle,
2059             (v1, s_) -> v1.uOp((i, a) -> {
2060                 int ei = s_.laneSource(i);
2061                 return v1.lane(ei);
2062             }));
2063     }
2064 
    // Declared abstract here; the shared logic is in
    // rearrangeTemplate(Class, S, VectorMask), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   VectorMask<Float> m);
2072 
2073     /*package-private*/
2074     @ForceInline
2075     final
2076     <S extends VectorShuffle<Float>>
2077     FloatVector rearrangeTemplate(Class<S> shuffletype,
2078                                            S shuffle,
2079                                            VectorMask<Float> m) {
2080         FloatVector unmasked =
2081             VectorIntrinsics.rearrangeOp(
2082                 getClass(), shuffletype, float.class, length(),
2083                 this, shuffle,
2084                 (v1, s_) -> v1.uOp((i, a) -> {
2085                     int ei = s_.laneSource(i);
2086                     return ei < 0 ? 0 : v1.lane(ei);
2087                 }));
2088         VectorMask<Float> valid = shuffle.laneIsValid();
2089         if (m.andNot(valid).anyTrue()) {
2090             shuffle.checkIndexes();
2091             throw new AssertionError();
2092         }
2093         return broadcast((float)0).blend(unmasked, valid);
2094     }
2095 
    // Declared abstract here; the shared logic is in
    // rearrangeTemplate(Class, S, FloatVector), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   Vector<Float> v);
2103 
2104     /*package-private*/
2105     @ForceInline
2106     final
2107     <S extends VectorShuffle<Float>>
2108     FloatVector rearrangeTemplate(Class<S> shuffletype,
2109                                            S shuffle,
2110                                            FloatVector v) {
2111         VectorMask<Float> valid = shuffle.laneIsValid();
2112         S ws = shuffletype.cast(shuffle.wrapIndexes());
2113         FloatVector r0 =
2114             VectorIntrinsics.rearrangeOp(
2115                 getClass(), shuffletype, float.class, length(),
2116                 this, ws,
2117                 (v0, s_) -> v0.uOp((i, a) -> {
2118                     int ei = s_.laneSource(i);
2119                     return v0.lane(ei);
2120                 }));
2121         FloatVector r1 =
2122             VectorIntrinsics.rearrangeOp(
2123                 getClass(), shuffletype, float.class, length(),
2124                 v, ws,
2125                 (v1, s_) -> v1.uOp((i, a) -> {
2126                     int ei = s_.laneSource(i);
2127                     return v1.lane(ei);
2128                 }));
2129         return r1.blend(r0, valid);
2130     }
2131 
    // Declared abstract here; the shared logic is in
    // selectFromTemplate(FloatVector), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> v);
2138 
2139     /*package-private*/
2140     @ForceInline
2141     final FloatVector selectFromTemplate(FloatVector v) {
2142         return v.rearrange(this.toShuffle());
2143     }
2144 
    // Declared abstract here; the shared logic is in
    // selectFromTemplate(FloatVector, AbstractMask), declared next.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> s, VectorMask<Float> m);
2151 
2152     /*package-private*/
2153     @ForceInline
2154     final FloatVector selectFromTemplate(FloatVector v,
2155                                                   AbstractMask<Float> m) {
2156         return v.rearrange(this.toShuffle(), m);
2157     }
2158 
2159     /// Ternary operations
2160 
2161 
2162     /**
2163      * Multiplies this vector by a second input vector, and sums
2164      * the result with a third.
2165      *
2166      * Extended precision is used for the intermediate result,
2167      * avoiding possible loss of precision from rounding once
2168      * for each of the two operations.
2169      * The result is numerically close to {@code this.mul(b).add(c)},
2170      * and is typically closer to the true mathematical result.
2171      *
2172      * This is a lane-wise ternary operation which applies the
2173      * {@link Math#fma(float,float,float) Math#fma(a,b,c)}
2174      * operation to each lane.
2175      *
2176      * This method is also equivalent to the expression
2177      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2178      *    lanewise}{@code (}{@link VectorOperators#FMA
2179      *    FMA}{@code , b, c)}.
2180      *
2181      * @param b the second input vector, supplying multiplier values
2182      * @param c the third input vector, supplying addend values
2183      * @return the product of this vector and the second input vector
2184      *         summed with the third input vector, using extended precision
2185      *         for the intermediate result
2186      * @see #fma(float,float)
2187      * @see VectorOperators#FMA
2188      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2189      */
2190     @ForceInline
2191     public final
2192     FloatVector fma(Vector<Float> b, Vector<Float> c) {
2193         return lanewise(FMA, b, c);
2194     }
2195 
2196     /**
2197      * Multiplies this vector by a scalar multiplier, and sums
2198      * the result with a scalar addend.
2199      *
2200      * Extended precision is used for the intermediate result,
2201      * avoiding possible loss of precision from rounding once
2202      * for each of the two operations.
2203      * The result is numerically close to {@code this.mul(b).add(c)},
2204      * and is typically closer to the true mathematical result.
2205      *
2206      * This is a lane-wise ternary operation which applies the
2207      * {@link Math#fma(float,float,float) Math#fma(a,b,c)}
2208      * operation to each lane.
2209      *
2210      * This method is also equivalent to the expression
2211      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2212      *    lanewise}{@code (}{@link VectorOperators#FMA
2213      *    FMA}{@code , b, c)}.
2214      *
2215      * @param b the scalar multiplier
2216      * @param c the scalar addend
2217      * @return the product of this vector and the scalar multiplier
2218      *         summed with scalar addend, using extended precision
2219      *         for the intermediate result
2220      * @see #fma(Vector,Vector)
2221      * @see VectorOperators#FMA
2222      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
2223      */
2224     @ForceInline
2225     public final
2226     FloatVector fma(float b, float c) {
2227         return lanewise(FMA, b, c);
2228     }
2229 
2230     // Don't bother with (Vector,float) and (float,Vector) overloadings.
2231 
2232     // Type specific horizontal reductions
2233 
2234     /**
2235      * Returns a value accumulated from all the lanes of this vector.
2236      *
2237      * This is an associative cross-lane reduction operation which
2238      * applies the specified operation to all the lane elements.
2239      *
2240      * <p>
2241      * A few reduction operations do not support arbitrary reordering
2242      * of their operands, yet are included here because of their
2243      * usefulness.
2244      *
2245      * <ul>
2246      * <li>
2247      * In the case of {@code FIRST_NONZERO}, the reduction returns
2248      * the value from the lowest-numbered non-zero lane.
2249      *
2250      * (As with {@code MAX} and {@code MIN}, floating point negative
2251      * zero {@code -0.0} is treated as a value distinct from
2252      * the default value, positive zero. So a first-nonzero lane reduction
2253      * might return {@code -0.0} even in the presence of non-zero
2254      * lane values.)
2255      *
2256      * <li>
2257      * In the case of floating point addition and multiplication, the
2258      * precise result will reflect the choice of an arbitrary order
2259      * of operations, which may even vary over time.
2260      *
2261      * <li>
2262      * All other reduction operations are fully commutative and
2263      * associative.  The implementation can choose any order of
2264      * processing, yet it will always produce the same result.
2265      *
2266      * </ul>
2267      *
2268      * @implNote
2269      * The value of a floating-point reduction may be a function
2270      * both of the input values as well as the order of scalar
2271      * operations which combine those values, specifically in the
2272      * case of {@code ADD} and {@code MUL} operations, where
2273      * details of rounding depend on operand order.
2274      * In those cases, the order of operations of this method is
2275      * intentionally not defined.  This allows the JVM to generate
2276      * optimal machine code for the underlying platform at runtime. If
2277      * the platform supports a vector instruction to add or multiply
2278      * all values in the vector, or if there is some other efficient
2279      * machine code sequence, then the JVM has the option of
2280      * generating this machine code. Otherwise, the default
2281      * implementation is applied, which adds vector elements
2282      * sequentially from beginning to end.  For this reason, the
2283      * output of this method may vary for the same input values,
2284      * if the selected operator is {@code ADD} or {@code MUL}.
2285      *
2286      *
2287      * @param op the operation used to combine lane values
2288      * @return the accumulated result
2289      * @throws UnsupportedOperationException if this vector does
2290      *         not support the requested operation
2291      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2292      * @see #add(Vector)
2293      * @see #mul(Vector)
2294      * @see #min(Vector)
2295      * @see #max(Vector)
2296      * @see VectorOperators#FIRST_NONZERO
2297      */
    // Declared abstract here; a shared implementation is provided further
    // down as reduceLanesTemplate(VectorOperators.Associative).
    public abstract float reduceLanes(VectorOperators.Associative op);
2299 
2300     /**
2301      * Returns a value accumulated from selected lanes of this vector,
2302      * controlled by a mask.
2303      *
2304      * This is an associative cross-lane reduction operation which
2305      * applies the specified operation to the selected lane elements.
2306      * <p>
2307      * If no elements are selected, an operation-specific identity
2308      * value is returned.
2309      * <ul>
2310      * <li>
2311      * If the operation is
2312      *  {@code ADD}
2313      * or {@code FIRST_NONZERO},
2314      * then the identity value is positive zero, the default {@code float} value.
2315      * <li>
2316      * If the operation is {@code MUL},
2317      * then the identity value is one.
2318      * <li>
2319      * If the operation is {@code MAX},
2320      * then the identity value is {@code Float.NEGATIVE_INFINITY}.
2321      * <li>
2322      * If the operation is {@code MIN},
2323      * then the identity value is {@code Float.POSITIVE_INFINITY}.
2324      * </ul>
2325      *
2326      * @implNote
2327      * The value of a floating-point reduction may be a function
2328      * both of the input values as well as the order of scalar
2329      * operations which combine those values, specifically in the
2330      * case of {@code ADD} and {@code MUL} operations, where
2331      * details of rounding depend on operand order.
2332      * See {@linkplain #reduceLanes(VectorOperators.Associative)
2333      * the unmasked version of this method}
2334      * for a discussion.
2335      *
2336      *
2337      * @param op the operation used to combine lane values
2338      * @param m the mask controlling lane selection
2339      * @return the reduced result accumulated from the selected lane values
2340      * @throws UnsupportedOperationException if this vector does
2341      *         not support the requested operation
2342      * @see #reduceLanes(VectorOperators.Associative)
2343      */
    // Declared abstract here; a shared implementation is provided next as
    // reduceLanesTemplate(VectorOperators.Associative, VectorMask).
    public abstract float reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Float> m);
2346 
2347     /*package-private*/
2348     @ForceInline
2349     final
2350     float reduceLanesTemplate(VectorOperators.Associative op,
2351                                VectorMask<Float> m) {
2352         FloatVector v = reduceIdentityVector(op).blend(this, m);
2353         return v.reduceLanesTemplate(op);
2354     }
2355 
2356     /*package-private*/
2357     @ForceInline
2358     final
2359     float reduceLanesTemplate(VectorOperators.Associative op) {
2360         if (op == FIRST_NONZERO) {
2361             // FIXME:  The JIT should handle this, and other scan ops alos.
2362             VectorMask<Integer> thisNZ
2363                 = this.viewAsIntegralLanes().compare(NE, (int) 0);
2364             return this.lane(thisNZ.firstTrue());
2365         }
2366         int opc = opCode(op);
2367         return fromBits(VectorIntrinsics.reductionCoerced(
2368             opc, getClass(), float.class, length(),
2369             this,
2370             REDUCE_IMPL.find(op, opc, (opc_) -> {
2371               switch (opc_) {
2372               case VECTOR_OP_ADD: return v ->
2373                       toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b)));
2374               case VECTOR_OP_MUL: return v ->
2375                       toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b)));
2376               case VECTOR_OP_MIN: return v ->
2377                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (float) Math.min(a, b)));
2378               case VECTOR_OP_MAX: return v ->
2379                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b)));
2380               case VECTOR_OP_FIRST_NONZERO: return v ->
2381                       toBits(v.rOp((float)0, (i, a, b) -> toBits(a) != 0 ? a : b));
2382               case VECTOR_OP_OR: return v ->
2383                       toBits(v.rOp((float)0, (i, a, b) -> fromBits(toBits(a) | toBits(b))));
2384               default: return null;
2385               }})));
2386     }
2387     private static final
2388     ImplCache<Associative,Function<FloatVector,Long>> REDUCE_IMPL
2389         = new ImplCache<>(Associative.class, FloatVector.class);
2390 
2391     private
2392     @ForceInline
2393     FloatVector reduceIdentityVector(VectorOperators.Associative op) {
2394         int opc = opCode(op);
2395         UnaryOperator<FloatVector> fn
2396             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2397                 switch (opc_) {
2398                 case VECTOR_OP_ADD:
2399                 case VECTOR_OP_OR:
2400                 case VECTOR_OP_XOR:
2401                 case VECTOR_OP_FIRST_NONZERO:
2402                     return v -> v.broadcast(0);
2403                 case VECTOR_OP_MUL:
2404                     return v -> v.broadcast(1);
2405                 case VECTOR_OP_AND:
2406                     return v -> v.broadcast(-1);
2407                 case VECTOR_OP_MIN:
2408                     return v -> v.broadcast(MAX_OR_INF);
2409                 case VECTOR_OP_MAX:
2410                     return v -> v.broadcast(MIN_OR_INF);
2411                 default: return null;
2412                 }
2413             });
2414         return fn.apply(this);
2415     }
2416     private static final
2417     ImplCache<Associative,UnaryOperator<FloatVector>> REDUCE_ID_IMPL
2418         = new ImplCache<>(Associative.class, FloatVector.class);
2419 
    // Starting values for MIN/MAX reductions in this class: MIN_OR_INF
    // (negative infinity) seeds a MAX reduction and MAX_OR_INF (positive
    // infinity) seeds a MIN reduction; see reduceLanesTemplate and
    // reduceIdentityVector above.
    private static final float MIN_OR_INF = Float.NEGATIVE_INFINITY;
    private static final float MAX_OR_INF = Float.POSITIVE_INFINITY;
2422 
    // Long-returning reduction; re-declared abstract here, overriding a
    // supertype declaration (see @Override).
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    // Masked long-returning reduction; re-declared abstract here,
    // overriding a supertype declaration (see @Override).
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Float> m);
2426 
2427     // Type specific accessors
2428 
    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract float lane(int i);
2438 
    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     *
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract FloatVector withLane(int i, float e);
2456 
2457     // Memory load operations
2458 
2459     /**
2460      * Returns an array of type {@code float[]}
2461      * containing all the lane values.
2462      * The array length is the same as the vector length.
2463      * The array elements are stored in lane order.
2464      * <p>
2465      * This method behaves as if it stores
2466      * this vector into an allocated array
2467      * (using {@link #intoArray(float[], int) intoArray})
2468      * and returns the array as follows:
2469      * <pre>{@code
2470      *   float[] a = new float[this.length()];
2471      *   this.intoArray(a, 0);
2472      *   return a;
2473      * }</pre>
2474      *
2475      * @return an array containing the lane values of this vector
2476      */
2477     @ForceInline
2478     @Override
2479     public final float[] toArray() {
2480         float[] a = new float[vspecies().laneCount()];
2481         intoArray(a, 0);
2482         return a;
2483     }
2484 
2485     /** {@inheritDoc} <!--workaround-->
2486      */
2487     @ForceInline
2488     @Override
2489     public final int[] toIntArray() {
2490         float[] a = toArray();
2491         int[] res = new int[a.length];
2492         for (int i = 0; i < a.length; i++) {
2493             float e = a[i];
2494             res[i] = (int) FloatSpecies.toIntegralChecked(e, true);
2495         }
2496         return res;
2497     }
2498 
2499     /** {@inheritDoc} <!--workaround-->
2500      */
2501     @ForceInline
2502     @Override
2503     public final long[] toLongArray() {
2504         float[] a = toArray();
2505         long[] res = new long[a.length];
2506         for (int i = 0; i < a.length; i++) {
2507             float e = a[i];
2508             res[i] = FloatSpecies.toIntegralChecked(e, false);
2509         }
2510         return res;
2511     }
2512 
2513     /** {@inheritDoc} <!--workaround-->
2514      * @implNote
2515      * When this method is used on used on vectors
2516      * of type {@code FloatVector},
2517      * there will be no loss of precision.
2518      */
2519     @ForceInline
2520     @Override
2521     public final double[] toDoubleArray() {
2522         float[] a = toArray();
2523         double[] res = new double[a.length];
2524         for (int i = 0; i < a.length; i++) {
2525             res[i] = (double) a[i];
2526         }
2527         return res;
2528     }
2529 
2530     /**
2531      * Loads a vector from a byte array starting at an offset.
2532      * Bytes are composed into primitive lane elements according
2533      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2534      * The vector is arranged into lanes according to
2535      * <a href="Vector.html#lane-order">memory ordering</a>.
2536      * <p>
2537      * This method behaves as if it returns the result of calling
2538      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2539      * fromByteBuffer()} as follows:
2540      * <pre>{@code
2541      * var bb = ByteBuffer.wrap(a);
2542      * var bo = ByteOrder.LITTLE_ENDIAN;
2543      * var m = species.maskAll(true);
2544      * return fromByteBuffer(species, bb, offset, m, bo);
2545      * }</pre>
2546      *
2547      * @param species species of desired vector
2548      * @param a the byte array
2549      * @param offset the offset into the array
2550      * @return a vector loaded from a byte array
2551      * @throws IndexOutOfBoundsException
2552      *         if {@code offset+N*ESIZE < 0}
2553      *         or {@code offset+(N+1)*ESIZE > a.length}
2554      *         for any lane {@code N} in the vector
2555      */
2556     @ForceInline
2557     public static
2558     FloatVector fromByteArray(VectorSpecies<Float> species,
2559                                        byte[] a, int offset) {
2560         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN);
2561     }
2562 
2563     /**
2564      * Loads a vector from a byte array starting at an offset.
2565      * Bytes are composed into primitive lane elements according
2566      * to the specified byte order.
2567      * The vector is arranged into lanes according to
2568      * <a href="Vector.html#lane-order">memory ordering</a>.
2569      * <p>
2570      * This method behaves as if it returns the result of calling
2571      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2572      * fromByteBuffer()} as follows:
2573      * <pre>{@code
2574      * var bb = ByteBuffer.wrap(a);
2575      * var m = species.maskAll(true);
2576      * return fromByteBuffer(species, bb, offset, m, bo);
2577      * }</pre>
2578      *
2579      * @param species species of desired vector
2580      * @param a the byte array
2581      * @param offset the offset into the array
2582      * @param bo the intended byte order
2583      * @return a vector loaded from a byte array
2584      * @throws IndexOutOfBoundsException
2585      *         if {@code offset+N*ESIZE < 0}
2586      *         or {@code offset+(N+1)*ESIZE > a.length}
2587      *         for any lane {@code N} in the vector
2588      */
2589     @ForceInline
2590     public static
2591     FloatVector fromByteArray(VectorSpecies<Float> species,
2592                                        byte[] a, int offset,
2593                                        ByteOrder bo) {
2594         FloatSpecies vsp = (FloatSpecies) species;
2595         offset = checkFromIndexSize(offset,
2596                                     vsp.vectorBitSize() / Byte.SIZE,
2597                                     a.length);
2598         return vsp.dummyVector()
2599             .fromByteArray0(a, offset).maybeSwap(bo);
2600     }
2601 
2602     /**
2603      * Loads a vector from a byte array starting at an offset
2604      * and using a mask.
2605      * Lanes where the mask is unset are filled with the default
2606      * value of {@code float} (positive zero).
2607      * Bytes are composed into primitive lane elements according
2608      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2609      * The vector is arranged into lanes according to
2610      * <a href="Vector.html#lane-order">memory ordering</a>.
2611      * <p>
2612      * This method behaves as if it returns the result of calling
2613      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2614      * fromByteBuffer()} as follows:
2615      * <pre>{@code
2616      * var bb = ByteBuffer.wrap(a);
2617      * var bo = ByteOrder.LITTLE_ENDIAN;
2618      * return fromByteBuffer(species, bb, offset, bo, m);
2619      * }</pre>
2620      *
2621      * @param species species of desired vector
2622      * @param a the byte array
2623      * @param offset the offset into the array
2624      * @param m the mask controlling lane selection
2625      * @return a vector loaded from a byte array
2626      * @throws IndexOutOfBoundsException
2627      *         if {@code offset+N*ESIZE < 0}
2628      *         or {@code offset+(N+1)*ESIZE > a.length}
2629      *         for any lane {@code N} in the vector where
2630      *         the mask is set
2631      */
2632     @ForceInline
2633     public static
2634     FloatVector fromByteArray(VectorSpecies<Float> species,
2635                                        byte[] a, int offset,
2636                                        VectorMask<Float> m) {
2637         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m);
2638     }
2639 
    /**
     * Loads a vector from a byte array starting at an offset
     * and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code float} (positive zero).
     * Bytes are composed into primitive lane elements according
     * to the specified byte order.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows:
     * <pre>{@code
     * var bb = ByteBuffer.wrap(a);
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param a the byte array
     * @param offset the offset into the array
     * @param bo the intended byte order
     * @param m the mask controlling lane selection
     * @return a vector loaded from a byte array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*ESIZE < 0}
     *         or {@code offset+(N+1)*ESIZE > a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public static
    FloatVector fromByteArray(VectorSpecies<Float> species,
                                       byte[] a, int offset,
                                       ByteOrder bo,
                                       VectorMask<Float> m) {
        FloatSpecies vsp = (FloatSpecies) species;
        FloatVector zero = vsp.zero();

        // Fast path: the whole vector (length() lanes of 4 bytes each)
        // lies inside the array, so load everything and let the blend
        // keep only the lanes selected by the mask.
        if (offset >= 0 && offset <= (a.length - vsp.length() * 4)) {
            FloatVector v = zero.fromByteArray0(a, offset);
            return zero.blend(v.maybeSwap(bo), m);
        }
        // Slow path: part of the vector would fall outside the array.
        // Check the lanes against the mask with a 4-byte stride, then
        // read lane by lane through a FloatBuffer obtained from wrapper().
        FloatVector iota = zero.addIndex(1);
        ((AbstractMask<Float>)m)
            .checkIndexByLane(offset, a.length, iota, 4);
        FloatBuffer tb = wrapper(a, offset, bo);
        return vsp.ldOp(tb, 0, (AbstractMask<Float>)m,
                   (tb_, __, i)  -> tb_.get(i));
    }
2690 
2691     /**
2692      * Loads a vector from an array of type {@code float[]}
2693      * starting at an offset.
2694      * For each vector lane, where {@code N} is the vector lane index, the
2695      * array element at index {@code offset + N} is placed into the
2696      * resulting vector at lane index {@code N}.
2697      *
2698      * @param species species of desired vector
2699      * @param a the array
2700      * @param offset the offset into the array
2701      * @return the vector loaded from an array
2702      * @throws IndexOutOfBoundsException
2703      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2704      *         for any lane {@code N} in the vector
2705      */
2706     @ForceInline
2707     public static
2708     FloatVector fromArray(VectorSpecies<Float> species,
2709                                    float[] a, int offset) {
2710         FloatSpecies vsp = (FloatSpecies) species;
2711         offset = checkFromIndexSize(offset,
2712                                     vsp.laneCount(),
2713                                     a.length);
2714         return vsp.dummyVector().fromArray0(a, offset);
2715     }
2716 
2717     /**
2718      * Loads a vector from an array of type {@code float[]}
2719      * starting at an offset and using a mask.
2720      * Lanes where the mask is unset are filled with the default
2721      * value of {@code float} (positive zero).
2722      * For each vector lane, where {@code N} is the vector lane index,
2723      * if the mask lane at index {@code N} is set then the array element at
2724      * index {@code offset + N} is placed into the resulting vector at lane index
2725      * {@code N}, otherwise the default element value is placed into the
2726      * resulting vector at lane index {@code N}.
2727      *
2728      * @param species species of desired vector
2729      * @param a the array
2730      * @param offset the offset into the array
2731      * @param m the mask controlling lane selection
2732      * @return the vector loaded from an array
2733      * @throws IndexOutOfBoundsException
2734      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2735      *         for any lane {@code N} in the vector
2736      *         where the mask is set
2737      */
2738     @ForceInline
2739     public static
2740     FloatVector fromArray(VectorSpecies<Float> species,
2741                                    float[] a, int offset,
2742                                    VectorMask<Float> m) {
2743         FloatSpecies vsp = (FloatSpecies) species;
2744         if (offset >= 0 && offset <= (a.length - species.length())) {
2745             FloatVector zero = vsp.zero();
2746             return zero.blend(zero.fromArray0(a, offset), m);
2747         }
2748         FloatVector iota = vsp.iota();
2749         ((AbstractMask<Float>)m)
2750             .checkIndexByLane(offset, a.length, iota, 1);
2751         return vsp.vOp(m, i -> a[offset + i]);
2752     }
2753 
2754     /**
2755      * Gathers a new vector composed of elements from an array of type
2756      * {@code float[]},
2757      * using indexes obtained by adding a fixed {@code offset} to a
2758      * series of secondary offsets from an <em>index map</em>.
2759      * The index map is a contiguous sequence of {@code VLENGTH}
2760      * elements in a second array of {@code int}s, starting at a given
2761      * {@code mapOffset}.
2762      * <p>
2763      * For each vector lane, where {@code N} is the vector lane index,
2764      * the lane is loaded from the array
2765      * element {@code a[f(N)]}, where {@code f(N)} is the
2766      * index mapping expression
2767      * {@code offset + indexMap[mapOffset + N]]}.
2768      *
2769      * @param species species of desired vector
2770      * @param a the array
2771      * @param offset the offset into the array, may be negative if relative
2772      * indexes in the index map compensate to produce a value within the
2773      * array bounds
2774      * @param indexMap the index map
2775      * @param mapOffset the offset into the index map
2776      * @return the vector loaded from the indexed elements of the array
2777      * @throws IndexOutOfBoundsException
2778      *         if {@code mapOffset+N < 0}
2779      *         or if {@code mapOffset+N >= indexMap.length},
2780      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2781      *         is an invalid index into {@code a},
2782      *         for any lane {@code N} in the vector
2783      * @see FloatVector#toIntArray()
2784      */
2785     @ForceInline
2786     public static
2787     FloatVector fromArray(VectorSpecies<Float> species,
2788                                    float[] a, int offset,
2789                                    int[] indexMap, int mapOffset) {
2790         FloatSpecies vsp = (FloatSpecies) species;
2791         Objects.requireNonNull(a);
2792         Objects.requireNonNull(indexMap);
2793         Class<? extends FloatVector> vectorType = vsp.vectorType();
2794 
2795 
2796         // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
2797         IntVector vix = IntVector.fromArray(IntVector.species(vsp.indexShape()), indexMap, mapOffset).add(offset);
2798 
2799         vix = VectorIntrinsics.checkIndex(vix, a.length);
2800 
2801         return VectorIntrinsics.loadWithMap(
2802             vectorType, float.class, vsp.laneCount(),
2803             IntVector.species(vsp.indexShape()).vectorType(),
2804             a, ARRAY_BASE, vix,
2805             a, offset, indexMap, mapOffset, vsp,
2806             (float[] c, int idx, int[] iMap, int idy, FloatSpecies s) ->
2807             s.vOp(n -> c[idx + iMap[idy+n]]));
2808         }
2809 
2810     /**
2811      * Gathers a new vector composed of elements from an array of type
2812      * {@code float[]},
2813      * under the control of a mask, and
2814      * using indexes obtained by adding a fixed {@code offset} to a
2815      * series of secondary offsets from an <em>index map</em>.
2816      * The index map is a contiguous sequence of {@code VLENGTH}
2817      * elements in a second array of {@code int}s, starting at a given
2818      * {@code mapOffset}.
2819      * <p>
2820      * For each vector lane, where {@code N} is the vector lane index,
2821      * if the lane is set in the mask,
2822      * the lane is loaded from the array
2823      * element {@code a[f(N)]}, where {@code f(N)} is the
2824      * index mapping expression
2825      * {@code offset + indexMap[mapOffset + N]]}.
2826      * Unset lanes in the resulting vector are set to zero.
2827      *
2828      * @param species species of desired vector
2829      * @param a the array
2830      * @param offset the offset into the array, may be negative if relative
2831      * indexes in the index map compensate to produce a value within the
2832      * array bounds
2833      * @param indexMap the index map
2834      * @param mapOffset the offset into the index map
2835      * @param m the mask controlling lane selection
2836      * @return the vector loaded from the indexed elements of the array
2837      * @throws IndexOutOfBoundsException
2838      *         if {@code mapOffset+N < 0}
2839      *         or if {@code mapOffset+N >= indexMap.length},
2840      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2841      *         is an invalid index into {@code a},
2842      *         for any lane {@code N} in the vector
2843      *         where the mask is set
2844      * @see FloatVector#toIntArray()
2845      */
2846     @ForceInline
2847     public static
2848     FloatVector fromArray(VectorSpecies<Float> species,
2849                                    float[] a, int offset,
2850                                    int[] indexMap, int mapOffset,
2851                                    VectorMask<Float> m) {
2852         FloatSpecies vsp = (FloatSpecies) species;
2853 
2854         // FIXME This can result in out of bounds errors for unset mask lanes
2855         // FIX = Use a scatter instruction which routes the unwanted lanes
2856         // into a bit-bucket variable (private to implementation).
2857         // This requires a 2-D scatter in order to set a second base address.
2858         // See notes in https://bugs.openjdk.java.net/browse/JDK-8223367
2859         assert(m.allTrue());
2860         return (FloatVector)
2861             zero(species).blend(fromArray(species, a, offset, indexMap, mapOffset), m);
2862 
2863     }
2864 
2865     /**
2866      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2867      * starting at an offset into the byte buffer.
2868      * <p>
2869      * Bytes are composed into primitive lane elements according to
2870      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
2871      * To avoid errors, the
2872      * {@linkplain ByteBuffer#order() intrinsic byte order}
2873      * of the buffer must be little-endian.
2874      * <p>
2875      * This method behaves as if it returns the result of calling
2876      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2877      * fromByteBuffer()} as follows:
2878      * <pre>{@code
2879      * var bb = ByteBuffer.wrap(a);
2880      * var bo = ByteOrder.LITTLE_ENDIAN;
2881      * var m = species.maskAll(true);
2882      * return fromByteBuffer(species, bb, offset, m, bo);
2883      * }</pre>
2884      *
2885      * @param species species of desired vector
2886      * @param bb the byte buffer
2887      * @param offset the offset into the byte buffer
2888      * @param bo the intended byte order
2889      * @return a vector loaded from a byte buffer
2890      * @throws IllegalArgumentException if byte order of bb
2891      *         is not {@link ByteOrder#LITTLE_ENDIAN}
2892      * @throws IndexOutOfBoundsException
2893      *         if {@code offset+N*4 < 0}
2894      *         or {@code offset+N*4 >= bb.limit()}
2895      *         for any lane {@code N} in the vector
2896      */
2897     @ForceInline
2898     public static
2899     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2900                                         ByteBuffer bb, int offset,
2901                                         ByteOrder bo) {
2902         FloatSpecies vsp = (FloatSpecies) species;
2903         offset = checkFromIndexSize(offset,
2904                                     vsp.laneCount(),
2905                                     bb.limit());
2906         return vsp.dummyVector()
2907             .fromByteBuffer0(bb, offset).maybeSwap(bo);
2908     }
2909 
2910     /**
2911      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2912      * starting at an offset into the byte buffer
2913      * and using a mask.
2914      * <p>
2915      * Bytes are composed into primitive lane elements according to
2916      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
2917      * To avoid errors, the
2918      * {@linkplain ByteBuffer#order() intrinsic byte order}
2919      * of the buffer must be little-endian.
2920      * <p>
2921      * This method behaves as if it returns the result of calling
2922      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2923      * fromByteBuffer()} as follows:
2924      * <pre>{@code
2925      * var bb = ByteBuffer.wrap(a);
2926      * var bo = ByteOrder.LITTLE_ENDIAN;
2927      * var m = species.maskAll(true);
2928      * return fromByteBuffer(species, bb, offset, m, bo);
2929      * }</pre>
2930      *
2931      * @param species species of desired vector
2932      * @param bb the byte buffer
2933      * @param offset the offset into the byte buffer
2934      * @param bo the intended byte order
2935      * @param m the mask controlling lane selection
2936      * @return a vector loaded from a byte buffer
2937      * @throws IllegalArgumentException if byte order of bb
2938      *         is not {@link ByteOrder#LITTLE_ENDIAN}
2939      * @throws IndexOutOfBoundsException
2940      *         if {@code offset+N*4 < 0}
2941      *         or {@code offset+N*4 >= bb.limit()}
2942      *         for any lane {@code N} in the vector
2943      *         where the mask is set
2944      */
2945     @ForceInline
2946     public static
2947     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2948                                         ByteBuffer bb, int offset,
2949                                         ByteOrder bo,
2950                                         VectorMask<Float> m) {
2951         if (m.allTrue()) {
2952             return fromByteBuffer(species, bb, offset, bo);
2953         }
2954         FloatSpecies vsp = (FloatSpecies) species;
2955         checkMaskFromIndexSize(offset,
2956                                vsp, m, 1,
2957                                bb.limit());
2958         FloatVector zero = zero(vsp);
2959         FloatVector v = zero.fromByteBuffer0(bb, offset);
2960         return zero.blend(v.maybeSwap(bo), m);
2961     }
2962 
2963     // Memory store operations
2964 
2965     /**
2966      * Stores this vector into an array of type {@code float[]}
2967      * starting at an offset.
2968      * <p>
2969      * For each vector lane, where {@code N} is the vector lane index,
2970      * the lane element at index {@code N} is stored into the array
2971      * element {@code a[offset+N]}.
2972      *
2973      * @param a the array, of type {@code float[]}
2974      * @param offset the offset into the array
2975      * @throws IndexOutOfBoundsException
2976      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2977      *         for any lane {@code N} in the vector
2978      */
2979     @ForceInline
2980     public final
2981     void intoArray(float[] a, int offset) {
2982         FloatSpecies vsp = vspecies();
2983         offset = checkFromIndexSize(offset,
2984                                     vsp.laneCount(),
2985                                     a.length);
2986         VectorIntrinsics.store(
2987             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
2988             a, arrayAddress(a, offset),
2989             this,
2990             a, offset,
2991             (arr, off, v)
2992             -> v.stOp(arr, off,
2993                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
2994     }
2995 
2996     /**
2997      * Stores this vector into an array of {@code float}
2998      * starting at offset and using a mask.
2999      * <p>
3000      * For each vector lane, where {@code N} is the vector lane index,
3001      * the lane element at index {@code N} is stored into the array
3002      * element {@code a[offset+N]}.
3003      * If the mask lane at {@code N} is unset then the corresponding
3004      * array element {@code a[offset+N]} is left unchanged.
3005      * <p>
3006      * Array range checking is done for lanes where the mask is set.
3007      * Lanes where the mask is unset are not stored and do not need
3008      * to correspond to legitimate elements of {@code a}.
3009      * That is, unset lanes may correspond to array indexes less than
3010      * zero or beyond the end of the array.
3011      *
3012      * @param a the array, of type {@code float[]}
3013      * @param offset the offset into the array
3014      * @param m the mask controlling lane storage
3015      * @throws IndexOutOfBoundsException
3016      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3017      *         for any lane {@code N} in the vector
3018      *         where the mask is set
3019      */
3020     @ForceInline
3021     public final
3022     void intoArray(float[] a, int offset,
3023                    VectorMask<Float> m) {
3024         if (m.allTrue()) {
3025             intoArray(a, offset);
3026         } else {
3027             // FIXME: Cannot vectorize yet, if there's a mask.
3028             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
3029         }
3030     }
3031 
    /**
     * Scatters this vector into an array of type {@code float[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see FloatVector#toIntArray()
     */
    @ForceInline
    public final
    void intoArray(float[] a, int offset,
                   int[] indexMap, int mapOffset) {
        FloatSpecies vsp = vspecies();
        // A single-lane vector scatters to exactly one element, which is
        // the same as a plain store at the mapped position.
        if (length() == 1) {
            intoArray(a, offset + indexMap[mapOffset]);
            return;
        }
        IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
        // When the index species has a different lane count than this
        // vector, store lane by lane through the index map instead of
        // building an index vector.
        if (isp.laneCount() != vsp.laneCount()) {
            stOp(a, offset,
                 (arr, off, i, e) -> {
                     int j = indexMap[mapOffset + i];
                     arr[off + j] = e;
                 });
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // Check the computed indexes against the array length.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // The trailing lambda performs the same scatter one lane at a
        // time.  Its inner lambda uses the outer arr/off rather than its
        // own arr_/off_ parameters; stOp is handed arr and off, so these
        // are expected to be the same values.
        VectorIntrinsics.storeWithMap(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this,
            a, offset, indexMap, mapOffset,
            (arr, off, v, map, mo)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
3099 
3100     /**
3101      * Scatters this vector into an array of type {@code float[]},
3102      * under the control of a mask, and
3103      * using indexes obtained by adding a fixed {@code offset} to a
3104      * series of secondary offsets from an <em>index map</em>.
3105      * The index map is a contiguous sequence of {@code VLENGTH}
3106      * elements in a second array of {@code int}s, starting at a given
3107      * {@code mapOffset}.
3108      * <p>
3109      * For each vector lane, where {@code N} is the vector lane index,
3110      * if the mask lane at index {@code N} is set then
3111      * the lane element at index {@code N} is stored into the array
3112      * element {@code a[f(N)]}, where {@code f(N)} is the
3113      * index mapping expression
3114      * {@code offset + indexMap[mapOffset + N]]}.
3115      *
3116      * @param a the array
3117      * @param offset an offset to combine with the index map offsets
3118      * @param indexMap the index map
3119      * @param mapOffset the offset into the index map
3120      * @param m the mask
3121      * @returns a vector of the values {@code m ? a[f(N)] : 0},
3122      *          {@code f(N) = offset + indexMap[mapOffset + N]]}.
3123      * @throws IndexOutOfBoundsException
3124      *         if {@code mapOffset+N < 0}
3125      *         or if {@code mapOffset+N >= indexMap.length},
3126      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3127      *         is an invalid index into {@code a},
3128      *         for any lane {@code N} in the vector
3129      *         where the mask is set
3130      * @see FloatVector#toIntArray()
3131      */
3132     @ForceInline
3133     public final
3134     void intoArray(float[] a, int offset,
3135                    int[] indexMap, int mapOffset,
3136                    VectorMask<Float> m) {
3137         FloatSpecies vsp = vspecies();
3138         if (m.allTrue()) {
3139             intoArray(a, offset, indexMap, mapOffset);
3140             return;
3141         }
3142         throw new AssertionError("fixme");
3143     }
3144 
3145     /**
3146      * {@inheritDoc} <!--workaround-->
3147      */
3148     @Override
3149     @ForceInline
3150     public final
3151     void intoByteArray(byte[] a, int offset) {
3152         offset = checkFromIndexSize(offset,
3153                                     bitSize() / Byte.SIZE,
3154                                     a.length);
3155         this.maybeSwap(ByteOrder.LITTLE_ENDIAN)
3156             .intoByteArray0(a, offset);
3157     }
3158 
3159     /**
3160      * {@inheritDoc} <!--workaround-->
3161      */
3162     @Override
3163     @ForceInline
3164     public final
3165     void intoByteArray(byte[] a, int offset,
3166                        VectorMask<Float> m) {
3167         if (m.allTrue()) {
3168             intoByteArray(a, offset);
3169             return;
3170         }
3171         FloatSpecies vsp = vspecies();
3172         if (offset >= 0 && offset <= (a.length - vsp.length() * 4)) {
3173             var oldVal = fromByteArray0(a, offset);
3174             var newVal = oldVal.blend(this, m);
3175             newVal.intoByteArray0(a, offset);
3176         } else {
3177             checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
3178             FloatBuffer tb = wrapper(a, offset, NATIVE_ENDIAN);
3179             this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e));
3180         }
3181     }
3182 
3183     /**
3184      * {@inheritDoc} <!--workaround-->
3185      */
3186     @Override
3187     @ForceInline
3188     public final
3189     void intoByteArray(byte[] a, int offset,
3190                        ByteOrder bo,
3191                        VectorMask<Float> m) {
3192         maybeSwap(bo).intoByteArray(a, offset, m);
3193     }
3194 
3195     /**
3196      * {@inheritDoc} <!--workaround-->
3197      */
3198     @Override
3199     @ForceInline
3200     public final
3201     void intoByteBuffer(ByteBuffer bb, int offset,
3202                         ByteOrder bo) {
3203         maybeSwap(bo).intoByteBuffer0(bb, offset);
3204     }
3205 
3206     /**
3207      * {@inheritDoc} <!--workaround-->
3208      */
3209     @Override
3210     @ForceInline
3211     public final
3212     void intoByteBuffer(ByteBuffer bb, int offset,
3213                         ByteOrder bo,
3214                         VectorMask<Float> m) {
3215         if (m.allTrue()) {
3216             intoByteBuffer(bb, offset, bo);
3217             return;
3218         }
3219         FloatSpecies vsp = vspecies();
3220         checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
3221         conditionalStoreNYI(offset, vsp, m, 4, bb.limit());
3222         var oldVal = fromByteBuffer0(bb, offset);
3223         var newVal = oldVal.blend(this.maybeSwap(bo), m);
3224         newVal.intoByteBuffer0(bb, offset);
3225     }
3226 
3227     // ================================================
3228 
3229     // Low-level memory operations.
3230     //
3231     // Note that all of these operations *must* inline into a context
3232     // where the exact species of the involved vector is a
3233     // compile-time constant.  Otherwise, the intrinsic generation
3234     // will fail and performance will suffer.
3235     //
3236     // In many cases this is achieved by re-deriving a version of the
3237     // method in each concrete subclass (per species).  The re-derived
3238     // method simply calls one of these generic methods, with exact
3239     // parameters for the controlling metadata, which is either a
3240     // typed vector or constant species instance.
3241 
3242     // Unchecked loading operations in native byte order.
3243     // Caller is responsible for applying index checks, masking, and
3244     // byte swapping.
3245 
3246     /*package-private*/
3247     abstract
3248     FloatVector fromArray0(float[] a, int offset);
3249     @ForceInline
3250     final
3251     FloatVector fromArray0Template(float[] a, int offset) {
3252         FloatSpecies vsp = vspecies();
3253         return VectorIntrinsics.load(
3254             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3255             a, arrayAddress(a, offset),
3256             a, offset, vsp,
3257             (arr, off, s) -> s.ldOp(arr, off,
3258                                     (arr_, off_, i) -> arr_[off_ + i]));
3259     }
3260 
3261     @Override
3262     abstract
3263     FloatVector fromByteArray0(byte[] a, int offset);
3264     @ForceInline
3265     final
3266     FloatVector fromByteArray0Template(byte[] a, int offset) {
3267         FloatSpecies vsp = vspecies();
3268         return VectorIntrinsics.load(
3269             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3270             a, byteArrayAddress(a, offset),
3271             a, offset, vsp,
3272             (arr, off, s) -> {
3273                 FloatBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3274                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3275             });
3276     }
3277 
3278     abstract
3279     FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
3280     @ForceInline
3281     final
3282     FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3283         FloatSpecies vsp = vspecies();
3284         return VectorIntrinsics.load(
3285             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3286             bufferBase(bb), bufferAddress(bb, offset),
3287             bb, offset, vsp,
3288             (buf, off, s) -> {
3289                 FloatBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3290                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3291            });
3292     }
3293 
3294     // Unchecked storing operations in native byte order.
3295     // Caller is responsible for applying index checks, masking, and
3296     // byte swapping.
3297 
3298     abstract
3299     void intoArray0(float[] a, int offset);
3300     @ForceInline
3301     final
3302     void intoArray0Template(float[] a, int offset) {
3303         FloatSpecies vsp = vspecies();
3304         VectorIntrinsics.store(
3305             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3306             a, arrayAddress(a, offset),
3307             this, a, offset,
3308             (arr, off, v)
3309             -> v.stOp(arr, off,
3310                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3311     }
3312 
3313     abstract
3314     void intoByteArray0(byte[] a, int offset);
3315     @ForceInline
3316     final
3317     void intoByteArray0Template(byte[] a, int offset) {
3318         FloatSpecies vsp = vspecies();
3319         VectorIntrinsics.store(
3320             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3321             a, byteArrayAddress(a, offset),
3322             this, a, offset,
3323             (arr, off, v) -> {
3324                 FloatBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3325                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3326             });
3327     }
3328 
3329     @ForceInline
3330     final
3331     void intoByteBuffer0(ByteBuffer bb, int offset) {
3332         FloatSpecies vsp = vspecies();
3333         VectorIntrinsics.store(
3334             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3335             bufferBase(bb), bufferAddress(bb, offset),
3336             this, bb, offset,
3337             (buf, off, v) -> {
3338                 FloatBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3339                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3340             });
3341     }
3342 
3343     // End of low-level memory operations.
3344 
3345     private static
3346     void checkMaskFromIndexSize(int offset,
3347                                 FloatSpecies vsp,
3348                                 VectorMask<Float> m,
3349                                 int scale,
3350                                 int limit) {
3351         ((AbstractMask<Float>)m)
3352             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3353     }
3354 
3355     @ForceInline
3356     private void conditionalStoreNYI(int offset,
3357                                      FloatSpecies vsp,
3358                                      VectorMask<Float> m,
3359                                      int scale,
3360                                      int limit) {
3361         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3362             String msg =
3363                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3364                               offset, limit, m, vsp);
3365             throw new AssertionError(msg);
3366         }
3367     }
3368 
3369     /*package-private*/
3370     @Override
3371     @ForceInline
3372     final
3373     FloatVector maybeSwap(ByteOrder bo) {
3374         if (bo != NATIVE_ENDIAN) {
3375             return this.reinterpretAsBytes()
3376                 .rearrange(swapBytesShuffle())
3377                 .reinterpretAsFloats();
3378         }
3379         return this;
3380     }
3381 
3382     static final int ARRAY_SHIFT =
3383         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE);
3384     static final long ARRAY_BASE =
3385         Unsafe.ARRAY_FLOAT_BASE_OFFSET;
3386 
3387     @ForceInline
3388     static long arrayAddress(float[] a, int index) {
3389         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3390     }
3391 
3392     @ForceInline
3393     static long byteArrayAddress(byte[] a, int index) {
3394         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3395     }
3396 
3397     // Byte buffer wrappers.
3398     private static FloatBuffer wrapper(ByteBuffer bb, int offset,
3399                                         ByteOrder bo) {
3400         return bb.duplicate().position(offset).slice()
3401             .order(bo).asFloatBuffer();
3402     }
3403     private static FloatBuffer wrapper(byte[] a, int offset,
3404                                         ByteOrder bo) {
3405         return ByteBuffer.wrap(a, offset, a.length - offset)
3406             .order(bo).asFloatBuffer();
3407     }
3408 
3409     // ================================================
3410 
3411     /// Reinterpreting view methods:
3412     //   lanewise reinterpret: viewAsXVector()
3413     //   keep shape, redraw lanes: reinterpretAsEs()
3414 
3415     /**
3416      * {@inheritDoc} <!--workaround-->
3417      */
3418     @ForceInline
3419     @Override
3420     public final ByteVector reinterpretAsBytes() {
3421          // Going to ByteVector, pay close attention to byte order.
3422          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3423          return asByteVectorRaw();
3424          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3425     }
3426 
3427     /**
3428      * {@inheritDoc} <!--workaround-->
3429      */
3430     @ForceInline
3431     @Override
3432     public final IntVector viewAsIntegralLanes() {
3433         LaneType ilt = LaneType.FLOAT.asIntegral();
3434         return (IntVector) asVectorRaw(ilt);
3435     }
3436 
3437     /**
3438      * {@inheritDoc} <!--workaround-->
3439      */
3440     @ForceInline
3441     @Override
3442     public final
3443     FloatVector
3444     viewAsFloatingLanes() {
3445         return this;
3446     }
3447 
3448     // ================================================
3449 
3450     /// Object methods: toString, equals, hashCode
3451     //
3452     // Object methods are defined as if via Arrays.toString, etc.,
3453     // is applied to the array of elements.  Two equal vectors
3454     // are required to have equal species and equal lane values.
3455 
3456     /**
3457      * Returns a string representation of this vector, of the form
3458      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3459      * in lane order.
3460      *
3461      * The string is produced as if by a call to {@link
3462      * java.util.Arrays#toString(float[]) Arrays.toString()},
3463      * as appropriate to the {@code float} array returned by
3464      * {@link #toArray this.toArray()}.
3465      *
3466      * @return a string of the form {@code "[0,1,2...]"}
3467      * reporting the lane values of this vector
3468      */
3469     @Override
3470     @ForceInline
3471     public final
3472     String toString() {
3473         // now that toArray is strongly typed, we can define this
3474         return Arrays.toString(toArray());
3475     }
3476 
3477     /**
3478      * {@inheritDoc} <!--workaround-->
3479      */
3480     @Override
3481     @ForceInline
3482     public final
3483     boolean equals(Object obj) {
3484         if (obj instanceof Vector) {
3485             Vector<?> that = (Vector<?>) obj;
3486             if (this.species().equals(that.species())) {
3487                 return this.eq(that.check(this.species())).allTrue();
3488             }
3489         }
3490         return false;
3491     }
3492 
3493     /**
3494      * {@inheritDoc} <!--workaround-->
3495      */
3496     @Override
3497     @ForceInline
3498     public final
3499     int hashCode() {
3500         // now that toArray is strongly typed, we can define this
3501         return Objects.hash(species(), Arrays.hashCode(toArray()));
3502     }
3503 
3504     // ================================================
3505 
3506     // Species
3507 
3508     /**
3509      * Class representing {@link FloatVector}'s of the same {@link VectorShape VectorShape}.
3510      */
3511     /*package-private*/
3512     static final class FloatSpecies extends AbstractSpecies<Float> {
3513         private FloatSpecies(VectorShape shape,
3514                 Class<? extends FloatVector> vectorType,
3515                 Class<? extends AbstractMask<Float>> maskType,
3516                 Function<Object, FloatVector> vectorFactory) {
3517             super(shape, LaneType.of(float.class),
3518                   vectorType, maskType,
3519                   vectorFactory);
3520             assert(this.elementSize() == Float.SIZE);
3521         }
3522 
3523         // Specializing overrides:
3524 
3525         @Override
3526         @ForceInline
3527         public final Class<Float> elementType() {
3528             return float.class;
3529         }
3530 
3531         @Override
3532         @ForceInline
3533         public final Class<Float> genericElementType() {
3534             return Float.class;
3535         }
3536 
3537         @Override
3538         @ForceInline
3539         public final Class<float[]> arrayType() {
3540             return float[].class;
3541         }
3542 
3543         @SuppressWarnings("unchecked")
3544         @Override
3545         @ForceInline
3546         public final Class<? extends FloatVector> vectorType() {
3547             return (Class<? extends FloatVector>) vectorType;
3548         }
3549 
3550         @Override
3551         @ForceInline
3552         public final long checkValue(long e) {
3553             longToElementBits(e);  // only for exception
3554             return e;
3555         }
3556 
3557         /*package-private*/
3558         @Override
3559         @ForceInline
3560         final FloatVector broadcastBits(long bits) {
3561             return (FloatVector)
3562                 VectorIntrinsics.broadcastCoerced(
3563                     vectorType, float.class, laneCount,
3564                     bits, this,
3565                     (bits_, s_) -> s_.rvOp(i -> bits_));
3566         }
3567 
3568         /*package-private*/
3569         @ForceInline
3570         
3571         final FloatVector broadcast(float e) {
3572             return broadcastBits(toBits(e));
3573         }
3574 
3575         @Override
3576         @ForceInline
3577         public final FloatVector broadcast(long e) {
3578             return broadcastBits(longToElementBits(e));
3579         }
3580 
3581         /*package-private*/
3582         final @Override
3583         @ForceInline
3584         long longToElementBits(long value) {
3585             // Do the conversion, and then test it for failure.
3586             float e = (float) value;
3587             if ((long) e != value) {
3588                 throw badElementBits(value, e);
3589             }
3590             return toBits(e);
3591         }
3592 
3593         /*package-private*/
3594         @ForceInline
3595         static long toIntegralChecked(float e, boolean convertToInt) {
3596             long value = convertToInt ? (int) e : (long) e;
3597             if ((float) value != e) {
3598                 throw badArrayBits(e, convertToInt, value);
3599             }
3600             return value;
3601         }
3602 
3603         @Override
3604         @ForceInline
3605         public final FloatVector fromValues(long... values) {
3606             VectorIntrinsics.requireLength(values.length, laneCount);
3607             float[] va = new float[laneCount()];
3608             for (int i = 0; i < va.length; i++) {
3609                 long lv = values[i];
3610                 float v = (float) lv;
3611                 va[i] = v;
3612                 if ((long)v != lv) {
3613                     throw badElementBits(lv, v);
3614                 }
3615             }
3616             return dummyVector().fromArray0(va, 0);
3617         }
3618 
3619         /* this non-public one is for internal conversions */
3620         @Override
3621         @ForceInline
3622         final FloatVector fromIntValues(int[] values) {
3623             VectorIntrinsics.requireLength(values.length, laneCount);
3624             float[] va = new float[laneCount()];
3625             for (int i = 0; i < va.length; i++) {
3626                 int lv = values[i];
3627                 float v = (float) lv;
3628                 va[i] = v;
3629                 if ((int)v != lv) {
3630                     throw badElementBits(lv, v);
3631                 }
3632             }
3633             return dummyVector().fromArray0(va, 0);
3634         }
3635 
3636         // Virtual constructors
3637 
3638         @ForceInline
3639         @Override final
3640         public FloatVector fromArray(Object a, int offset) {
3641             // User entry point:  Be careful with inputs.
3642             return FloatVector
3643                 .fromArray(this, (float[]) a, offset);
3644         }
3645 
3646         @Override final
3647         FloatVector dummyVector() {
3648             return (FloatVector) super.dummyVector();
3649         }
3650 
3651         final
3652         FloatVector vectorFactory(float[] vec) {
3653             // Species delegates all factory requests to its dummy
3654             // vector.  The dummy knows all about it.
3655             return dummyVector().vectorFactory(vec);
3656         }
3657 
3658         /*package-private*/
3659         final @Override
3660         @ForceInline
3661         FloatVector rvOp(RVOp f) {
3662             float[] res = new float[laneCount()];
3663             for (int i = 0; i < res.length; i++) {
3664                 int bits = (int) f.apply(i);
3665                 res[i] = fromBits(bits);
3666             }
3667             return dummyVector().vectorFactory(res);
3668         }
3669 
3670         FloatVector vOp(FVOp f) {
3671             float[] res = new float[laneCount()];
3672             for (int i = 0; i < res.length; i++) {
3673                 res[i] = f.apply(i);
3674             }
3675             return dummyVector().vectorFactory(res);
3676         }
3677 
3678         FloatVector vOp(VectorMask<Float> m, FVOp f) {
3679             float[] res = new float[laneCount()];
3680             boolean[] mbits = ((AbstractMask<Float>)m).getBits();
3681             for (int i = 0; i < res.length; i++) {
3682                 if (mbits[i]) {
3683                     res[i] = f.apply(i);
3684                 }
3685             }
3686             return dummyVector().vectorFactory(res);
3687         }
3688 
3689         /*package-private*/
3690         @ForceInline
3691         <M> FloatVector ldOp(M memory, int offset,
3692                                       FLdOp<M> f) {
3693             return dummyVector().ldOp(memory, offset, f);
3694         }
3695 
3696         /*package-private*/
3697         @ForceInline
3698         <M> FloatVector ldOp(M memory, int offset,
3699                                       AbstractMask<Float> m,
3700                                       FLdOp<M> f) {
3701             return dummyVector().ldOp(memory, offset, m, f);
3702         }
3703 
3704         /*package-private*/
3705         @ForceInline
3706         <M> void stOp(M memory, int offset, FStOp<M> f) {
3707             dummyVector().stOp(memory, offset, f);
3708         }
3709 
3710         /*package-private*/
3711         @ForceInline
3712         <M> void stOp(M memory, int offset,
3713                       AbstractMask<Float> m,
3714                       FStOp<M> f) {
3715             dummyVector().stOp(memory, offset, m, f);
3716         }
3717 
3718         // N.B. Make sure these constant vectors and
3719         // masks load up correctly into registers.
3720         //
3721         // Also, see if we can avoid all that switching.
3722         // Could we cache both vectors and both masks in
3723         // this species object?
3724 
3725         // Zero and iota vector access
3726         @Override
3727         @ForceInline
3728         public final FloatVector zero() {
3729             if ((Class<?>) vectorType() == FloatMaxVector.class)
3730                 return FloatMaxVector.ZERO;
3731             switch (vectorBitSize()) {
3732                 case 64: return Float64Vector.ZERO;
3733                 case 128: return Float128Vector.ZERO;
3734                 case 256: return Float256Vector.ZERO;
3735                 case 512: return Float512Vector.ZERO;
3736             }
3737             throw new AssertionError();
3738         }        
3739 
3740         @Override
3741         @ForceInline
3742         public final FloatVector iota() {
3743             if ((Class<?>) vectorType() == FloatMaxVector.class)
3744                 return FloatMaxVector.IOTA;
3745             switch (vectorBitSize()) {
3746                 case 64: return Float64Vector.IOTA;
3747                 case 128: return Float128Vector.IOTA;
3748                 case 256: return Float256Vector.IOTA;
3749                 case 512: return Float512Vector.IOTA;
3750             }
3751             throw new AssertionError();
3752         }
3753 
3754         // Mask access
3755         @Override
3756         @ForceInline
3757         public final VectorMask<Float> maskAll(boolean bit) {
3758             if ((Class<?>) vectorType() == FloatMaxVector.class)
3759                 return FloatMaxVector.FloatMaxMask.maskAll(bit);
3760             switch (vectorBitSize()) {
3761                 case 64: return Float64Vector.Float64Mask.maskAll(bit);
3762                 case 128: return Float128Vector.Float128Mask.maskAll(bit);
3763                 case 256: return Float256Vector.Float256Mask.maskAll(bit);
3764                 case 512: return Float512Vector.Float512Mask.maskAll(bit);
3765             }
3766             throw new AssertionError();
3767         }
3768     }
3769 
3770     /**
3771      * Finds a species for an element type of {@code float} and shape.
3772      *
3773      * @param s the shape
3774      * @return a species for an element type of {@code float} and shape
3775      * @throws IllegalArgumentException if no such species exists for the shape
3776      */
3777     static FloatSpecies species(VectorShape s) {
3778         Objects.requireNonNull(s);
3779         switch (s) {
3780             case S_64_BIT: return (FloatSpecies) SPECIES_64;
3781             case S_128_BIT: return (FloatSpecies) SPECIES_128;
3782             case S_256_BIT: return (FloatSpecies) SPECIES_256;
3783             case S_512_BIT: return (FloatSpecies) SPECIES_512;
3784             case S_Max_BIT: return (FloatSpecies) SPECIES_MAX;
3785             default: throw new IllegalArgumentException("Bad shape: " + s);
3786         }
3787     }
3788 
3789     /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
3790     public static final VectorSpecies<Float> SPECIES_64
3791         = new FloatSpecies(VectorShape.S_64_BIT,
3792                             Float64Vector.class,
3793                             Float64Vector.Float64Mask.class,
3794                             Float64Vector::new);
3795 
3796     /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
3797     public static final VectorSpecies<Float> SPECIES_128
3798         = new FloatSpecies(VectorShape.S_128_BIT,
3799                             Float128Vector.class,
3800                             Float128Vector.Float128Mask.class,
3801                             Float128Vector::new);
3802 
3803     /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
3804     public static final VectorSpecies<Float> SPECIES_256
3805         = new FloatSpecies(VectorShape.S_256_BIT,
3806                             Float256Vector.class,
3807                             Float256Vector.Float256Mask.class,
3808                             Float256Vector::new);
3809 
3810     /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
3811     public static final VectorSpecies<Float> SPECIES_512
3812         = new FloatSpecies(VectorShape.S_512_BIT,
3813                             Float512Vector.class,
3814                             Float512Vector.Float512Mask.class,
3815                             Float512Vector::new);
3816 
3817     /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
3818     public static final VectorSpecies<Float> SPECIES_MAX
3819         = new FloatSpecies(VectorShape.S_Max_BIT,
3820                             FloatMaxVector.class,
3821                             FloatMaxVector.FloatMaxMask.class,
3822                             FloatMaxVector::new);
3823 
3824     /**
3825      * Preferred species for {@link FloatVector}s.
3826      * A preferred species is a species of maximal bit-size for the platform.
3827      */
3828     public static final VectorSpecies<Float> SPECIES_PREFERRED
3829         = (FloatSpecies) VectorSpecies.ofPreferred(float.class);
3830 }