New src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java

   1 /*
   2  * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.FloatBuffer;
  29 import java.nio.ByteOrder;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.IntUnaryOperator;
  34 import java.util.function.Function;
  35 import java.util.function.UnaryOperator;
  36 import java.util.concurrent.ThreadLocalRandom;
  37 
  38 import jdk.internal.misc.Unsafe;
  39 import jdk.internal.vm.annotation.ForceInline;
  40 
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 import static jdk.incubator.vector.VectorOperators.*;
  43 
  44 // -- This file was mechanically generated: Do not edit! -- //
  45 
  46 /**
  47  * A specialized {@link Vector} representing an ordered immutable sequence of
  48  * {@code float} values.
  49  */
  50 @SuppressWarnings("cast")  // warning: redundant cast
  51 public abstract class FloatVector extends AbstractVector<Float> {
  52 
  53     FloatVector() {}
  54 
  55     static final int FORBID_OPCODE_KIND = VO_NOFP;
  56 
  57     @ForceInline
  58     static int opCode(Operator op) {
  59         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  60     }
  61     @ForceInline
  62     static int opCode(Operator op, int requireKind) {
  63         requireKind |= VO_OPCODE_VALID;
  64         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  65     }
  66     @ForceInline
  67     static boolean opKind(Operator op, int bit) {
  68         return VectorOperators.opKind(op, bit);
  69     }
  70 
  71     // Virtualized factories and operators,
  72     // coded with portable definitions.
  73     // These are all @ForceInline in case
  74     // they need to be used performantly.
  75     // The various shape-specific subclasses
  76     // also specialize them by wrapping
  77     // them in a call like this:
  78     //    return (Byte128Vector)
  79     //       super.bOp((Byte128Vector) o);
  80     // The purpose of that is to forcibly inline
  81     // the generic definition from this file
  82     // into a sharply type- and size-specific
  83     // wrapper in the subclass file, so that
  84     // the JIT can specialize the code.
  85     // The code is only inlined and expanded
  86     // if it gets hot.  Think of it as a cheap
  87     // and lazy version of C++ templates.
  88 
  89     // Virtualized getter
  90 
  91     /*package-private*/
  92     abstract float[] getElements();
  93 
  94     // Virtualized constructors
  95 
  96     /**
  97      * Build a vector directly using my own constructor.
  98      * It is an error if the array is aliased elsewhere.
  99      */
 100     /*package-private*/
 101     abstract FloatVector vectorFactory(float[] vec);
 102 
 103     /**
 104      * Build a mask directly using my species.
 105      * It is an error if the array is aliased elsewhere.
 106      */
 107     /*package-private*/
 108     @ForceInline
 109     final
 110     AbstractMask<Float> maskFactory(boolean[] bits) {
 111         return vspecies().maskFactory(bits);
 112     }
 113 
 114     // Constant loader (takes dummy as vector arg)
 115     interface FVOp {
 116         float apply(int i);
 117     }
 118 
 119     /*package-private*/
 120     @ForceInline
 121     final
 122     FloatVector vOp(FVOp f) {
 123         float[] res = new float[length()];
 124         for (int i = 0; i < res.length; i++) {
 125             res[i] = f.apply(i);
 126         }
 127         return vectorFactory(res);
 128     }
 129 
 130     @ForceInline
 131     final
 132     FloatVector vOp(VectorMask<Float> m, FVOp f) {
 133         float[] res = new float[length()];
 134         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 135         for (int i = 0; i < res.length; i++) {
 136             if (mbits[i]) {
 137                 res[i] = f.apply(i);
 138             }
 139         }
 140         return vectorFactory(res);
 141     }
 142 
 143     // Unary operator
 144 
 145     /*package-private*/
 146     interface FUnOp {
 147         float apply(int i, float a);
 148     }
 149 
 150     /*package-private*/
 151     abstract
 152     FloatVector uOp(FUnOp f);
 153     @ForceInline
 154     final
 155     FloatVector uOpTemplate(FUnOp f) {
 156         float[] vec = getElements();
 157         float[] res = new float[length()];
 158         for (int i = 0; i < res.length; i++) {
 159             res[i] = f.apply(i, vec[i]);
 160         }
 161         return vectorFactory(res);
 162     }
 163 
 164     /*package-private*/
 165     abstract
 166     FloatVector uOp(VectorMask<Float> m,
 167                              FUnOp f);
 168     @ForceInline
 169     final
 170     FloatVector uOpTemplate(VectorMask<Float> m,
 171                                      FUnOp f) {
 172         float[] vec = getElements();
 173         float[] res = new float[length()];
 174         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 175         for (int i = 0; i < res.length; i++) {
 176             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 177         }
 178         return vectorFactory(res);
 179     }
 180 
 181     // Binary operator
 182 
 183     /*package-private*/
 184     interface FBinOp {
 185         float apply(int i, float a, float b);
 186     }
 187 
 188     /*package-private*/
 189     abstract
 190     FloatVector bOp(Vector<Float> o,
 191                              FBinOp f);
 192     @ForceInline
 193     final
 194     FloatVector bOpTemplate(Vector<Float> o,
 195                                      FBinOp f) {
 196         float[] res = new float[length()];
 197         float[] vec1 = this.getElements();
 198         float[] vec2 = ((FloatVector)o).getElements();
 199         for (int i = 0; i < res.length; i++) {
 200             res[i] = f.apply(i, vec1[i], vec2[i]);
 201         }
 202         return vectorFactory(res);
 203     }
 204 
 205     /*package-private*/
 206     abstract
 207     FloatVector bOp(Vector<Float> o,
 208                              VectorMask<Float> m,
 209                              FBinOp f);
 210     @ForceInline
 211     final
 212     FloatVector bOpTemplate(Vector<Float> o,
 213                                      VectorMask<Float> m,
 214                                      FBinOp f) {
 215         float[] res = new float[length()];
 216         float[] vec1 = this.getElements();
 217         float[] vec2 = ((FloatVector)o).getElements();
 218         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 219         for (int i = 0; i < res.length; i++) {
 220             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 221         }
 222         return vectorFactory(res);
 223     }
 224 
 225     // Ternary operator
 226 
 227     /*package-private*/
 228     interface FTriOp {
 229         float apply(int i, float a, float b, float c);
 230     }
 231 
 232     /*package-private*/
 233     abstract
 234     FloatVector tOp(Vector<Float> o1,
 235                              Vector<Float> o2,
 236                              FTriOp f);
 237     @ForceInline
 238     final
 239     FloatVector tOpTemplate(Vector<Float> o1,
 240                                      Vector<Float> o2,
 241                                      FTriOp f) {
 242         float[] res = new float[length()];
 243         float[] vec1 = this.getElements();
 244         float[] vec2 = ((FloatVector)o1).getElements();
 245         float[] vec3 = ((FloatVector)o2).getElements();
 246         for (int i = 0; i < res.length; i++) {
 247             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 248         }
 249         return vectorFactory(res);
 250     }
 251 
 252     /*package-private*/
 253     abstract
 254     FloatVector tOp(Vector<Float> o1,
 255                              Vector<Float> o2,
 256                              VectorMask<Float> m,
 257                              FTriOp f);
 258     @ForceInline
 259     final
 260     FloatVector tOpTemplate(Vector<Float> o1,
 261                                      Vector<Float> o2,
 262                                      VectorMask<Float> m,
 263                                      FTriOp f) {
 264         float[] res = new float[length()];
 265         float[] vec1 = this.getElements();
 266         float[] vec2 = ((FloatVector)o1).getElements();
 267         float[] vec3 = ((FloatVector)o2).getElements();
 268         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 269         for (int i = 0; i < res.length; i++) {
 270             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 271         }
 272         return vectorFactory(res);
 273     }
 274 
 275     // Reduction operator
 276 
 277     /*package-private*/
 278     abstract
 279     float rOp(float v, FBinOp f);
 280     @ForceInline
 281     final
 282     float rOpTemplate(float v, FBinOp f) {
 283         float[] vec = getElements();
 284         for (int i = 0; i < vec.length; i++) {
 285             v = f.apply(i, v, vec[i]);
 286         }
 287         return v;
 288     }
 289 
 290     // Memory reference
 291 
 292     /*package-private*/
 293     interface FLdOp<M> {
 294         float apply(M memory, int offset, int i);
 295     }
 296 
 297     /*package-private*/
 298     @ForceInline
 299     final
 300     <M> FloatVector ldOp(M memory, int offset,
 301                                   FLdOp<M> f) {
 302         //dummy; no vec = getElements();
 303         float[] res = new float[length()];
 304         for (int i = 0; i < res.length; i++) {
 305             res[i] = f.apply(memory, offset, i);
 306         }
 307         return vectorFactory(res);
 308     }
 309 
 310     /*package-private*/
 311     @ForceInline
 312     final
 313     <M> FloatVector ldOp(M memory, int offset,
 314                                   VectorMask<Float> m,
 315                                   FLdOp<M> f) {
 316         //float[] vec = getElements();
 317         float[] res = new float[length()];
 318         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 319         for (int i = 0; i < res.length; i++) {
 320             if (mbits[i]) {
 321                 res[i] = f.apply(memory, offset, i);
 322             }
 323         }
 324         return vectorFactory(res);
 325     }
 326 
 327     interface FStOp<M> {
 328         void apply(M memory, int offset, int i, float a);
 329     }
 330 
 331     /*package-private*/
 332     @ForceInline
 333     final
 334     <M> void stOp(M memory, int offset,
 335                   FStOp<M> f) {
 336         float[] vec = getElements();
 337         for (int i = 0; i < vec.length; i++) {
 338             f.apply(memory, offset, i, vec[i]);
 339         }
 340     }
 341 
 342     /*package-private*/
 343     @ForceInline
 344     final
 345     <M> void stOp(M memory, int offset,
 346                   VectorMask<Float> m,
 347                   FStOp<M> f) {
 348         float[] vec = getElements();
 349         boolean[] mbits = ((AbstractMask<Float>)m).getBits();
 350         for (int i = 0; i < vec.length; i++) {
 351             if (mbits[i]) {
 352                 f.apply(memory, offset, i, vec[i]);
 353             }
 354         }
 355     }
 356 
 357     // Binary test
 358 
 359     /*package-private*/
 360     interface FBinTest {
 361         boolean apply(int cond, int i, float a, float b);
 362     }
 363 
 364     /*package-private*/
 365     @ForceInline
 366     final
 367     AbstractMask<Float> bTest(int cond,
 368                                   Vector<Float> o,
 369                                   FBinTest f) {
 370         float[] vec1 = getElements();
 371         float[] vec2 = ((FloatVector)o).getElements();
 372         boolean[] bits = new boolean[length()];
 373         for (int i = 0; i < length(); i++){
 374             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 375         }
 376         return maskFactory(bits);
 377     }
 378 
 379     /*package-private*/
 380     @ForceInline
 381     static boolean doBinTest(int cond, float a, float b) {
 382         switch (cond) {
 383         case BT_eq:  return a == b;
 384         case BT_ne:  return a != b;
 385         case BT_lt:  return a < b;
 386         case BT_le:  return a <= b;
 387         case BT_gt:  return a > b;
 388         case BT_ge:  return a >= b;
 389         }
 390         throw new AssertionError(Integer.toHexString(cond));
 391     }
 392 
 393     /*package-private*/
 394     @Override
 395     abstract FloatSpecies vspecies();
 396 
 397     /*package-private*/
 398     @ForceInline
 399     static long toBits(float e) {
 400         return  Float.floatToIntBits(e);
 401     }
 402 
 403     /*package-private*/
 404     @ForceInline
 405     static float fromBits(long bits) {
 406         return Float.intBitsToFloat((int)bits);
 407     }
 408 
 409     // Static factories (other than memory operations)
 410 
 411     // Note: A surprising behavior in javadoc
 412     // sometimes makes a lone /** {@inheritDoc} */
 413     // comment drop the method altogether,
  414     // apparently if the method mentions a
 415     // parameter or return type of Vector<Float>
 416     // instead of Vector<E> as originally specified.
 417     // Adding an empty HTML fragment appears to
 418     // nudge javadoc into providing the desired
 419     // inherited documentation.  We use the HTML
 420     // comment <!--workaround--> for this.
 421 
 422     /**
 423      * {@inheritDoc} <!--workaround-->
 424      */
 425     @ForceInline
 426     public static FloatVector zero(VectorSpecies<Float> species) {
 427         FloatSpecies vsp = (FloatSpecies) species;
 428         return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), float.class, species.length(),
 429                         toBits(0.0f), vsp,
 430                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
 431     }
 432 
 433     /**
 434      * Returns a vector of the same species as this one
 435      * where all lane elements are set to
 436      * the primitive value {@code e}.
 437      *
 438      * The contents of the current vector are discarded;
 439      * only the species is relevant to this operation.
 440      *
 441      * <p> This method returns the value of this expression:
 442      * {@code FloatVector.broadcast(this.species(), e)}.
 443      *
 444      * @apiNote
 445      * Unlike the similar method named {@code broadcast()}
 446      * in the supertype {@code Vector}, this method does not
 447      * need to validate its argument, and cannot throw
 448      * {@code IllegalArgumentException}.  This method is
 449      * therefore preferable to the supertype method.
 450      *
 451      * @param e the value to broadcast
 452      * @return a vector where all lane elements are set to
 453      *         the primitive value {@code e}
 454      * @see #broadcast(VectorSpecies,long)
 455      * @see Vector#broadcast(long)
 456      * @see VectorSpecies#broadcast(long)
 457      */
 458     public abstract FloatVector broadcast(float e);
 459 
 460     /**
 461      * Returns a vector of the given species
 462      * where all lane elements are set to
 463      * the primitive value {@code e}.
 464      *
 465      * @param species species of the desired vector
 466      * @param e the value to broadcast
 467      * @return a vector where all lane elements are set to
 468      *         the primitive value {@code e}
 469      * @see #broadcast(long)
 470      * @see Vector#broadcast(long)
 471      * @see VectorSpecies#broadcast(long)
 472      */
 473     public static FloatVector broadcast(VectorSpecies<Float> species, float e) {
 474         FloatSpecies vsp = (FloatSpecies) species;
 475         return vsp.broadcast(e);
 476     }
 477 
 478     /*package-private*/
 479     @ForceInline
 480     final FloatVector broadcastTemplate(float e) {
 481         FloatSpecies vsp = vspecies();
 482         return vsp.broadcast(e);
 483     }
 484 
 485     /**
 486      * {@inheritDoc} <!--workaround-->
 487      * @apiNote
 488      * When working with vector subtypes like {@code FloatVector},
 489      * {@linkplain #broadcast(float) the more strongly typed method}
 490      * is typically selected.  It can be explicitly selected
 491      * using a cast: {@code v.broadcast((float)e)}.
 492      * The two expressions will produce numerically identical results.
 493      */
 494     @Override
 495     public abstract FloatVector broadcast(long e);
 496 
 497     /**
 498      * Returns a vector of the given species
 499      * where all lane elements are set to
 500      * the primitive value {@code e}.
 501      *
 502      * The {@code long} value must be accurately representable
 503      * by the {@code ETYPE} of the vector species, so that
 504      * {@code e==(long)(ETYPE)e}.
 505      *
 506      * @param species species of the desired vector
 507      * @param e the value to broadcast
 508      * @return a vector where all lane elements are set to
 509      *         the primitive value {@code e}
 510      * @throws IllegalArgumentException
 511      *         if the given {@code long} value cannot
 512      *         be represented by the vector's {@code ETYPE}
 513      * @see #broadcast(VectorSpecies,float)
 514      * @see VectorSpecies#checkValue(VectorSpecies,long)
 515      */
 516     public static FloatVector broadcast(VectorSpecies<Float> species, long e) {
 517         FloatSpecies vsp = (FloatSpecies) species;
 518         return vsp.broadcast(e);
 519     }
 520 
 521     /*package-private*/
 522     @ForceInline
 523     final FloatVector broadcastTemplate(long e) {
 524         return vspecies().broadcast(e);
 525     }
 526 
 527     /**
 528      * Returns a vector where each lane element is set to given
 529      * primitive values.
 530      * <p>
 531      * For each vector lane, where {@code N} is the vector lane index, the
 532      * the primitive value at index {@code N} is placed into the resulting
 533      * vector at lane index {@code N}.
 534      *
 535      * @param species species of the desired vector
 536      * @param es the given primitive values
 537      * @return a vector where each lane element is set to given primitive
 538      * values
 539      * @throws IllegalArgumentException
 540      *         if {@code es.length != species.length()}
 541      */
 542     @ForceInline
 543     @SuppressWarnings("unchecked")
 544     public static FloatVector fromValues(VectorSpecies<Float> species, float... es) {
 545         FloatSpecies vsp = (FloatSpecies) species;
 546         int vlength = vsp.laneCount();
 547         VectorIntrinsics.requireLength(es.length, vlength);
 548         // Get an unaliased copy and use it directly:
 549         return vsp.vectorFactory(Arrays.copyOf(es, vlength));
 550     }
 551 
 552     /**
 553      * Returns a vector where the first lane element is set to the primtive
 554      * value {@code e}, all other lane elements are set to the default
 555      * value.
 556      *
 557      * @param species species of the desired vector
 558      * @param e the value
 559      * @return a vector where the first lane element is set to the primitive
 560      * value {@code e}
 561      */
 562     // FIXME: Does this carry its weight?
 563     @ForceInline
 564     public static FloatVector single(VectorSpecies<Float> species, float e) {
 565         return zero(species).withLane(0, e);
 566     }
 567 
 568     /**
 569      * Returns a vector where each lane element is set to a randomly
 570      * generated primitive value.
 571      *
 572      * The semantics are equivalent to calling
 573      * {@link ThreadLocalRandom#nextFloat()}
 574      * for each lane, from first to last.
 575      *
 576      * @param species species of the desired vector
 577      * @return a vector where each lane elements is set to a randomly
 578      * generated primitive value
 579      */
 580     public static FloatVector random(VectorSpecies<Float> species) {
 581         FloatSpecies vsp = (FloatSpecies) species;
 582         ThreadLocalRandom r = ThreadLocalRandom.current();
 583         return vsp.vOp(i -> nextRandom(r));
 584     }
 585     private static float nextRandom(ThreadLocalRandom r) {
 586         return r.nextFloat();
 587     }
 588 
 589     // Unary lanewise support
 590 
 591     /**
 592      * {@inheritDoc} <!--workaround-->
 593      */
 594     public abstract
 595     FloatVector lanewise(VectorOperators.Unary op);
 596 
 597     @ForceInline
 598     final
 599     FloatVector lanewiseTemplate(VectorOperators.Unary op) {
 600         if (opKind(op, VO_SPECIAL)) {
 601             if (op == ZOMO) {
 602                 return blend(broadcast(-1), compare(NE, 0));
 603             }
 604         }
 605         int opc = opCode(op);
 606         return VectorIntrinsics.unaryOp(
 607             opc, getClass(), float.class, length(),
 608             this,
 609             UN_IMPL.find(op, opc, (opc_) -> {
 610               switch (opc_) {
 611                 case VECTOR_OP_NEG: return v0 ->
 612                         v0.uOp((i, a) -> (float) -a);
 613                 case VECTOR_OP_ABS: return v0 ->
 614                         v0.uOp((i, a) -> (float) Math.abs(a));
 615                 case VECTOR_OP_SIN: return v0 ->
 616                         v0.uOp((i, a) -> (float) Math.sin(a));
 617                 case VECTOR_OP_COS: return v0 ->
 618                         v0.uOp((i, a) -> (float) Math.cos(a));
 619                 case VECTOR_OP_TAN: return v0 ->
 620                         v0.uOp((i, a) -> (float) Math.tan(a));
 621                 case VECTOR_OP_ASIN: return v0 ->
 622                         v0.uOp((i, a) -> (float) Math.asin(a));
 623                 case VECTOR_OP_ACOS: return v0 ->
 624                         v0.uOp((i, a) -> (float) Math.acos(a));
 625                 case VECTOR_OP_ATAN: return v0 ->
 626                         v0.uOp((i, a) -> (float) Math.atan(a));
 627                 case VECTOR_OP_EXP: return v0 ->
 628                         v0.uOp((i, a) -> (float) Math.exp(a));
 629                 case VECTOR_OP_LOG: return v0 ->
 630                         v0.uOp((i, a) -> (float) Math.log(a));
 631                 case VECTOR_OP_LOG10: return v0 ->
 632                         v0.uOp((i, a) -> (float) Math.log10(a));
 633                 case VECTOR_OP_SQRT: return v0 ->
 634                         v0.uOp((i, a) -> (float) Math.sqrt(a));
 635                 case VECTOR_OP_CBRT: return v0 ->
 636                         v0.uOp((i, a) -> (float) Math.cbrt(a));
 637                 case VECTOR_OP_SINH: return v0 ->
 638                         v0.uOp((i, a) -> (float) Math.sinh(a));
 639                 case VECTOR_OP_COSH: return v0 ->
 640                         v0.uOp((i, a) -> (float) Math.cosh(a));
 641                 case VECTOR_OP_TANH: return v0 ->
 642                         v0.uOp((i, a) -> (float) Math.tanh(a));
 643                 case VECTOR_OP_EXPM1: return v0 ->
 644                         v0.uOp((i, a) -> (float) Math.expm1(a));
 645                 case VECTOR_OP_LOG1P: return v0 ->
 646                         v0.uOp((i, a) -> (float) Math.log1p(a));
 647                 default: return null;
 648               }}));
 649     }
 650     private static final
 651     ImplCache<Unary,UnaryOperator<FloatVector>> UN_IMPL
 652         = new ImplCache<>(Unary.class, FloatVector.class);
 653 
 654     /**
 655      * {@inheritDoc} <!--workaround-->
 656      */
 657     @ForceInline
 658     public final
 659     FloatVector lanewise(VectorOperators.Unary op,
 660                                   VectorMask<Float> m) {
 661         return blend(lanewise(op), m);
 662     }
 663 
 664     // Binary lanewise support
 665 
 666     /**
 667      * {@inheritDoc} <!--workaround-->
 668      * @see #lanewise(VectorOperators.Binary,float)
 669      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 670      */
 671     @Override
 672     public abstract
 673     FloatVector lanewise(VectorOperators.Binary op,
 674                                   Vector<Float> v);
 675     @ForceInline
 676     final
 677     FloatVector lanewiseTemplate(VectorOperators.Binary op,
 678                                           Vector<Float> v) {
 679         FloatVector that = (FloatVector) v;
 680         that.check(this);
 681         if (opKind(op, VO_SPECIAL )) {
 682             if (op == FIRST_NONZERO) {
 683                 // FIXME: Support this in the JIT.
 684                 VectorMask<Integer> thisNZ
 685                     = this.viewAsIntegralLanes().compare(NE, (int) 0);
 686                 that = that.blend((float) 0, thisNZ.cast(vspecies()));
 687                 op = OR_UNCHECKED;
 688                 // FIXME: Support OR_UNCHECKED on float/double also!
 689                 return this.viewAsIntegralLanes()
 690                     .lanewise(op, that.viewAsIntegralLanes())
 691                     .viewAsFloatingLanes();
 692             }
 693         }
 694         int opc = opCode(op);
 695         return VectorIntrinsics.binaryOp(
 696             opc, getClass(), float.class, length(),
 697             this, that,
 698             BIN_IMPL.find(op, opc, (opc_) -> {
 699               switch (opc_) {
 700                 case VECTOR_OP_ADD: return (v0, v1) ->
 701                         v0.bOp(v1, (i, a, b) -> (float)(a + b));
 702                 case VECTOR_OP_SUB: return (v0, v1) ->
 703                         v0.bOp(v1, (i, a, b) -> (float)(a - b));
 704                 case VECTOR_OP_MUL: return (v0, v1) ->
 705                         v0.bOp(v1, (i, a, b) -> (float)(a * b));
 706                 case VECTOR_OP_DIV: return (v0, v1) ->
 707                         v0.bOp(v1, (i, a, b) -> (float)(a / b));
 708                 case VECTOR_OP_MAX: return (v0, v1) ->
 709                         v0.bOp(v1, (i, a, b) -> (float)Math.max(a, b));
 710                 case VECTOR_OP_MIN: return (v0, v1) ->
 711                         v0.bOp(v1, (i, a, b) -> (float)Math.min(a, b));
 712                 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) ->
 713                         v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? a : b);
 714                 case VECTOR_OP_OR: return (v0, v1) ->
 715                         v0.bOp(v1, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
 716                 case VECTOR_OP_ATAN2: return (v0, v1) ->
 717                         v0.bOp(v1, (i, a, b) -> (float) Math.atan2(a, b));
 718                 case VECTOR_OP_POW: return (v0, v1) ->
 719                         v0.bOp(v1, (i, a, b) -> (float) Math.pow(a, b));
 720                 case VECTOR_OP_HYPOT: return (v0, v1) ->
 721                         v0.bOp(v1, (i, a, b) -> (float) Math.hypot(a, b));
 722                 default: return null;
 723                 }}));
 724     }
 725     private static final
 726     ImplCache<Binary,BinaryOperator<FloatVector>> BIN_IMPL
 727         = new ImplCache<>(Binary.class, FloatVector.class);
 728 
 729     /**
 730      * {@inheritDoc} <!--workaround-->
 731      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 732      */
 733     @ForceInline
 734     public final
 735     FloatVector lanewise(VectorOperators.Binary op,
 736                                   Vector<Float> v,
 737                                   VectorMask<Float> m) {
 738         return blend(lanewise(op, v), m);
 739     }
 740     // FIXME: Maybe all of the public final methods in this file (the
 741     // simple ones that just call lanewise) should be pushed down to
 742     // the X-VectorBits template.  They can't optimize properly at
 743     // this level, and must rely on inlining.  Does it work?
 744     // (If it works, of course keep the code here.)
 745 
 746     /**
 747      * Combines the lane values of this vector
 748      * with the value of a broadcast scalar.
 749      *
 750      * This is a lane-wise binary operation which applies
 751      * the selected operation to each lane.
 752      * The return value will be equal to this expression:
 753      * {@code this.lanewise(op, this.broadcast(e))}.
 754      *
 755      * @param e the input scalar
 756      * @return the result of applying the operation lane-wise
 757      *         to the two input vectors
 758      * @throws UnsupportedOperationException if this vector does
 759      *         not support the requested operation
 760      * @see #lanewise(VectorOperators.Binary,Vector)
 761      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
 762      */
 763     @ForceInline
 764     public final
 765     FloatVector lanewise(VectorOperators.Binary op,
 766                                   float e) {
 767         int opc = opCode(op);
 768         return lanewise(op, broadcast(e));
 769     }
 770 
 771     /**
 772      * Combines the lane values of this vector
 773      * with the value of a broadcast scalar,
 774      * with selection of lane elements controlled by a mask.
 775      *
 776      * This is a masked lane-wise binary operation which applies
 777      * the selected operation to each lane.
 778      * The return value will be equal to this expression:
 779      * {@code this.lanewise(op, this.broadcast(e), m)}.
 780      *
 781      * @param e the input scalar
 782      * @param m the mask controlling lane selection
 783      * @return the result of applying the operation lane-wise
 784      *         to the input vector and the scalar
 785      * @throws UnsupportedOperationException if this vector does
 786      *         not support the requested operation
 787      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 788      * @see #lanewise(VectorOperators.Binary,float)
 789      */
 790     @ForceInline
 791     public final
 792     FloatVector lanewise(VectorOperators.Binary op,
 793                                   float e,
 794                                   VectorMask<Float> m) {
 795         return blend(lanewise(op, e), m);
 796     }
 797 
 798     /**
 799      * {@inheritDoc} <!--workaround-->
 800      * @apiNote
 801      * When working with vector subtypes like {@code FloatVector},
 802      * {@linkplain #lanewise(VectorOperators.Binary,float)
 803      * the more strongly typed method}
 804      * is typically selected.  It can be explicitly selected
 805      * using a cast: {@code v.lanewise(op,(float)e)}.
 806      * The two expressions will produce numerically identical results.
 807      */
 808     @ForceInline
 809     public final
 810     FloatVector lanewise(VectorOperators.Binary op,
 811                                   long e) {
 812         float e1 = (float) e;
 813         if ((long)e1 != e
 814             ) {
 815             vspecies().checkValue(e);  // for exception
 816         }
 817         return lanewise(op, e1);
 818     }
 819 
 820     /**
 821      * {@inheritDoc} <!--workaround-->
 822      * @apiNote
 823      * When working with vector subtypes like {@code FloatVector},
 824      * {@linkplain #lanewise(VectorOperators.Binary,float,VectorMask)
 825      * the more strongly typed method}
 826      * is typically selected.  It can be explicitly selected
 827      * using a cast: {@code v.lanewise(op,(float)e,m)}.
 828      * The two expressions will produce numerically identical results.
 829      */
 830     @ForceInline
 831     public final
 832     FloatVector lanewise(VectorOperators.Binary op,
 833                                   long e, VectorMask<Float> m) {
 834         return blend(lanewise(op, e), m);
 835     }
 836 
 837 
 838     // Ternary lanewise support
 839 
 840     // Ternary operators come in eight variations:
 841     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 842     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 843 
 844     // It is annoying to support all of these variations of masking
 845     // and broadcast, but it would be more surprising not to continue
 846     // the obvious pattern started by unary and binary.
 847 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,float,float)
     * @see #lanewise(VectorOperators.Ternary,Vector,float)
     * @see #lanewise(VectorOperators.Ternary,float,Vector)
     */
    // Abstract: the return type is narrowed to FloatVector here.
    // NOTE(review): concrete subclasses presumably forward to lanewiseTemplate — confirm.
    @Override
    public abstract
    FloatVector lanewise(VectorOperators.Ternary op,
                                                  Vector<Float> v1,
                                                  Vector<Float> v2);
    // Shared body for the three-vector ternary lanewise operation.
    // Both operands are cast to FloatVector and passed through check(this)
    // (NOTE(review): presumably a species/shape compatibility check — confirm),
    // then the operation is dispatched through VectorIntrinsics.ternaryOp.
    // The last argument is a fallback implementation looked up in TERN_IMPL;
    // the lookup lambda supplies one only for VECTOR_OP_FMA and yields null
    // for every other opcode.
    @ForceInline
    final
    FloatVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Float> v1,
                                          Vector<Float> v2) {
        FloatVector that = (FloatVector) v1;
        FloatVector tother = (FloatVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        int opc = opCode(op);
        return VectorIntrinsics.ternaryOp(
            opc, getClass(), float.class, length(),
            this, that, tother,
            TERN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                // Fused multiply-add, applied per lane via tOp using Math.fma.
                case VECTOR_OP_FMA: return (v0, v1_, v2_) ->
                        v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c));
                // No fallback is provided for any other ternary opcode.
                default: return null;
                }}));
    }
    // Cache of ternary-operation implementations for FloatVector, keyed by
    // operator; lanewiseTemplate(Ternary, ...) consults it through find(op, opc, ...).
    private static final
    ImplCache<Ternary,TernaryOperation<FloatVector>> TERN_IMPL
        = new ImplCache<>(Ternary.class, FloatVector.class);
 888 
 889     /**
 890      * {@inheritDoc} <!--workaround-->
 891      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 892      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 893      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
 894      */
 895     @ForceInline
 896     public final
 897     FloatVector lanewise(VectorOperators.Ternary op,
 898                                   Vector<Float> v1,
 899                                   Vector<Float> v2,
 900                                   VectorMask<Float> m) {
 901         return blend(lanewise(op, v1, v2), m);
 902     }
 903 
 904     /**
 905      * Combines the lane values of this vector
 906      * with the values of two broadcast scalars.
 907      *
 908      * This is a lane-wise ternary operation which applies
 909      * the selected operation to each lane.
 910      * The return value will be equal to this expression:
 911      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 912      *
 913      * @param e1 the first input scalar
 914      * @param e2 the second input scalar
 915      * @return the result of applying the operation lane-wise
 916      *         to the input vector and the scalars
 917      * @throws UnsupportedOperationException if this vector does
 918      *         not support the requested operation
 919      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 920      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
 921      */
 922     @ForceInline
 923     public final
 924     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 925                                   float e1,
 926                                   float e2) {
 927         return lanewise(op, broadcast(e1), broadcast(e1));
 928     }
 929 
 930     /**
 931      * Combines the lane values of this vector
 932      * with the values of two broadcast scalars,
 933      * with selection of lane elements controlled by a mask.
 934      *
 935      * This is a masked lane-wise ternary operation which applies
 936      * the selected operation to each lane.
 937      * The return value will be equal to this expression:
 938      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
 939      *
 940      * @param e1 the first input scalar
 941      * @param e2 the second input scalar
 942      * @param m the mask controlling lane selection
 943      * @return the result of applying the operation lane-wise
 944      *         to the input vector and the scalars
 945      * @throws UnsupportedOperationException if this vector does
 946      *         not support the requested operation
 947      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
 948      * @see #lanewise(VectorOperators.Ternary,float,float)
 949      */
 950     @ForceInline
 951     public final
 952     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
 953                                   float e1,
 954                                   float e2,
 955                                   VectorMask<Float> m) {
 956         return blend(lanewise(op, e1, e2), m);
 957     }
 958 
 959     /**
 960      * Combines the lane values of this vector
 961      * with the values of another vector and a broadcast scalar.
 962      *
 963      * This is a lane-wise ternary operation which applies
 964      * the selected operation to each lane.
 965      * The return value will be equal to this expression:
 966      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
 967      *
 968      * @param v1 the other input vector
 969      * @param e2 the input scalar
 970      * @return the result of applying the operation lane-wise
 971      *         to the input vectors and the scalar
 972      * @throws UnsupportedOperationException if this vector does
 973      *         not support the requested operation
 974      * @see #lanewise(VectorOperators.Ternary,float,float)
 975      * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask)
 976      */
 977     @ForceInline
 978     public final
 979     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
 980                                   Vector<Float> v1,
 981                                   float e2) {
 982         return lanewise(op, v1, broadcast(e2));
 983     }
 984 
 985     /**
 986      * Combines the lane values of this vector
 987      * with the values of another vector and a broadcast scalar,
 988      * with selection of lane elements controlled by a mask.
 989      *
 990      * This is a masked lane-wise ternary operation which applies
 991      * the selected operation to each lane.
 992      * The return value will be equal to this expression:
 993      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
 994      *
 995      * @param v1 the other input vector
 996      * @param e2 the input scalar
 997      * @param m the mask controlling lane selection
 998      * @return the result of applying the operation lane-wise
 999      *         to the input vectors and the scalar
1000      * @throws UnsupportedOperationException if this vector does
1001      *         not support the requested operation
1002      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1003      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
1004      * @see #lanewise(VectorOperators.Ternary,Vector,float)
1005      */
1006     @ForceInline
1007     public final
1008     FloatVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1009                                   Vector<Float> v1,
1010                                   float e2,
1011                                   VectorMask<Float> m) {
1012         return blend(lanewise(op, v1, e2), m);
1013     }
1014 
1015     /**
1016      * Combines the lane values of this vector
1017      * with the values of another vector and a broadcast scalar.
1018      *
1019      * This is a lane-wise ternary operation which applies
1020      * the selected operation to each lane.
1021      * The return value will be equal to this expression:
1022      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1023      *
1024      * @param e1 the input scalar
1025      * @param v2 the other input vector
1026      * @return the result of applying the operation lane-wise
1027      *         to the input vectors and the scalar
1028      * @throws UnsupportedOperationException if this vector does
1029      *         not support the requested operation
1030      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1031      * @see #lanewise(VectorOperators.Ternary,float,Vector,VectorMask)
1032      */
1033     @ForceInline
1034     public final
1035     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1036                                   float e1,
1037                                   Vector<Float> v2) {
1038         return lanewise(op, broadcast(e1), v2);
1039     }
1040 
1041     /**
1042      * Combines the lane values of this vector
1043      * with the values of another vector and a broadcast scalar,
1044      * with selection of lane elements controlled by a mask.
1045      *
1046      * This is a masked lane-wise ternary operation which applies
1047      * the selected operation to each lane.
1048      * The return value will be equal to this expression:
1049      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1050      *
1051      * @param e1 the input scalar
1052      * @param v2 the other input vector
1053      * @param m the mask controlling lane selection
1054      * @return the result of applying the operation lane-wise
1055      *         to the input vectors and the scalar
1056      * @throws UnsupportedOperationException if this vector does
1057      *         not support the requested operation
1058      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1059      * @see #lanewise(VectorOperators.Ternary,float,Vector)
1060      */
1061     @ForceInline
1062     public final
1063     FloatVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1064                                   float e1,
1065                                   Vector<Float> v2,
1066                                   VectorMask<Float> m) {
1067         return blend(lanewise(op, e1, v2), m);
1068     }
1069 
1070     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1071     // https://en.wikipedia.org/wiki/Ogdoad
1072 
1073     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1074     //
1075     // These include masked and non-masked versions.
1076     // This subclass adds broadcast (masked or not).
1077 
1078     /**
1079      * {@inheritDoc} <!--workaround-->
1080      * @see #add(float)
1081      */
1082     @Override
1083     @ForceInline
1084     public final FloatVector add(Vector<Float> v) {
1085         return lanewise(ADD, v);
1086     }
1087 
1088     /**
1089      * Adds this vector to the broadcast of an input scalar.
1090      *
1091      * This is a lane-wise binary operation which applies
1092      * the primitive addition operation ({@code +}) to each lane.
1093      *
1094      * This method is also equivalent to the expression
1095      * {@link #lanewise(VectorOperators.Binary,float)
1096      *    lanewise}{@code (}{@link VectorOperators#ADD
1097      *    ADD}{@code , e)}.
1098      *
1099      * @param e the input scalar
1100      * @return the result of adding each lane of this vector to the scalar
1101      * @see #add(Vector)
1102      * @see #broadcast(float)
1103      * @see #add(int,VectorMask)
1104      * @see VectorOperators#ADD
1105      * @see #lanewise(VectorOperators.Binary,Vector)
1106      * @see #lanewise(VectorOperators.Binary,float)
1107      */
1108     @ForceInline
1109     public final
1110     FloatVector add(float e) {
1111         return lanewise(ADD, e);
1112     }
1113 
1114     /**
1115      * {@inheritDoc} <!--workaround-->
1116      * @see #add(float,VectorMask)
1117      */
1118     @Override
1119     @ForceInline
1120     public final FloatVector add(Vector<Float> v,
1121                                           VectorMask<Float> m) {
1122         return lanewise(ADD, v, m);
1123     }
1124 
1125     /**
1126      * Adds this vector to the broadcast of an input scalar,
1127      * selecting lane elements controlled by a mask.
1128      *
1129      * This is a masked lane-wise binary operation which applies
1130      * the primitive addition operation ({@code +}) to each lane.
1131      *
1132      * This method is also equivalent to the expression
1133      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1134      *    lanewise}{@code (}{@link VectorOperators#ADD
1135      *    ADD}{@code , s, m)}.
1136      *
1137      * @param e the input scalar
1138      * @param m the mask controlling lane selection
1139      * @return the result of adding each lane of this vector to the scalar
1140      * @see #add(Vector,VectorMask)
1141      * @see #broadcast(float)
1142      * @see #add(int)
1143      * @see VectorOperators#ADD
1144      * @see #lanewise(VectorOperators.Binary,Vector)
1145      * @see #lanewise(VectorOperators.Binary,float)
1146      */
1147     @ForceInline
1148     public final FloatVector add(float e,
1149                                           VectorMask<Float> m) {
1150         return lanewise(ADD, e, m);
1151     }
1152 
1153     /**
1154      * {@inheritDoc} <!--workaround-->
1155      * @see #sub(float)
1156      */
1157     @Override
1158     @ForceInline
1159     public final FloatVector sub(Vector<Float> v) {
1160         return lanewise(SUB, v);
1161     }
1162 
1163     /**
1164      * Subtracts an input scalar from this vector.
1165      *
1166      * This is a masked lane-wise binary operation which applies
1167      * the primitive subtraction operation ({@code -}) to each lane.
1168      *
1169      * This method is also equivalent to the expression
1170      * {@link #lanewise(VectorOperators.Binary,float)
1171      *    lanewise}{@code (}{@link VectorOperators#SUB
1172      *    SUB}{@code , e)}.
1173      *
1174      * @param e the input scalar
1175      * @return the result of subtracting the scalar from each lane of this vector
1176      * @see #sub(Vector)
1177      * @see #broadcast(float)
1178      * @see #sub(int,VectorMask)
1179      * @see VectorOperators#SUB
1180      * @see #lanewise(VectorOperators.Binary,Vector)
1181      * @see #lanewise(VectorOperators.Binary,float)
1182      */
1183     @ForceInline
1184     public final FloatVector sub(float e) {
1185         return lanewise(SUB, e);
1186     }
1187 
1188     /**
1189      * {@inheritDoc} <!--workaround-->
1190      * @see #sub(float,VectorMask)
1191      */
1192     @Override
1193     @ForceInline
1194     public final FloatVector sub(Vector<Float> v,
1195                                           VectorMask<Float> m) {
1196         return lanewise(SUB, v, m);
1197     }
1198 
1199     /**
1200      * Subtracts an input scalar from this vector
1201      * under the control of a mask.
1202      *
1203      * This is a masked lane-wise binary operation which applies
1204      * the primitive subtraction operation ({@code -}) to each lane.
1205      *
1206      * This method is also equivalent to the expression
1207      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1208      *    lanewise}{@code (}{@link VectorOperators#SUB
1209      *    SUB}{@code , s, m)}.
1210      *
1211      * @param e the input scalar
1212      * @param m the mask controlling lane selection
1213      * @return the result of subtracting the scalar from each lane of this vector
1214      * @see #sub(Vector,VectorMask)
1215      * @see #broadcast(float)
1216      * @see #sub(int)
1217      * @see VectorOperators#SUB
1218      * @see #lanewise(VectorOperators.Binary,Vector)
1219      * @see #lanewise(VectorOperators.Binary,float)
1220      */
1221     @ForceInline
1222     public final FloatVector sub(float e,
1223                                           VectorMask<Float> m) {
1224         return lanewise(SUB, e, m);
1225     }
1226 
1227     /**
1228      * {@inheritDoc} <!--workaround-->
1229      * @see #mul(float)
1230      */
1231     @Override
1232     @ForceInline
1233     public final FloatVector mul(Vector<Float> v) {
1234         return lanewise(MUL, v);
1235     }
1236 
1237     /**
1238      * Multiplies this vector by the broadcast of an input scalar.
1239      *
1240      * This is a lane-wise binary operation which applies
1241      * the primitive multiplication operation ({@code *}) to each lane.
1242      *
1243      * This method is also equivalent to the expression
1244      * {@link #lanewise(VectorOperators.Binary,float)
1245      *    lanewise}{@code (}{@link VectorOperators#MUL
1246      *    MUL}{@code , e)}.
1247      *
1248      * @param e the input scalar
1249      * @return the result of multiplying this vector by the given scalar
1250      * @see #mul(Vector)
1251      * @see #broadcast(float)
1252      * @see #mul(int,VectorMask)
1253      * @see VectorOperators#MUL
1254      * @see #lanewise(VectorOperators.Binary,Vector)
1255      * @see #lanewise(VectorOperators.Binary,float)
1256      */
1257     @ForceInline
1258     public final FloatVector mul(float e) {
1259         return lanewise(MUL, e);
1260     }
1261 
1262     /**
1263      * {@inheritDoc} <!--workaround-->
1264      * @see #mul(float,VectorMask)
1265      */
1266     @Override
1267     @ForceInline
1268     public final FloatVector mul(Vector<Float> v,
1269                                           VectorMask<Float> m) {
1270         return lanewise(MUL, v, m);
1271     }
1272 
1273     /**
1274      * Multiplies this vector by the broadcast of an input scalar,
1275      * selecting lane elements controlled by a mask.
1276      *
1277      * This is a masked lane-wise binary operation which applies
1278      * the primitive multiplication operation ({@code *}) to each lane.
1279      *
1280      * This method is also equivalent to the expression
1281      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1282      *    lanewise}{@code (}{@link VectorOperators#MUL
1283      *    MUL}{@code , s, m)}.
1284      *
1285      * @param e the input scalar
1286      * @param m the mask controlling lane selection
1287      * @return the result of muling each lane of this vector to the scalar
1288      * @see #mul(Vector,VectorMask)
1289      * @see #broadcast(float)
1290      * @see #mul(int)
1291      * @see VectorOperators#MUL
1292      * @see #lanewise(VectorOperators.Binary,Vector)
1293      * @see #lanewise(VectorOperators.Binary,float)
1294      */
1295     @ForceInline
1296     public final FloatVector mul(float e,
1297                                           VectorMask<Float> m) {
1298         return lanewise(MUL, e, m);
1299     }
1300 
1301     /**
1302      * {@inheritDoc} <!--workaround-->
1303      * @see #div(float)
1304      * <p> Because the underlying scalar operator is an IEEE
1305      * floating point number, division by zero in fact will
1306      * not throw an exception, but will yield a signed
1307      * infinity or NaN.
1308      */
1309     @Override
1310     @ForceInline
1311     public final FloatVector div(Vector<Float> v) {
1312         return lanewise(DIV, v);
1313     }
1314 
1315     /**
1316      * Divides this vector by the broadcast of an input scalar.
1317      *
1318      * This is a lane-wise binary operation which applies
1319      * the primitive division operation ({@code /}) to each lane.
1320      *
1321      * This method is also equivalent to the expression
1322      * {@link #lanewise(VectorOperators.Binary,float)
1323      *    lanewise}{@code (}{@link VectorOperators#DIV
1324      *    DIV}{@code , e)}.
1325      *
1326      * <p>
1327      * If the underlying scalar operator does not support
1328      * division by zero, but is presented with a zero divisor,
1329      * an {@code ArithmeticException} will be thrown.
1330      * Because the underlying scalar operator is an IEEE
1331      * floating point number, division by zero in fact will
1332      * not throw an exception, but will yield a signed
1333      * infinity or NaN.
1334      *
1335      * @param e the input scalar
1336      * @return the result of dividing each lane of this vector by the scalar
1337      * @see #div(Vector)
1338      * @see #broadcast(float)
1339      * @see #div(int,VectorMask)
1340      * @see VectorOperators#DIV
1341      * @see #lanewise(VectorOperators.Binary,Vector)
1342      * @see #lanewise(VectorOperators.Binary,float)
1343      */
1344     @ForceInline
1345     public final FloatVector div(float e) {
1346         return lanewise(DIV, e);
1347     }
1348 
1349     /**
1350      * {@inheritDoc} <!--workaround-->
1351      * @see #div(float,VectorMask)
1352      * <p> Because the underlying scalar operator is an IEEE
1353      * floating point number, division by zero in fact will
1354      * not throw an exception, but will yield a signed
1355      * infinity or NaN.
1356      */
1357     @Override
1358     @ForceInline
1359     public final FloatVector div(Vector<Float> v,
1360                                           VectorMask<Float> m) {
1361         return lanewise(DIV, v, m);
1362     }
1363 
1364     /**
1365      * Divides this vector by the broadcast of an input scalar,
1366      * selecting lane elements controlled by a mask.
1367      *
1368      * This is a masked lane-wise binary operation which applies
1369      * the primitive division operation ({@code /}) to each lane.
1370      *
1371      * This method is also equivalent to the expression
1372      * {@link #lanewise(VectorOperators.Binary,float,VectorMask)
1373      *    lanewise}{@code (}{@link VectorOperators#DIV
1374      *    DIV}{@code , s, m)}.
1375      *
1376      * <p>
1377      * If the underlying scalar operator does not support
1378      * division by zero, but is presented with a zero divisor,
1379      * an {@code ArithmeticException} will be thrown.
1380      * Because the underlying scalar operator is an IEEE
1381      * floating point number, division by zero in fact will
1382      * not throw an exception, but will yield a signed
1383      * infinity or NaN.
1384      *
1385      * @param e the input scalar
1386      * @param m the mask controlling lane selection
1387      * @return the result of dividing each lane of this vector by the scalar
1388      * @see #div(Vector,VectorMask)
1389      * @see #broadcast(float)
1390      * @see #div(int)
1391      * @see VectorOperators#DIV
1392      * @see #lanewise(VectorOperators.Binary,Vector)
1393      * @see #lanewise(VectorOperators.Binary,float)
1394      */
1395     @ForceInline
1396     public final FloatVector div(float e,
1397                                           VectorMask<Float> m) {
1398         return lanewise(DIV, e, m);
1399     }
1400 
1401     /// END OF FULL-SERVICE BINARY METHODS
1402 
1403     /// SECOND-TIER BINARY METHODS
1404     //
1405     // There are no masked versions.
1406 
1407     /**
1408      * {@inheritDoc} <!--workaround-->
1409      * @apiNote
1410      * For this method, floating point negative
1411      * zero {@code -0.0} is treated as a value distinct from, and less
1412      * than, the default zero value.
1413      */
1414     @Override
1415     @ForceInline
1416     public final FloatVector min(Vector<Float> v) {
1417         return lanewise(MIN, v);
1418     }
1419 
1420     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1421     /**
1422      * Computes the smaller of this vector and the broadcast of an input scalar.
1423      *
1424      * This is a lane-wise binary operation which appliesthe
1425      * operation {@code (a, b) -> a < b ? a : b} to each pair of
1426      * corresponding lane values.
1427      *
1428      * This method is also equivalent to the expression
1429      * {@link #lanewise(VectorOperators.Binary,float)
1430      *    lanewise}{@code (}{@link VectorOperators#MIN
1431      *    MIN}{@code , e)}.
1432      *
1433      * @param e the input scalar
1434      * @return the result of multiplying this vector by the given scalar
1435      * @see #min(Vector)
1436      * @see #broadcast(float)
1437      * @see VectorOperators#MIN
1438      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1439      * @apiNote
1440      * For this method, floating point negative
1441      * zero {@code -0.0} is treated as a value distinct from, and less
1442      * than, the default zero value.
1443      */
1444     @ForceInline
1445     public final FloatVector min(float e) {
1446         return lanewise(MIN, e);
1447     }
1448 
1449     /**
1450      * {@inheritDoc} <!--workaround-->
1451      * @apiNote
1452      * For this method, negative floating-point zero compares
1453      * less than the default value, positive zero.
1454      */
1455     @Override
1456     @ForceInline
1457     public final FloatVector max(Vector<Float> v) {
1458         return lanewise(MAX, v);
1459     }
1460 
1461     /**
1462      * Computes the larger of this vector and the broadcast of an input scalar.
1463      *
1464      * This is a lane-wise binary operation which appliesthe
1465      * operation {@code (a, b) -> a > b ? a : b} to each pair of
1466      * corresponding lane values.
1467      *
1468      * This method is also equivalent to the expression
1469      * {@link #lanewise(VectorOperators.Binary,float)
1470      *    lanewise}{@code (}{@link VectorOperators#MAX
1471      *    MAX}{@code , e)}.
1472      *
1473      * @param e the input scalar
1474      * @return the result of multiplying this vector by the given scalar
1475      * @see #max(Vector)
1476      * @see #broadcast(float)
1477      * @see VectorOperators#MAX
1478      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1479      * @apiNote
1480      * For this method, negative floating-point zero compares
1481      * less than the default value, positive zero.
1482      */
1483     @ForceInline
1484     public final FloatVector max(float e) {
1485         return lanewise(MAX, e);
1486     }
1487 
1488 
1489     // common FP operator: pow
1490     /**
1491      * Raises this vector to the power of a second input vector.
1492      *
1493      * This is a lane-wise binary operation which applies the
1494      * method {@code Math.pow()}
1495      * to each pair of corresponding lane values.
1496      *
1497      * This method is also equivalent to the expression
1498      * {@link #lanewise(VectorOperators.Binary,Vector)
1499      *    lanewise}{@code (}{@link VectorOperators#POW
1500      *    POW}{@code , n)}.
1501      *
1502      * <p>
1503      * This is not a full-service named operation like
1504      * {@link #add(Vector) add}.  A masked version of
1505      * version of this operation is not directly available
1506      * but may be obtained via the masked version of
1507      * {@code lanewise}.
1508      *
1509      * @param n a vector exponent by which to raise this vector
1510      * @return the {@code n}-th power of this vector
1511      * @see #pow(float)
1512      * @see VectorOperators#POW
1513      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1514      */
1515     @ForceInline
1516     public final FloatVector pow(Vector<Float> n) {
1517         return lanewise(POW, n);
1518     }
1519 
1520     /**
1521      * Raises this vector to a scalar power.
1522      *
1523      * This is a lane-wise binary operation which applies the
1524      * method {@code Math.pow()}
1525      * to each pair of corresponding lane values.
1526      *
1527      * This method is also equivalent to the expression
1528      * {@link #lanewise(VectorOperators.Binary,Vector)
1529      *    lanewise}{@code (}{@link VectorOperators#POW
1530      *    POW}{@code , n)}.
1531      *
1532      * @param n a scalar exponent by which to raise this vector
1533      * @return the {@code n}-th power of this vector
1534      * @see #pow(Vector)
1535      * @see VectorOperators#POW
1536      * @see #lanewise(VectorOperators.Binary,float,VectorMask)
1537      */
1538     @ForceInline
1539     public final FloatVector pow(float n) {
1540         return lanewise(POW, n);
1541     }
1542 
1543     /// UNARY METHODS
1544 
1545     /**
1546      * {@inheritDoc} <!--workaround-->
1547      */
1548     @Override
1549     @ForceInline
1550     public final
1551     FloatVector neg() {
1552         return lanewise(NEG);
1553     }
1554 
1555     /**
1556      * {@inheritDoc} <!--workaround-->
1557      */
1558     @Override
1559     @ForceInline
1560     public final
1561     FloatVector abs() {
1562         return lanewise(ABS);
1563     }
1564 
1565 
1566     // sqrt
1567     /**
1568      * Computes the square root of this vector.
1569      *
1570      * This is a lane-wise unary operation which applies the
1571      * the method {@code Math.sqrt()}
1572      * to each lane value.
1573      *
1574      * This method is also equivalent to the expression
1575      * {@link #lanewise(VectorOperators.Unary,Vector)
1576      *    lanewise}{@code (}{@link VectorOperators#SQRT
1577      *    SQRT}{@code )}.
1578      *
1579      * @return the square root of this vector
1580      * @see VectorOperators#SQRT
1581      * @see #lanewise(VectorOperators.Unary,Vector,VectorMask)
1582      */
1583     @ForceInline
1584     public final FloatVector sqrt() {
1585         return lanewise(SQRT);
1586     }
1587 
1588     /// COMPARISONS
1589 
1590     /**
1591      * {@inheritDoc} <!--workaround-->
1592      */
1593     @Override
1594     @ForceInline
1595     public final
1596     VectorMask<Float> eq(Vector<Float> v) {
1597         return compare(EQ, v);
1598     }
1599 
1600     /**
1601      * Tests if this vector is equal to an input scalar.
1602      *
1603      * This is a lane-wise binary test operation which applies
1604      * the primitive equals operation ({@code ==}) to each lane.
1605      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1606      *
1607      * @param e the input scalar
1608      * @return the result mask of testing if this vector
1609      *         is equal to {@code e}
1610      * @see #compare(VectorOperators.Comparison,float)
1611      */
1612     @ForceInline
1613     public final
1614     VectorMask<Float> eq(float e) {
1615         return compare(EQ, e);
1616     }
1617 
1618     /**
1619      * {@inheritDoc} <!--workaround-->
1620      */
1621     @Override
1622     @ForceInline
1623     public final
1624     VectorMask<Float> lt(Vector<Float> v) {
1625         return compare(LT, v);
1626     }
1627 
1628     /**
1629      * Tests if this vector is less than an input scalar.
1630      *
1631      * This is a lane-wise binary test operation which applies
1632      * the primitive less than operation ({@code <}) to each lane.
1633      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1634      *
1635      * @param e the input scalar
1636      * @return the mask result of testing if this vector
1637      *         is less than the input scalar
1638      * @see #compare(VectorOperators.Comparison,float)
1639      */
1640     @ForceInline
1641     public final
1642     VectorMask<Float> lt(float e) {
1643         return compare(LT, e);
1644     }
1645 
    // The shared logic for this operation is compareTemplate(Class,
    // Comparison, Vector), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(VectorOperators.Comparison op, Vector<Float> v);
1652 
    /*package-private*/
    // Shared implementation of the vector-vs-vector comparison.
    // Rejects a null operand, translates the operator to its opcode and
    // hands both vectors to VectorIntrinsics.compare together with a
    // fallback lambda.  The fallback evaluates the comparison through
    // bTest(), using compareWithOp() for each pair of lane values, and
    // returns the resulting mask cast to the requested mask type M.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Float> v) {
        Objects.requireNonNull(v);
        // NOTE(review): vsp is not used anywhere in this method.
        FloatSpecies vsp = vspecies();
        int opc = opCode(op);
        return VectorIntrinsics.compare(
            opc, getClass(), maskType, float.class, length(),
            this, (FloatVector) v,
            (cond, v0, v1) -> {
                // The inner lambda ignores its own cond_ parameter and
                // uses the enclosing lambda's cond instead.
                AbstractMask<Float> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }
1673 
1674     @ForceInline
1675     private static
1676     boolean compareWithOp(int cond, float a, float b) {
1677         switch (cond) {
1678         case VectorIntrinsics.BT_eq:  return a == b;
1679         case VectorIntrinsics.BT_ne:  return a != b;
1680         case VectorIntrinsics.BT_lt:  return a <  b;
1681         case VectorIntrinsics.BT_le:  return a <= b;
1682         case VectorIntrinsics.BT_gt:  return a >  b;
1683         case VectorIntrinsics.BT_ge:  return a >= b;
1684         }
1685         throw new AssertionError();
1686     }
1687 
1688     /**
1689      * {@inheritDoc} <!--workaround-->
1690      */
1691     @Override
1692     @ForceInline
1693     public final
1694     VectorMask<Float> compare(VectorOperators.Comparison op,
1695                                   Vector<Float> v,
1696                                   VectorMask<Float> m) {
1697         return compare(op, v).and(m);
1698     }
1699 
    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation.
     *
     * This is a lane-wise binary test operation which applies
     * the comparison operation to each lane.
     * <p>
     * The result is the same as
     * {@code compare(op, broadcast(species(), e))}.
     * That is, the scalar may be regarded as broadcast to
     * a vector of the same species, and then compared
     * against the original vector, using the selected
     * comparison operation.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator
     * @see FloatVector#compare(VectorOperators.Comparison,Vector)
     * @see #eq(float)
     * @see #lt(float)
     */
    public abstract
    VectorMask<Float> compare(Comparison op, float e);
1724 
1725     /*package-private*/
1726     @ForceInline
1727     final
1728     <M extends VectorMask<Float>>
1729     M compareTemplate(Class<M> maskType, Comparison op, float e) {
1730         return compareTemplate(maskType, op, broadcast(e));
1731     }
1732 
1733     /**
1734      * Tests this vector by comparing it with an input scalar,
1735      * according to the given comparison operation,
1736      * in lanes selected by a mask.
1737      *
1738      * This is a masked lane-wise binary test operation which applies
1739      * to each pair of corresponding lane values.
1740      *
1741      * The returned result is equal to the expression
1742      * {@code compare(op,s).and(m)}.
1743      *
1744      * @param e the input scalar
1745      * @param m the mask controlling lane selection
1746      * @return the mask result of testing lane-wise if this vector
1747      *         compares to the input, according to the selected
1748      *         comparison operator,
1749      *         and only in the lanes selected by the mask
1750      * @see FloatVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1751      */
1752     @ForceInline
1753     public final VectorMask<Float> compare(VectorOperators.Comparison op,
1754                                                float e,
1755                                                VectorMask<Float> m) {
1756         return compare(op, e).and(m);
1757     }
1758 
    // The shared logic for this operation is compareTemplate(Class,
    // Comparison, long), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Float> compare(Comparison op, long e);
1765 
1766     /*package-private*/
1767     @ForceInline
1768     final
1769     <M extends VectorMask<Float>>
1770     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1771         return compareTemplate(maskType, op, broadcast(e));
1772     }
1773 
1774     /**
1775      * {@inheritDoc} <!--workaround-->
1776      */
1777     @Override
1778     @ForceInline
1779     public final
1780     VectorMask<Float> compare(Comparison op, long e, VectorMask<Float> m) {
1781         return compare(op, broadcast(e), m);
1782     }
1783 
1784 
1785 
    // The shared logic for this operation is blendTemplate(Class,
    // FloatVector, M), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract
    FloatVector blend(Vector<Float> v, VectorMask<Float> m);
1791 
    /*package-private*/
    // Shared implementation of blend(Vector, VectorMask).
    // Runs v.check(this) on the second operand, then hands both vectors
    // and the mask to VectorIntrinsics.blend together with a fallback
    // lambda.  The fallback is a masked binary op whose lane function
    // returns the second operand's value b.
    // NOTE(review): presumably check() verifies that v is compatible with
    // this vector, and the masked bOp() keeps this vector's value in lanes
    // where the mask is unset -- confirm against their definitions.
    @ForceInline
    final
    <M extends VectorMask<Float>>
    FloatVector
    blendTemplate(Class<M> maskType, FloatVector v, M m) {
        v.check(this);
        return VectorIntrinsics.blend(
            getClass(), maskType, float.class, length(),
            this, v, m,
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }
1804 
    // The shared logic for this operation is addIndexTemplate(int),
    // declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract FloatVector addIndex(int scale);
1809 
    /*package-private*/
    // Shared implementation of addIndex(int).
    // Validates the scale against the species, then calls
    // VectorIntrinsics.indexVector with a fallback lambda that adds
    // the species' iota vector, multiplied by the scale, to this vector.
    @ForceInline
    final FloatVector addIndexTemplate(int scale) {
        FloatSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorIntrinsics.indexVector(
            getClass(), float.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                FloatVector iota = s.iota();
                float sc = (float) scale_;
                // A scale of 1 skips the multiplication.
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }
1829 
1830     /**
1831      * Replaces selected lanes of this vector with
1832      * a scalar value
1833      * under the control of a mask.
1834      *
1835      * This is a masked lane-wise binary operation which
1836      * selects each lane value from one or the other input.
1837      *
1838      * The returned result is equal to the expression
1839      * {@code blend(broadcast(e),m)}.
1840      *
1841      * @param e the input scalar, containing the replacement lane value
1842      * @param m the mask controlling lane selection of the scalar
1843      * @return the result of blending the lane elements of this vector with
1844      *         the scalar value
1845      */
1846     @ForceInline
1847     public final FloatVector blend(float e,
1848                                             VectorMask<Float> m) {
1849         return blend(broadcast(e), m);
1850     }
1851 
1852     /**
1853      * Replaces selected lanes of this vector with
1854      * a scalar value
1855      * under the control of a mask.
1856      *
1857      * This is a masked lane-wise binary operation which
1858      * selects each lane value from one or the other input.
1859      *
1860      * The returned result is equal to the expression
1861      * {@code blend(broadcast(e),m)}.
1862      *
1863      * @param e the input scalar, containing the replacement lane value
1864      * @param m the mask controlling lane selection of the scalar
1865      * @return the result of blending the lane elements of this vector with
1866      *         the scalar value
1867      */
1868     @ForceInline
1869     public final FloatVector blend(long e,
1870                                             VectorMask<Float> m) {
1871         return blend(broadcast(e), m);
1872     }
1873 
    // The shared logic for this operation is sliceTemplate(int, Vector),
    // declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector slice(int origin, Vector<Float> v1);
1880 
1881     /*package-private*/
1882     final
1883     @ForceInline
1884     FloatVector sliceTemplate(int origin, Vector<Float> v1) {
1885         FloatVector that = (FloatVector) v1;
1886         that.check(this);
1887         float[] a0 = this.getElements();
1888         float[] a1 = that.getElements();
1889         float[] res = new float[a0.length];
1890         int vlen = res.length;
1891         int firstPart = vlen - origin;
1892         System.arraycopy(a0, origin, res, 0, firstPart);
1893         System.arraycopy(a1, 0, res, firstPart, origin);
1894         return vectorFactory(res);
1895     }
1896 
1897     /**
1898      * {@inheritDoc} <!--workaround-->
1899      */
1900     @Override
1901     @ForceInline
1902     public final
1903     FloatVector slice(int origin,
1904                                Vector<Float> w,
1905                                VectorMask<Float> m) {
1906         return broadcast(0).blend(slice(origin, w), m);
1907     }
1908 
    // Single-vector slice; left abstract here.  No shared template for
    // this overload appears in this part of the file.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector slice(int origin);
1915 
    // The shared logic for this operation is unsliceTemplate(int, Vector,
    // int), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part);
1922 
1923     /*package-private*/
1924     final
1925     @ForceInline
1926     FloatVector
1927     unsliceTemplate(int origin, Vector<Float> w, int part) {
1928         FloatVector that = (FloatVector) w;
1929         that.check(this);
1930         float[] slice = this.getElements();
1931         float[] res = that.getElements();
1932         int vlen = res.length;
1933         int firstPart = vlen - origin;
1934         switch (part) {
1935         case 0:
1936             System.arraycopy(slice, 0, res, origin, firstPart);
1937             break;
1938         case 1:
1939             System.arraycopy(slice, firstPart, res, 0, origin);
1940             break;
1941         default:
1942             throw wrongPartForSlice(part);
1943         }
1944         return vectorFactory(res);
1945     }
1946 
1947     /*package-private*/
1948     final
1949     @ForceInline
1950     <M extends VectorMask<Float>>
1951     FloatVector
1952     unsliceTemplate(Class<M> maskType, int origin, Vector<Float> w, int part, M m) {
1953         FloatVector that = (FloatVector) w;
1954         that.check(this);
1955         FloatVector slice = that.sliceTemplate(origin, that);
1956         slice = slice.blendTemplate(maskType, this, m);
1957         return slice.unsliceTemplate(origin, w, part);
1958     }
1959 
    // The shared logic for this operation is the masked
    // unsliceTemplate(Class, int, Vector, int, M), declared just above.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin, Vector<Float> w, int part, VectorMask<Float> m);
1966 
    // Single-vector unslice; left abstract here.  No shared template for
    // this overload appears in this part of the file.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector unslice(int origin);
1973 
1974     private ArrayIndexOutOfBoundsException
1975     wrongPartForSlice(int part) {
1976         String msg = String.format("bad part number %d for slice operation",
1977                                    part);
1978         return new ArrayIndexOutOfBoundsException(msg);
1979     }
1980 
    // The shared logic for this operation is rearrangeTemplate(Class, S),
    // declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> m);
1987 
    /*package-private*/
    // Shared implementation of the unmasked, single-vector rearrange.
    // Runs shuffle.checkIndexes() first, then calls
    // VectorIntrinsics.rearrangeOp with a fallback lambda in which result
    // lane i receives this vector's lane at index s_.laneSource(i).
    @ForceInline
    final
    <S extends VectorShuffle<Float>>
    FloatVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
        shuffle.checkIndexes();
        return VectorIntrinsics.rearrangeOp(
            getClass(), shuffletype, float.class, length(),
            this, shuffle,
            (v1, s_) -> v1.uOp((i, a) -> {
                int ei = s_.laneSource(i);
                return v1.lane(ei);
            }));
    }
2002 
    // The shared logic for this operation is rearrangeTemplate(Class, S,
    // VectorMask), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   VectorMask<Float> m);
2010 
2011     /*package-private*/
2012     @ForceInline
2013     final
2014     <S extends VectorShuffle<Float>>
2015     FloatVector rearrangeTemplate(Class<S> shuffletype,
2016                                            S shuffle,
2017                                            VectorMask<Float> m) {
2018         FloatVector unmasked =
2019             VectorIntrinsics.rearrangeOp(
2020                 getClass(), shuffletype, float.class, length(),
2021                 this, shuffle,
2022                 (v1, s_) -> v1.uOp((i, a) -> {
2023                     int ei = s_.laneSource(i);
2024                     return ei < 0 ? 0 : v1.lane(ei);
2025                 }));
2026         VectorMask<Float> valid = shuffle.laneIsValid();
2027         if (m.andNot(valid).anyTrue()) {
2028             shuffle.checkIndexes();
2029             throw new AssertionError();
2030         }
2031         return broadcast((float)0).blend(unmasked, valid);
2032     }
2033 
    // The shared logic for this operation is rearrangeTemplate(Class, S,
    // FloatVector), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector rearrange(VectorShuffle<Float> s,
                                   Vector<Float> v);
2041 
2042     /*package-private*/
2043     @ForceInline
2044     final
2045     <S extends VectorShuffle<Float>>
2046     FloatVector rearrangeTemplate(Class<S> shuffletype,
2047                                            S shuffle,
2048                                            FloatVector v) {
2049         VectorMask<Float> valid = shuffle.laneIsValid();
2050         VectorShuffle<Float> ws = shuffle.wrapIndexes();
2051         FloatVector r1 =
2052             VectorIntrinsics.rearrangeOp(
2053                 getClass(), shuffletype, float.class, length(),
2054                 this, shuffle,
2055                 (v1, s_) -> v1.uOp((i, a) -> {
2056                     int ei = s_.laneSource(i);
2057                     return v1.lane(ei);
2058                 }));
2059         FloatVector r2 =
2060             VectorIntrinsics.rearrangeOp(
2061                 getClass(), shuffletype, float.class, length(),
2062                 v, shuffle,
2063                 (v1, s_) -> v1.uOp((i, a) -> {
2064                     int ei = s_.laneSource(i);
2065                     return v1.lane(ei);
2066                 }));
2067         return r2.blend(r1, valid);
2068     }
2069 
    // The shared logic for this operation is selectFromTemplate(FloatVector),
    // declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> v);
2076 
2077     /*package-private*/
2078     @ForceInline
2079     final FloatVector selectFromTemplate(FloatVector v) {
2080         return v.rearrange(this.toShuffle());
2081     }
2082 
    // The shared logic for this operation is selectFromTemplate(FloatVector,
    // AbstractMask), declared right after this method.
    // NOTE(review): presumably each concrete vector class implements this
    // method by forwarding to that template -- confirm in the subclasses.
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    FloatVector selectFrom(Vector<Float> s, VectorMask<Float> m);
2089 
2090     /*package-private*/
2091     @ForceInline
2092     final FloatVector selectFromTemplate(FloatVector v,
2093                                                   AbstractMask<Float> m) {
2094         return v.rearrange(this.toShuffle(), m);
2095     }
2096 
2097     /// Ternary operations
2098 
2099 
2100     /**
2101      * Multiplies this vector by a second input vector, and sums
2102      * the result with a third.
2103      *
2104      * Extended precision is used for the intermediate result,
2105      * avoiding possible loss of precision from rounding once
2106      * for each of the two operations.
2107      * The result is numerically close to {@code this.mul(b).add(c)},
2108      * and is typically closer to the true mathematical result.
2109      *
2110      * This is a lane-wise ternary operation which applies the
2111      * {@link Math#fma(float,float,float) Math#fma(a,b,c)}
2112      * operation to each lane.
2113      *
2114      * This method is also equivalent to the expression
2115      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2116      *    lanewise}{@code (}{@link VectorOperators#FMA
2117      *    FMA}{@code , b, c)}.
2118      *
2119      * @param b the second input vector, supplying multiplier values
2120      * @param b the third input vector, supplying addend values
2121      * @return the product of this vector and the second input vector
2122      *         summed with the third input vector, using extended precision
2123      *         for the intermediate result
2124      * @see #fma(float,float)
2125      * @see VectorOperators#FMA
2126      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2127      */
2128     @ForceInline
2129     public final
2130     FloatVector fma(Vector<Float> b, Vector<Float> c) {
2131         return lanewise(FMA, b, c);
2132     }
2133 
2134     /**
2135      * Multiplies this vector by a scalar multiplier, and sums
2136      * the result with a scalar addend.
2137      *
2138      * Extended precision is used for the intermediate result,
2139      * avoiding possible loss of precision from rounding once
2140      * for each of the two operations.
2141      * The result is numerically close to {@code this.mul(b).add(c)},
2142      * and is typically closer to the true mathematical result.
2143      *
2144      * This is a lane-wise ternary operation which applies the
2145      * {@link Math#fma(float,float,float) Math#fma(a,b,c)}
2146      * operation to each lane.
2147      *
2148      * This method is also equivalent to the expression
2149      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2150      *    lanewise}{@code (}{@link VectorOperators#FMA
2151      *    FMA}{@code , b, c)}.
2152      *
2153      * @param b the scalar multiplier
2154      * @param c the scalar addend
2155      * @return the product of this vector and the scalar multiplier
2156      *         summed with scalar addend, using extended precision
2157      *         for the intermediate result
2158      * @see #fma(Vector,Vector)
2159      * @see VectorOperators#FMA
2160      * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask)
2161      */
2162     @ForceInline
2163     public final
2164     FloatVector fma(float b, float c) {
2165         return lanewise(FMA, b, c);
2166     }
2167 
2168     // Don't bother with (Vector,float) and (float,Vector) overloadings.
2169 
2170     // Type specific horizontal reductions
2171 
    /**
     * Returns a value accumulated from all the lanes of this vector.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to all the lane elements.
     *
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     *
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     *
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default zero value, so a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
     *
     * <li>
     * In the case of floating point addition and multiplication, the
     * precise result will reflect the choice of an arbitrary order
     * of operations, which may even vary over time.
     *
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     *
     * </ul>
     *
     * @implNote
     * The value of a floating-point reduction may be a function
     * of both the input values and the order of the scalar
     * operations which combine those values, specifically in the
     * case of {@code ADD} and {@code MUL} operations, where
     * details of rounding depend on operand order.
     * In those cases, the order of operations of this method is
     * intentionally not defined.  This allows the JVM to generate
     * optimal machine code for the underlying platform at runtime. If
     * the platform supports a vector instruction to add or multiply
     * all values in the vector, or if there is some other efficient
     * machine code sequence, then the JVM has the option of
     * generating this machine code. Otherwise, the default
     * implementation is applied, which combines vector elements
     * sequentially from beginning to end.  For this reason, the
     * output of this method may vary for the same input values,
     * if the selected operator is {@code ADD} or {@code MUL}.
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
     * @see #add(Vector)
     * @see #mul(Vector)
     * @see #min(Vector)
     * @see #max(Vector)
     * @see VectorOperators#FIRST_NONZERO
     */
    public abstract float reduceLanes(VectorOperators.Associative op);
2237 
    /**
     * Returns a value accumulated from selected lanes of this vector,
     * controlled by a mask.
     *
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to the selected lane elements.
     * <p>
     * If no elements are selected, an operation-specific identity
     * value is returned.
     * <ul>
     * <li>
     * If the operation is
     *  {@code ADD}
     * or {@code FIRST_NONZERO},
     * then the identity value is zero, the default {@code float} value.
     * <li>
     * If the operation is {@code MUL},
     * then the identity value is one.
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code Float.NEGATIVE_INFINITY}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code Float.POSITIVE_INFINITY}.
     * </ul>
     *
     * @implNote
     * The value of a floating-point reduction may be a function
     * of both the input values and the order of the scalar
     * operations which combine those values, specifically in the
     * case of {@code ADD} and {@code MUL} operations, where
     * details of rounding depend on operand order.
     * See {@linkplain #reduceLanes(VectorOperators.Associative)
     * the unmasked version of this method}
     * for a discussion.
     *
     * @param op the operation used to combine lane values
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative)
     */
    public abstract float reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Float> m);
2284 
2285     /*package-private*/
2286     @ForceInline
2287     final
2288     float reduceLanesTemplate(VectorOperators.Associative op,
2289                                VectorMask<Float> m) {
2290         FloatVector v = reduceIdentityVector(op).blend(this, m);
2291         return v.reduceLanesTemplate(op);
2292     }
2293 
2294     /*package-private*/
2295     @ForceInline
2296     final
2297     float reduceLanesTemplate(VectorOperators.Associative op) {
2298         if (op == FIRST_NONZERO) {
2299             // FIXME:  The JIT should handle this, and other scan ops alos.
2300             VectorMask<Integer> thisNZ
2301                 = this.viewAsIntegralLanes().compare(NE, (int) 0);
2302             return this.lane(thisNZ.firstTrue());
2303         }
2304         int opc = opCode(op);
2305         return fromBits(VectorIntrinsics.reductionCoerced(
2306             opc, getClass(), float.class, length(),
2307             this,
2308             REDUCE_IMPL.find(op, opc, (opc_) -> {
2309               switch (opc_) {
2310               case VECTOR_OP_ADD: return v ->
2311                       toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b)));
2312               case VECTOR_OP_MUL: return v ->
2313                       toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b)));
2314               case VECTOR_OP_MIN: return v ->
2315                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (float) Math.min(a, b)));
2316               case VECTOR_OP_MAX: return v ->
2317                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b)));
2318               case VECTOR_OP_FIRST_NONZERO: return v ->
2319                       toBits(v.rOp((float)0, (i, a, b) -> toBits(a) != 0 ? a : b));
2320               case VECTOR_OP_OR: return v ->
2321                       toBits(v.rOp((float)0, (i, a, b) -> fromBits(toBits(a) | toBits(b))));
2322               default: return null;
2323               }})));
2324     }
    // Cache of fallback reduction functions, consulted by
    // reduceLanesTemplate(Associative) through find(op, opc, factory).
    private static final
    ImplCache<Associative,Function<FloatVector,Long>> REDUCE_IMPL
        = new ImplCache<>(Associative.class, FloatVector.class);
2328 
    // Returns a vector with every lane set to the identity value of the
    // given reduction operator: 0 for ADD, OR, XOR and FIRST_NONZERO,
    // 1 for MUL, -1 for AND, MAX_OR_INF for MIN and MIN_OR_INF for MAX.
    // The per-operator function is looked up in REDUCE_ID_IMPL.
    // NOTE(review): the factory returns null for any other opcode; what
    // find() does with a null result is not visible here -- confirm in
    // ImplCache before relying on fn being non-null.
    private
    @ForceInline
    FloatVector reduceIdentityVector(VectorOperators.Associative op) {
        int opc = opCode(op);
        UnaryOperator<FloatVector> fn
            = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
                switch (opc_) {
                case VECTOR_OP_ADD:
                case VECTOR_OP_OR:
                case VECTOR_OP_XOR:
                case VECTOR_OP_FIRST_NONZERO:
                    return v -> v.broadcast(0);
                case VECTOR_OP_MUL:
                    return v -> v.broadcast(1);
                case VECTOR_OP_AND:
                    return v -> v.broadcast(-1);
                case VECTOR_OP_MIN:
                    return v -> v.broadcast(MAX_OR_INF);
                case VECTOR_OP_MAX:
                    return v -> v.broadcast(MIN_OR_INF);
                default: return null;
                }
            });
        return fn.apply(this);
    }
    // Cache of identity-vector factories, consulted by
    // reduceIdentityVector(Associative) through find(op, opc, factory).
    private static final
    ImplCache<Associative,UnaryOperator<FloatVector>> REDUCE_ID_IMPL
        = new ImplCache<>(Associative.class, FloatVector.class);
2357 
    // Starting values for the MIN and MAX reductions in this class.
    // MIN_OR_INF (negative infinity) seeds MAX reductions, and
    // MAX_OR_INF (positive infinity) seeds MIN reductions.
    private static final float MIN_OR_INF = Float.NEGATIVE_INFINITY;
    private static final float MAX_OR_INF = Float.POSITIVE_INFINITY;
2360 
    // Reductions that deliver their result as a long.  Both overloads
    // override a supertype declaration and stay abstract in this class.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Float> m);
2364 
2365     // Type specific accessors
2366 
    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract float lane(int i);
2376 
    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     *
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract FloatVector withLane(int i, float e);
2394 
2395     // Memory load operations
2396 
2397     /**
2398      * Returns an array of type {@code float[]}
2399      * containing all the lane values.
2400      * The array length is the same as the vector length.
2401      * The array elements are stored in lane order.
2402      * <p>
2403      * This method behaves as if it stores
2404      * this vector into an allocated array
2405      * (using {@link #intoArray(float[], int) intoArray})
2406      * and returns the array as follows:
2407      * <pre>{@code
2408      *   float[] a = new float[this.length()];
2409      *   this.intoArray(a, 0);
2410      *   return a;
2411      * }</pre>
2412      *
2413      * @return an array containing the lane values of this vector
2414      */
2415     @ForceInline
2416     @Override
2417     public final float[] toArray() {
2418         float[] a = new float[vspecies().laneCount()];
2419         intoArray(a, 0);
2420         return a;
2421     }
2422 
2423     /** {@inheritDoc} <!--workaround-->
2424      * @implNote
2425      * When this method is used on used on vectors
2426      * of type {@code FloatVector},
2427      * fractional bits in lane values will be lost,
2428      * and lane values of large magnitude will be
2429      * clipped to {@code Long.MAX_VALUE} or
2430      * {@code Long.MIN_VALUE}.
2431      */
2432     @ForceInline
2433     @Override
2434     public final long[] toLongArray() {
2435         float[] a = toArray();
2436         long[] res = new long[a.length];
2437         for (int i = 0; i < a.length; i++) {
2438             res[i] = (long) a[i];
2439         }
2440         return res;
2441     }
2442 
2443     /** {@inheritDoc} <!--workaround-->
2444      * @implNote
2445      * When this method is used on used on vectors
2446      * of type {@code FloatVector},
2447      * there will be no loss of precision.
2448      */
2449     @ForceInline
2450     @Override
2451     public final double[] toDoubleArray() {
2452         float[] a = toArray();
2453         double[] res = new double[a.length];
2454         for (int i = 0; i < a.length; i++) {
2455             res[i] = (double) a[i];
2456         }
2457         return res;
2458     }
2459 
2460     /**
2461      * Loads a vector from a byte array starting at an offset.
2462      * Bytes are composed into primitive lane elements according
2463      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2464      * The vector is arranged into lanes according to
2465      * <a href="Vector.html#lane-order">memory ordering</a>.
2466      * <p>
2467      * This method behaves as if it returns the result of calling
2468      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2469      * fromByteBuffer()} as follows:
2470      * <pre>{@code
2471      * var bb = ByteBuffer.wrap(a);
2472      * var bo = ByteOrder.LITTLE_ENDIAN;
2473      * var m = species.maskAll(true);
2474      * return fromByteBuffer(species, bb, offset, m, bo);
2475      * }</pre>
2476      *
2477      * @param species species of desired vector
2478      * @param a the byte array
2479      * @param offset the offset into the array
2480      * @return a vector loaded from a byte array
2481      * @throws IndexOutOfBoundsException
2482      *         if {@code offset+N*ESIZE < 0}
2483      *         or {@code offset+(N+1)*ESIZE > a.length}
2484      *         for any lane {@code N} in the vector
2485      */
2486     @ForceInline
2487     public static
2488     FloatVector fromByteArray(VectorSpecies<Float> species,
2489                                        byte[] a, int offset) {
2490         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN);
2491     }
2492 
2493     /**
2494      * Loads a vector from a byte array starting at an offset.
2495      * Bytes are composed into primitive lane elements according
2496      * to the specified byte order.
2497      * The vector is arranged into lanes according to
2498      * <a href="Vector.html#lane-order">memory ordering</a>.
2499      * <p>
2500      * This method behaves as if it returns the result of calling
2501      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2502      * fromByteBuffer()} as follows:
2503      * <pre>{@code
2504      * var bb = ByteBuffer.wrap(a);
2505      * var m = species.maskAll(true);
2506      * return fromByteBuffer(species, bb, offset, m, bo);
2507      * }</pre>
2508      *
2509      * @param species species of desired vector
2510      * @param a the byte array
2511      * @param offset the offset into the array
2512      * @param bo the intended byte order
2513      * @return a vector loaded from a byte array
2514      * @throws IndexOutOfBoundsException
2515      *         if {@code offset+N*ESIZE < 0}
2516      *         or {@code offset+(N+1)*ESIZE > a.length}
2517      *         for any lane {@code N} in the vector
2518      */
2519     @ForceInline
2520     public static
2521     FloatVector fromByteArray(VectorSpecies<Float> species,
2522                                        byte[] a, int offset,
2523                                        ByteOrder bo) {
2524         FloatSpecies vsp = (FloatSpecies) species;
2525         offset = checkFromIndexSize(offset,
2526                                     vsp.vectorBitSize() / Byte.SIZE,
2527                                     a.length);
2528         return vsp.dummyVector()
2529             .fromByteArray0(a, offset).maybeSwap(bo);
2530     }
2531 
2532     /**
2533      * Loads a vector from a byte array starting at an offset
2534      * and using a mask.
2535      * Lanes where the mask is unset are filled with the default
2536      * value of {@code float} (zero).
2537      * Bytes are composed into primitive lane elements according
2538      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2539      * The vector is arranged into lanes according to
2540      * <a href="Vector.html#lane-order">memory ordering</a>.
2541      * <p>
2542      * This method behaves as if it returns the result of calling
2543      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2544      * fromByteBuffer()} as follows:
2545      * <pre>{@code
2546      * var bb = ByteBuffer.wrap(a);
2547      * var bo = ByteOrder.LITTLE_ENDIAN;
2548      * return fromByteBuffer(species, bb, offset, bo, m);
2549      * }</pre>
2550      *
2551      * @param species species of desired vector
2552      * @param a the byte array
2553      * @param offset the offset into the array
2554      * @param m the mask controlling lane selection
2555      * @return a vector loaded from a byte array
2556      * @throws IndexOutOfBoundsException
2557      *         if {@code offset+N*ESIZE < 0}
2558      *         or {@code offset+(N+1)*ESIZE > a.length}
2559      *         for any lane {@code N} in the vector where
2560      *         the mask is set
2561      */
2562     @ForceInline
2563     public static
2564     FloatVector fromByteArray(VectorSpecies<Float> species,
2565                                        byte[] a, int offset,
2566                                        VectorMask<Float> m) {
2567         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m);
2568     }
2569 
2570     /**
2571      * Loads a vector from a byte array starting at an offset
2572      * and using a mask.
2573      * Lanes where the mask is unset are filled with the default
2574      * value of {@code float} (zero).
2575      * Bytes are composed into primitive lane elements according
2576      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2577      * The vector is arranged into lanes according to
2578      * <a href="Vector.html#lane-order">memory ordering</a>.
2579      * <p>
2580      * This method behaves as if it returns the result of calling
2581      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2582      * fromByteBuffer()} as follows:
2583      * <pre>{@code
2584      * var bb = ByteBuffer.wrap(a);
2585      * return fromByteBuffer(species, bb, offset, m, bo);
2586      * }</pre>
2587      *
2588      * @param species species of desired vector
2589      * @param a the byte array
2590      * @param offset the offset into the array
2591      * @param bo the intended byte order
2592      * @param m the mask controlling lane selection
2593      * @return a vector loaded from a byte array
2594      * @throws IndexOutOfBoundsException
2595      *         if {@code offset+N*ESIZE < 0}
2596      *         or {@code offset+(N+1)*ESIZE > a.length}
2597      *         for any lane {@code N} in the vector
2598      *         where the mask is set
2599      */
2600     @ForceInline
2601     public static
2602     FloatVector fromByteArray(VectorSpecies<Float> species,
2603                                        byte[] a, int offset,
2604                                        ByteOrder bo,
2605                                        VectorMask<Float> m) {
2606         FloatSpecies vsp = (FloatSpecies) species;
2607         FloatVector zero = vsp.zero();
2608         FloatVector iota = zero.addIndex(1);
2609         ((AbstractMask<Float>)m)
2610             .checkIndexByLane(offset, a.length, iota, 4);
2611         FloatVector v = zero.fromByteArray0(a, offset);
2612         return zero.blend(v.maybeSwap(bo), m);
2613     }
2614 
2615     /**
2616      * Loads a vector from an array of type {@code float[]}
2617      * starting at an offset.
2618      * For each vector lane, where {@code N} is the vector lane index, the
2619      * array element at index {@code offset + N} is placed into the
2620      * resulting vector at lane index {@code N}.
2621      *
2622      * @param species species of desired vector
2623      * @param a the array
2624      * @param offset the offset into the array
2625      * @return the vector loaded from an array
2626      * @throws IndexOutOfBoundsException
2627      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2628      *         for any lane {@code N} in the vector
2629      */
2630     @ForceInline
2631     public static
2632     FloatVector fromArray(VectorSpecies<Float> species,
2633                                    float[] a, int offset) {
2634         FloatSpecies vsp = (FloatSpecies) species;
2635         offset = checkFromIndexSize(offset,
2636                                     vsp.laneCount(),
2637                                     a.length);
2638         return vsp.dummyVector().fromArray0(a, offset);
2639     }
2640 
2641     /**
2642      * Loads a vector from an array of type {@code float[]}
2643      * starting at an offset and using a mask.
2644      * Lanes where the mask is unset are filled with the default
2645      * value of {@code float} (zero).
2646      * For each vector lane, where {@code N} is the vector lane index,
2647      * if the mask lane at index {@code N} is set then the array element at
2648      * index {@code offset + N} is placed into the resulting vector at lane index
2649      * {@code N}, otherwise the default element value is placed into the
2650      * resulting vector at lane index {@code N}.
2651      *
2652      * @param species species of desired vector
2653      * @param a the array
2654      * @param offset the offset into the array
2655      * @param m the mask controlling lane selection
2656      * @return the vector loaded from an array
2657      * @throws IndexOutOfBoundsException
2658      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2659      *         for any lane {@code N} in the vector
2660      *         where the mask is set
2661      */
2662     @ForceInline
2663     public static
2664     FloatVector fromArray(VectorSpecies<Float> species,
2665                                    float[] a, int offset,
2666                                    VectorMask<Float> m) {
2667         FloatSpecies vsp = (FloatSpecies) species;
2668         FloatVector zero = vsp.zero();
2669         FloatVector iota = vsp.iota();
2670         ((AbstractMask<Float>)m)
2671             .checkIndexByLane(offset, a.length, iota, 1);
2672         return zero.blend(zero.fromArray0(a, offset), m);
2673     }
2674 
2675     /**
2676      * FIXME: EDIT THIS
2677      * Loads a vector from an array using indexes obtained from an index
2678      * map.
2679      * <p>
2680      * For each vector lane, where {@code N} is the vector lane index, the
2681      * array element at index {@code offset + indexMap[mapOffset + N]} is placed into the
2682      * resulting vector at lane index {@code N}.
2683      *
2684      * @param species species of desired vector
2685      * @param a the array
2686      * @param offset the offset into the array, may be negative if relative
2687      * indexes in the index map compensate to produce a value within the
2688      * array bounds
2689      * @param indexMap the index map
2690      * @param mapOffset the offset into the index map
2691      * @return the vector loaded from an array
2692      * @throws IndexOutOfBoundsException if {@code mapOffset < 0}, or
2693      * {@code mapOffset > indexMap.length - species.length()},
2694      * or for any vector lane index {@code N} the result of
2695      * {@code offset + indexMap[mapOffset + N]} is {@code < 0} or {@code >= a.length}
2696      */
2697     @ForceInline
2698     public static
2699     FloatVector fromArray(VectorSpecies<Float> species,
2700                                    float[] a, int offset,
2701                                    int[] indexMap, int mapOffset) {
2702         FloatSpecies vsp = (FloatSpecies) species;
2703         Objects.requireNonNull(a);
2704         Objects.requireNonNull(indexMap);
2705         Class<? extends FloatVector> vectorType = vsp.vectorType();
2706 
2707 
2708         // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
2709         IntVector vix = IntVector.fromArray(IntVector.species(vsp.indexShape()), indexMap, mapOffset).add(offset);
2710 
2711         vix = VectorIntrinsics.checkIndex(vix, a.length);
2712 
2713         return VectorIntrinsics.loadWithMap(
2714             vectorType, float.class, vsp.laneCount(),
2715             IntVector.species(vsp.indexShape()).vectorType(),
2716             a, ARRAY_BASE, vix,
2717             a, offset, indexMap, mapOffset, vsp,
2718             (float[] c, int idx, int[] iMap, int idy, FloatSpecies s) ->
2719             s.vOp(n -> c[idx + iMap[idy+n]]));
2720         }
2721 
2722     /**
2723      * Loads a vector from an array using indexes obtained from an index
2724      * map and using a mask.
2725      * FIXME: EDIT THIS
2726      * <p>
2727      * For each vector lane, where {@code N} is the vector lane index,
2728      * if the mask lane at index {@code N} is set then the array element at
2729      * index {@code offset + indexMap[mapOffset + N]} is placed into the resulting vector
2730      * at lane index {@code N}.
2731      *
2732      * @param species species of desired vector
2733      * @param a the array
2734      * @param offset the offset into the array, may be negative if relative
2735      * indexes in the index map compensate to produce a value within the
2736      * array bounds
2737      * @param indexMap the index map
2738      * @param mapOffset the offset into the index map
2739      * @param m the mask controlling lane selection
2740      * @return the vector loaded from an array
2741      * @throws IndexOutOfBoundsException if {@code mapOffset < 0}, or
2742      * {@code mapOffset > indexMap.length - species.length()},
2743      * or for any vector lane index {@code N} where the mask at lane
2744      * {@code N} is set the result of {@code offset + indexMap[mapOffset + N]} is
2745      * {@code < 0} or {@code >= a.length}
2746      */
2747     public static
2748     FloatVector fromArray(VectorSpecies<Float> species,
2749                                    float[] a, int offset,
2750                                    int[] indexMap, int mapOffset,
2751                                    VectorMask<Float> m) {
2752         FloatSpecies vsp = (FloatSpecies) species;
2753 
2754         // FIXME This can result in out of bounds errors for unset mask lanes
2755         // FIX = Use a scatter instruction which routes the unwanted lanes
2756         // into a bit-bucket variable (private to implementation).
2757         // This requires a 2-D scatter in order to set a second base address.
2758         // See notes in https://bugs.openjdk.java.net/browse/JDK-8223367
2759         assert(m.allTrue());
2760         return (FloatVector)
2761             zero(species).blend(fromArray(species, a, offset, indexMap, mapOffset), m);
2762 
2763     }
2764 
2765     /**
2766      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2767      * starting at an offset into the byte buffer.
2768      * <p>
2769      * Bytes are composed into primitive lane elements according to
2770      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
2771      * To avoid errors, the
2772      * {@linkplain ByteBuffer#order() intrinsic byte order}
2773      * of the buffer must be little-endian.
2774      * <p>
2775      * This method behaves as if it returns the result of calling
2776      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2777      * fromByteBuffer()} as follows:
2778      * <pre>{@code
2779      * var bb = ByteBuffer.wrap(a);
2780      * var bo = ByteOrder.LITTLE_ENDIAN;
2781      * var m = species.maskAll(true);
2782      * return fromByteBuffer(species, bb, offset, m, bo);
2783      * }</pre>
2784      *
2785      * @param species species of desired vector
2786      * @param bb the byte buffer
2787      * @param offset the offset into the byte buffer
2788      * @return a vector loaded from a byte buffer
2789      * @throws IllegalArgumentException if byte order of bb
2790      *         is not {@link ByteOrder#LITTLE_ENDIAN}
2791      * @throws IndexOutOfBoundsException
2792      *         if {@code offset+N*4 < 0}
2793      *         or {@code offset+N*4 >= bb.limit()}
2794      *         for any lane {@code N} in the vector
2795      */
2796     @ForceInline
2797     public static
2798     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2799                                         ByteBuffer bb, int offset,
2800                                         ByteOrder bo) {
2801         FloatSpecies vsp = (FloatSpecies) species;
2802         offset = checkFromIndexSize(offset,
2803                                     vsp.laneCount(),
2804                                     bb.limit());
2805         return vsp.dummyVector()
2806             .fromByteBuffer0(bb, offset).maybeSwap(bo);
2807     }
2808 
2809     /**
2810      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2811      * starting at an offset into the byte buffer
2812      * and using a mask.
2813      * <p>
2814      * Bytes are composed into primitive lane elements according to
2815      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
2816      * To avoid errors, the
2817      * {@linkplain ByteBuffer#order() intrinsic byte order}
2818      * of the buffer must be little-endian.
2819      * <p>
2820      * This method behaves as if it returns the result of calling
2821      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2822      * fromByteBuffer()} as follows:
2823      * <pre>{@code
2824      * var bb = ByteBuffer.wrap(a);
2825      * var bo = ByteOrder.LITTLE_ENDIAN;
2826      * var m = species.maskAll(true);
2827      * return fromByteBuffer(species, bb, offset, m, bo);
2828      * }</pre>
2829      *
2830      * @param species species of desired vector
2831      * @param bb the byte buffer
2832      * @param offset the offset into the byte buffer
2833      * @param m the mask controlling lane selection
2834      * @return a vector loaded from a byte buffer
2835      * @throws IllegalArgumentException if byte order of bb
2836      *         is not {@link ByteOrder#LITTLE_ENDIAN}
2837      * @throws IndexOutOfBoundsException
2838      *         if {@code offset+N*4 < 0}
2839      *         or {@code offset+N*4 >= bb.limit()}
2840      *         for any lane {@code N} in the vector
2841      *         where the mask is set
2842      */
2843     @ForceInline
2844     public static
2845     FloatVector fromByteBuffer(VectorSpecies<Float> species,
2846                                         ByteBuffer bb, int offset,
2847                                         ByteOrder bo,
2848                                         VectorMask<Float> m) {
2849         if (m.allTrue()) {
2850             return fromByteBuffer(species, bb, offset, bo);
2851         }
2852         FloatSpecies vsp = (FloatSpecies) species;
2853         checkMaskFromIndexSize(offset,
2854                                vsp, m, 1,
2855                                bb.limit());
2856         FloatVector zero = zero(vsp);
2857         FloatVector v = zero.fromByteBuffer0(bb, offset);
2858         return zero.blend(v.maybeSwap(bo), m);
2859     }
2860 
2861     // Memory store operations
2862 
2863     /**
2864      * Stores this vector into an array of type {@code float[]}
2865      * starting at an offset.
2866      * <p>
2867      * For each vector lane, where {@code N} is the vector lane index,
2868      * the lane element at index {@code N} is stored into the array
2869      * element {@code a[offset+N]}.
2870      *
2871      * @param a the array, of type {@code float[]}
2872      * @param offset the offset into the array
2873      * @throws IndexOutOfBoundsException
2874      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2875      *         for any lane {@code N} in the vector
2876      */
2877     @ForceInline
2878     public final
2879     void intoArray(float[] a, int offset) {
2880         FloatSpecies vsp = vspecies();
2881         offset = checkFromIndexSize(offset,
2882                                     vsp.laneCount(),
2883                                     a.length);
2884         VectorIntrinsics.store(
2885             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
2886             a, arrayAddress(a, offset),
2887             this,
2888             a, offset,
2889             (arr, off, v)
2890             -> v.stOp(arr, off,
2891                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
2892     }
2893 
2894     /**
2895      * Stores this vector into an array of {@code float}
2896      * starting at offset and using a mask.
2897      * <p>
2898      * For each vector lane, where {@code N} is the vector lane index,
2899      * the lane element at index {@code N} is stored into the array
2900      * element {@code a[offset+N]}.
2901      * If the mask lane at {@code N} is unset then the corresponding
2902      * array element {@code a[offset+N]} is left unchanged.
2903      * <p>
2904      * Array range checking is done for lanes where the mask is set.
2905      * Lanes where the mask is unset are not stored and do not need
2906      * to correspond to legitimate elements of {@code a}.
2907      * That is, unset lanes may correspond to array indexes less than
2908      * zero or beyond the end of the array.
2909      *
2910      * @param a the array, of type {@code float[]}
2911      * @param offset the offset into the array
2912      * @param m the mask controlling lane storage
2913      * @throws IndexOutOfBoundsException
2914      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2915      *         for any lane {@code N} in the vector
2916      *         where the mask is set
2917      */
2918     @ForceInline
2919     public final
2920     void intoArray(float[] a, int offset,
2921                    VectorMask<Float> m) {
2922         if (m.allTrue()) {
2923             intoArray(a, offset);
2924         } else {
2925             // FIXME: Cannot vectorize yet, if there's a mask.
2926             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
2927         }
2928     }
2929 
2930     /**
2931      * Stores this vector into an array of type {@code float[]}
2932      * using indexes obtained from an index map
2933      * and using a mask.
2934      * <p>
2935      * For each vector lane, where {@code N} is the vector lane index,
2936      * if the mask lane at index {@code N} is set then
2937      * the lane element at index {@code N} is stored into the array
2938      * element {@code a[f(N)]}, where {@code f(N)} is the
2939      * index mapping expression
2940      * {@code offset + indexMap[mapOffset + N]]}.
2941      *
2942      * @param a the array
2943      * @param offset an offset to combine with the index map offsets
2944      * @param indexMap the index map
2945      * @param mapOffset the offset into the index map
2946      * @param m the mask
2947      * @returns a vector of the values {@code m ? a[f(N)] : 0},
2948      *          {@code f(N) = offset + indexMap[mapOffset + N]]}.
2949      * @throws IndexOutOfBoundsException
2950      *         if {@code mapOffset+N < 0}
2951      *         or if {@code mapOffset+N >= indexMap.length},
2952      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2953      *         is an invalid index into {@code a},
2954      *         for any lane {@code N} in the vector
2955      *         where the mask is set
2956      */
2957     @ForceInline
2958     public final
2959     void intoArray(float[] a, int offset,
2960                    int[] indexMap, int mapOffset) {
2961         FloatSpecies vsp = vspecies();
2962         if (length() == 1) {
2963             intoArray(a, offset + indexMap[mapOffset]);
2964             return;
2965         }
2966         IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
2967         if (isp.laneCount() != vsp.laneCount()) {
2968             stOp(a, offset,
2969                  (arr, off, i, e) -> {
2970                      int j = indexMap[mapOffset + i];
2971                      arr[off + j] = e;
2972                  });
2973             return;
2974         }
2975 
2976         // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
2977         IntVector vix = IntVector
2978             .fromArray(isp, indexMap, mapOffset)
2979             .add(offset);
2980 
2981         vix = VectorIntrinsics.checkIndex(vix, a.length);
2982 
2983         VectorIntrinsics.storeWithMap(
2984             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
2985             isp.vectorType(),
2986             a, arrayAddress(a, 0), vix,
2987             this,
2988             a, offset, indexMap, mapOffset,
2989             (arr, off, v, map, mo)
2990             -> v.stOp(arr, off,
2991                       (arr_, off_, i, e) -> {
2992                           int j = map[mo + i];
2993                           arr[off + j] = e;
2994                       }));
2995     }
2996 
2997     /**
2998      * Stores this vector into an array of type {@code float[]}
2999      * using indexes obtained from an index map
3000      * and using a mask.
3001      * <p>
3002      * For each vector lane, where {@code N} is the vector lane index,
3003      * if the mask lane at index {@code N} is set then
3004      * the lane element at index {@code N} is stored into the array
3005      * element {@code a[f(N)]}, where {@code f(N)} is the
3006      * index mapping expression
3007      * {@code offset + indexMap[mapOffset + N]]}.
3008      *
3009      * @param a the array
3010      * @param offset an offset to combine with the index map offsets
3011      * @param indexMap the index map
3012      * @param mapOffset the offset into the index map
3013      * @param m the mask
3014      * @returns a vector of the values {@code m ? a[f(N)] : 0},
3015      *          {@code f(N) = offset + indexMap[mapOffset + N]]}.
3016      * @throws IndexOutOfBoundsException
3017      *         if {@code mapOffset+N < 0}
3018      *         or if {@code mapOffset+N >= indexMap.length},
3019      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3020      *         is an invalid index into {@code a},
3021      *         for any lane {@code N} in the vector
3022      *         where the mask is set
3023      */
3024     @ForceInline
3025     public final
3026     void intoArray(float[] a, int offset,
3027                    int[] indexMap, int mapOffset,
3028                    VectorMask<Float> m) {
3029         FloatSpecies vsp = vspecies();
3030         if (m.allTrue()) {
3031             intoArray(a, offset, indexMap, mapOffset);
3032             return;
3033         }
3034         throw new AssertionError("fixme");
3035     }
3036 
3037     /**
3038      * {@inheritDoc} <!--workaround-->
3039      */
3040     @Override
3041     @ForceInline
3042     public final
3043     void intoByteArray(byte[] a, int offset) {
3044         offset = checkFromIndexSize(offset,
3045                                     bitSize() / Byte.SIZE,
3046                                     a.length);
3047         this.maybeSwap(ByteOrder.LITTLE_ENDIAN)
3048             .intoByteArray0(a, offset);
3049     }
3050 
3051     /**
3052      * {@inheritDoc} <!--workaround-->
3053      */
3054     @Override
3055     @ForceInline
3056     public final
3057     void intoByteArray(byte[] a, int offset,
3058                        VectorMask<Float> m) {
3059         if (m.allTrue()) {
3060             intoByteArray(a, offset);
3061             return;
3062         }
3063         FloatSpecies vsp = vspecies();
3064         checkMaskFromIndexSize(offset, vsp, m, 4, a.length);
3065         conditionalStoreNYI(offset, vsp, m, 4, a.length);
3066         var oldVal = fromByteArray0(a, offset);
3067         var newVal = oldVal.blend(this, m);
3068         newVal.intoByteArray0(a, offset);
3069     }
3070 
3071     /**
3072      * {@inheritDoc} <!--workaround-->
3073      */
3074     @Override
3075     @ForceInline
3076     public final
3077     void intoByteArray(byte[] a, int offset,
3078                        ByteOrder bo,
3079                        VectorMask<Float> m) {
3080         maybeSwap(bo).intoByteArray(a, offset, m);
3081     }
3082 
3083     /**
3084      * {@inheritDoc} <!--workaround-->
3085      */
3086     @Override
3087     @ForceInline
3088     public final
3089     void intoByteBuffer(ByteBuffer bb, int offset,
3090                         ByteOrder bo) {
3091         maybeSwap(bo).intoByteBuffer0(bb, offset);
3092     }
3093 
3094     /**
3095      * {@inheritDoc} <!--workaround-->
3096      */
3097     @Override
3098     @ForceInline
3099     public final
3100     void intoByteBuffer(ByteBuffer bb, int offset,
3101                         ByteOrder bo,
3102                         VectorMask<Float> m) {
3103         if (m.allTrue()) {
3104             intoByteBuffer(bb, offset, bo);
3105             return;
3106         }
3107         FloatSpecies vsp = vspecies();
3108         checkMaskFromIndexSize(offset, vsp, m, 4, bb.limit());
3109         conditionalStoreNYI(offset, vsp, m, 4, bb.limit());
3110         var oldVal = fromByteBuffer0(bb, offset);
3111         var newVal = oldVal.blend(this.maybeSwap(bo), m);
3112         newVal.intoByteBuffer0(bb, offset);
3113     }
3114 
3115     // ================================================
3116 
3117     // Low-level memory operations.
3118     //
3119     // Note that all of these operations *must* inline into a context
3120     // where the exact species of the involved vector is a
3121     // compile-time constant.  Otherwise, the intrinsic generation
3122     // will fail and performance will suffer.
3123     //
3124     // In many cases this is achieved by re-deriving a version of the
3125     // method in each concrete subclass (per species).  The re-derived
3126     // method simply calls one of these generic methods, with exact
3127     // parameters for the controlling metadata, which is either a
3128     // typed vector or constant species instance.
3129 
3130     // Unchecked loading operations in native byte order.
3131     // Caller is responsible for applying index checks, masking, and
3132     // byte swapping.
3133 
3134     /*package-private*/
3135     abstract
3136     FloatVector fromArray0(float[] a, int offset);
3137     @ForceInline
3138     final
3139     FloatVector fromArray0Template(float[] a, int offset) {
3140         FloatSpecies vsp = vspecies();
3141         return VectorIntrinsics.load(
3142             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3143             a, arrayAddress(a, offset),
3144             a, offset, vsp,
3145             (arr, off, s) -> s.ldOp(arr, off,
3146                                     (arr_, off_, i) -> arr_[off_ + i]));
3147     }
3148 
3149     @Override
3150     abstract
3151     FloatVector fromByteArray0(byte[] a, int offset);
3152     @ForceInline
3153     final
3154     FloatVector fromByteArray0Template(byte[] a, int offset) {
3155         FloatSpecies vsp = vspecies();
3156         return VectorIntrinsics.load(
3157             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3158             a, byteArrayAddress(a, offset),
3159             a, offset, vsp,
3160             (arr, off, s) -> {
3161                 FloatBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3162                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3163             });
3164     }
3165 
3166     abstract
3167     FloatVector fromByteBuffer0(ByteBuffer bb, int offset);
3168     @ForceInline
3169     final
3170     FloatVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3171         FloatSpecies vsp = vspecies();
3172         return VectorIntrinsics.load(
3173             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3174             bufferBase(bb), bufferAddress(bb, offset),
3175             bb, offset, vsp,
3176             (buf, off, s) -> {
3177                 FloatBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3178                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3179            });
3180     }
3181 
3182     // Unchecked storing operations in native byte order.
3183     // Caller is responsible for applying index checks, masking, and
3184     // byte swapping.
3185 
3186     abstract
3187     void intoArray0(float[] a, int offset);
3188     @ForceInline
3189     final
3190     void intoArray0Template(float[] a, int offset) {
3191         FloatSpecies vsp = vspecies();
3192         VectorIntrinsics.store(
3193             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3194             a, arrayAddress(a, offset),
3195             this, a, offset,
3196             (arr, off, v)
3197             -> v.stOp(arr, off,
3198                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3199     }
3200 
3201     abstract
3202     void intoByteArray0(byte[] a, int offset);
3203     @ForceInline
3204     final
3205     void intoByteArray0Template(byte[] a, int offset) {
3206         FloatSpecies vsp = vspecies();
3207         VectorIntrinsics.store(
3208             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3209             a, byteArrayAddress(a, offset),
3210             this, a, offset,
3211             (arr, off, v) -> {
3212                 FloatBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3213                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3214             });
3215     }
3216 
3217     @ForceInline
3218     final
3219     void intoByteBuffer0(ByteBuffer bb, int offset) {
3220         FloatSpecies vsp = vspecies();
3221         VectorIntrinsics.store(
3222             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3223             bufferBase(bb), bufferAddress(bb, offset),
3224             this, bb, offset,
3225             (buf, off, v) -> {
3226                 FloatBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3227                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3228             });
3229     }
3230 
3231     // End of low-level memory operations.
3232 
3233     private static
3234     void checkMaskFromIndexSize(int offset,
3235                                 FloatSpecies vsp,
3236                                 VectorMask<Float> m,
3237                                 int scale,
3238                                 int limit) {
3239         ((AbstractMask<Float>)m)
3240             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3241     }
3242 
3243     @ForceInline
3244     private void conditionalStoreNYI(int offset,
3245                                      FloatSpecies vsp,
3246                                      VectorMask<Float> m,
3247                                      int scale,
3248                                      int limit) {
3249         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3250             String msg =
3251                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3252                               offset, limit, m, vsp);
3253             throw new AssertionError(msg);
3254         }
3255     }
3256 
3257     /*package-private*/
3258     @Override
3259     @ForceInline
3260     final
3261     FloatVector maybeSwap(ByteOrder bo) {
3262         if (bo != NATIVE_ENDIAN) {
3263             return this.reinterpretAsBytes()
3264                 .rearrange(swapBytesShuffle())
3265                 .reinterpretAsFloats();
3266         }
3267         return this;
3268     }
3269 
3270     static final int ARRAY_SHIFT =
3271         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE);
3272     static final long ARRAY_BASE =
3273         Unsafe.ARRAY_FLOAT_BASE_OFFSET;
3274 
3275     @ForceInline
3276     static long arrayAddress(float[] a, int index) {
3277         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3278     }
3279 
3280     @ForceInline
3281     static long byteArrayAddress(byte[] a, int index) {
3282         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3283     }
3284 
3285     // Byte buffer wrappers.
3286     private static FloatBuffer wrapper(ByteBuffer bb, int offset,
3287                                         ByteOrder bo) {
3288         return bb.duplicate().position(offset).slice()
3289             .order(bo).asFloatBuffer();
3290     }
3291     private static FloatBuffer wrapper(byte[] a, int offset,
3292                                         ByteOrder bo) {
3293         return ByteBuffer.wrap(a, offset, a.length - offset)
3294             .order(bo).asFloatBuffer();
3295     }
3296 
3297     // ================================================
3298 
3299     /// Reinterpreting view methods:
3300     //   lanewise reinterpret: viewAsXVector()
3301     //   keep shape, redraw lanes: reinterpretAsEs()
3302 
3303     /**
3304      * {@inheritDoc} <!--workaround-->
3305      */
3306     @ForceInline
3307     @Override
3308     public final ByteVector reinterpretAsBytes() {
3309          // Going to ByteVector, pay close attention to byte order.
3310          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3311          return asByteVectorRaw();
3312          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3313     }
3314 
3315     /**
3316      * {@inheritDoc} <!--workaround-->
3317      */
3318     @ForceInline
3319     @Override
3320     public final IntVector viewAsIntegralLanes() {
3321         LaneType ilt = LaneType.FLOAT.asIntegral();
3322         return (IntVector) asVectorRaw(ilt);
3323     }
3324 
3325     /**
3326      * {@inheritDoc} <!--workaround-->
3327      */
3328     @ForceInline
3329     @Override
3330     public final
3331     FloatVector
3332     viewAsFloatingLanes() {
3333         return this;
3334     }
3335 
3336     // ================================================
3337 
3338     /// Object methods: toString, equals, hashCode
3339     //
3340     // Object methods are defined as if Arrays.toString, etc.,
3341     // were applied to the array of elements.  Two equal vectors
3342     // are required to have equal species and equal lane values.
3343 
3344     /**
3345      * Returns a string representation of this vector, of the form
3346      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3347      * in lane order.
3348      *
3349      * The string is produced as if by a call to {@link
3350      * java.util.Arrays#toString(float[]) Arrays.toString()},
3351      * as appropriate to the {@code float} array returned by
3352      * {@link #toArray this.toArray()}.
3353      *
3354      * @return a string of the form {@code "[0,1,2...]"}
3355      * reporting the lane values of this vector
3356      */
3357     @Override
3358     @ForceInline
3359     public final
3360     String toString() {
3361         // now that toArray is strongly typed, we can define this
3362         return Arrays.toString(toArray());
3363     }
3364 
3365     /**
3366      * {@inheritDoc} <!--workaround-->
3367      */
3368     @Override
3369     @ForceInline
3370     public final
3371     boolean equals(Object obj) {
3372         if (obj instanceof Vector) {
3373             Vector<?> that = (Vector<?>) obj;
3374             if (this.species().equals(that.species())) {
3375                 return this.eq(that.check(this.species())).allTrue();
3376             }
3377         }
3378         return false;
3379     }
3380 
3381     /**
3382      * {@inheritDoc} <!--workaround-->
3383      */
3384     @Override
3385     @ForceInline
3386     public final
3387     int hashCode() {
3388         // now that toArray is strongly typed, we can define this
3389         return Objects.hash(species(), Arrays.hashCode(toArray()));
3390     }
3391 
3392     // ================================================
3393 
3394     // Species
3395 
3396     /**
3397      * Class representing {@link FloatVector}'s of the same {@link VectorShape VectorShape}.
3398      */
3399     /*package-private*/
3400     static final class FloatSpecies extends AbstractSpecies<Float> {
3401         private FloatSpecies(VectorShape shape,
3402                 Class<? extends FloatVector> vectorType,
3403                 Class<? extends AbstractMask<Float>> maskType,
3404                 Function<Object, FloatVector> vectorFactory) {
3405             super(shape, LaneType.of(float.class),
3406                   vectorType, maskType,
3407                   vectorFactory);
3408             assert(this.elementSize() == Float.SIZE);
3409         }
3410 
3411         // Specializing overrides:
3412 
3413         @Override
3414         @ForceInline
3415         public final Class<Float> elementType() {
3416             return float.class;
3417         }
3418 
3419         @Override
3420         @ForceInline
3421         public final Class<Float> genericElementType() {
3422             return Float.class;
3423         }
3424 
3425         @Override
3426         @ForceInline
3427         public final Class<float[]> arrayType() {
3428             return float[].class;
3429         }
3430 
3431         @SuppressWarnings("unchecked")
3432         @Override
3433         @ForceInline
3434         public final Class<? extends FloatVector> vectorType() {
3435             return (Class<? extends FloatVector>) vectorType;
3436         }
3437 
3438         @Override
3439         @ForceInline
3440         public final long checkValue(long e) {
3441             longToElementBits(e);  // only for exception
3442             return e;
3443         }
3444 
3445         /*package-private*/
3446         @Override
3447         @ForceInline
3448         final FloatVector broadcastBits(long bits) {
3449             return (FloatVector)
3450                 VectorIntrinsics.broadcastCoerced(
3451                     vectorType, float.class, laneCount,
3452                     bits, this,
3453                     (bits_, s_) -> s_.rvOp(i -> bits_));
3454         }
3455 
3456         /*package-private*/
3457         @ForceInline
3458         
3459         final FloatVector broadcast(float e) {
3460             return broadcastBits(toBits(e));
3461         }
3462 
3463         @Override
3464         @ForceInline
3465         public final FloatVector broadcast(long e) {
3466             return broadcastBits(longToElementBits(e));
3467         }
3468 
3469         /*package-private*/
3470         final @Override
3471         @ForceInline
3472         long longToElementBits(long value) {
3473             // Do the conversion, and then test it for failure.
3474             float e = (float) value;
3475             if ((long) e != value) {
3476                 throw badElementBits(value, e);
3477             }
3478             return toBits(e);
3479         }
3480 
3481         @Override
3482         @ForceInline
3483         public final FloatVector fromValues(long... values) {
3484             VectorIntrinsics.requireLength(values.length, laneCount);
3485             float[] va = new float[laneCount()];
3486             for (int i = 0; i < va.length; i++) {
3487                 long lv = values[i];
3488                 float v = (float) lv;
3489                 va[i] = v;
3490                 if ((long)v != lv) {
3491                     throw badElementBits(lv, v);
3492                 }
3493             }
3494             return dummyVector().fromArray0(va, 0);
3495         }
3496 
3497         /* this non-public one is for internal conversions */
3498         @Override
3499         @ForceInline
3500         final FloatVector fromIntValues(int[] values) {
3501             VectorIntrinsics.requireLength(values.length, laneCount);
3502             float[] va = new float[laneCount()];
3503             for (int i = 0; i < va.length; i++) {
3504                 int lv = values[i];
3505                 float v = (float) lv;
3506                 va[i] = v;
3507                 if ((int)v != lv) {
3508                     throw badElementBits(lv, v);
3509                 }
3510             }
3511             return dummyVector().fromArray0(va, 0);
3512         }
3513 
3514         // Virtual constructors
3515 
3516         @ForceInline
3517         @Override final
3518         public FloatVector fromArray(Object a, int offset) {
3519             // User entry point:  Be careful with inputs.
3520             return FloatVector
3521                 .fromArray(this, (float[]) a, offset);
3522         }
3523 
3524         @Override final
3525         FloatVector dummyVector() {
3526             return (FloatVector) super.dummyVector();
3527         }
3528 
3529         final
3530         FloatVector vectorFactory(float[] vec) {
3531             // Species delegates all factory requests to its dummy
3532             // vector.  The dummy knows all about it.
3533             return dummyVector().vectorFactory(vec);
3534         }
3535 
3536         /*package-private*/
3537         final @Override
3538         @ForceInline
3539         FloatVector rvOp(RVOp f) {
3540             float[] res = new float[laneCount()];
3541             for (int i = 0; i < res.length; i++) {
3542                 int bits = (int) f.apply(i);
3543                 res[i] = fromBits(bits);
3544             }
3545             return dummyVector().vectorFactory(res);
3546         }
3547 
3548         FloatVector vOp(FVOp f) {
3549             float[] res = new float[laneCount()];
3550             for (int i = 0; i < res.length; i++) {
3551                 res[i] = f.apply(i);
3552             }
3553             return dummyVector().vectorFactory(res);
3554         }
3555 
3556         FloatVector vOp(VectorMask<Float> m, FVOp f) {
3557             float[] res = new float[laneCount()];
3558             boolean[] mbits = ((AbstractMask<Float>)m).getBits();
3559             for (int i = 0; i < res.length; i++) {
3560                 if (mbits[i]) {
3561                     res[i] = f.apply(i);
3562                 }
3563             }
3564             return dummyVector().vectorFactory(res);
3565         }
3566 
3567         /*package-private*/
3568         @ForceInline
3569         <M> FloatVector ldOp(M memory, int offset,
3570                                       FLdOp<M> f) {
3571             return dummyVector().ldOp(memory, offset, f);
3572         }
3573 
3574         /*package-private*/
3575         @ForceInline
3576         <M> FloatVector ldOp(M memory, int offset,
3577                                       AbstractMask<Float> m,
3578                                       FLdOp<M> f) {
3579             return dummyVector().ldOp(memory, offset, m, f);
3580         }
3581 
3582         /*package-private*/
3583         @ForceInline
3584         <M> void stOp(M memory, int offset, FStOp<M> f) {
3585             dummyVector().stOp(memory, offset, f);
3586         }
3587 
3588         /*package-private*/
3589         @ForceInline
3590         <M> void stOp(M memory, int offset,
3591                       AbstractMask<Float> m,
3592                       FStOp<M> f) {
3593             dummyVector().stOp(memory, offset, m, f);
3594         }
3595 
3596         // N.B. Make sure these constant vectors and
3597         // masks load up correctly into registers.
3598         //
3599         // Also, see if we can avoid all that switching.
3600         // Could we cache both vectors and both masks in
3601         // this species object?
3602 
3603         // Zero and iota vector access
3604         @Override
3605         @ForceInline
3606         public final FloatVector zero() {
3607             if ((Class<?>) vectorType() == FloatMaxVector.class)
3608                 return FloatMaxVector.ZERO;
3609             switch (vectorBitSize()) {
3610                 case 64: return Float64Vector.ZERO;
3611                 case 128: return Float128Vector.ZERO;
3612                 case 256: return Float256Vector.ZERO;
3613                 case 512: return Float512Vector.ZERO;
3614             }
3615             throw new AssertionError();
3616         }        
3617 
3618         @Override
3619         @ForceInline
3620         public final FloatVector iota() {
3621             if ((Class<?>) vectorType() == FloatMaxVector.class)
3622                 return FloatMaxVector.IOTA;
3623             switch (vectorBitSize()) {
3624                 case 64: return Float64Vector.IOTA;
3625                 case 128: return Float128Vector.IOTA;
3626                 case 256: return Float256Vector.IOTA;
3627                 case 512: return Float512Vector.IOTA;
3628             }
3629             throw new AssertionError();
3630         }
3631 
3632         // Mask access
3633         @Override
3634         @ForceInline
3635         public final VectorMask<Float> maskAll(boolean bit) {
3636             if ((Class<?>) vectorType() == FloatMaxVector.class)
3637                 return FloatMaxVector.FloatMaxMask.maskAll(bit);
3638             switch (vectorBitSize()) {
3639                 case 64: return Float64Vector.Float64Mask.maskAll(bit);
3640                 case 128: return Float128Vector.Float128Mask.maskAll(bit);
3641                 case 256: return Float256Vector.Float256Mask.maskAll(bit);
3642                 case 512: return Float512Vector.Float512Mask.maskAll(bit);
3643             }
3644             throw new AssertionError();
3645         }
3646     }
3647 
3648     /**
3649      * Finds a species for an element type of {@code float} and shape.
3650      *
3651      * @param s the shape
3652      * @return a species for an element type of {@code float} and shape
3653      * @throws IllegalArgumentException if no such species exists for the shape
3654      */
3655     static FloatSpecies species(VectorShape s) {
3656         Objects.requireNonNull(s);
3657         switch (s) {
3658             case S_64_BIT: return (FloatSpecies) SPECIES_64;
3659             case S_128_BIT: return (FloatSpecies) SPECIES_128;
3660             case S_256_BIT: return (FloatSpecies) SPECIES_256;
3661             case S_512_BIT: return (FloatSpecies) SPECIES_512;
3662             case S_Max_BIT: return (FloatSpecies) SPECIES_MAX;
3663             default: throw new IllegalArgumentException("Bad shape: " + s);
3664         }
3665     }
3666 
3667     /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
3668     public static final VectorSpecies<Float> SPECIES_64
3669         = new FloatSpecies(VectorShape.S_64_BIT,
3670                             Float64Vector.class,
3671                             Float64Vector.Float64Mask.class,
3672                             Float64Vector::new);
3673 
3674     /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
3675     public static final VectorSpecies<Float> SPECIES_128
3676         = new FloatSpecies(VectorShape.S_128_BIT,
3677                             Float128Vector.class,
3678                             Float128Vector.Float128Mask.class,
3679                             Float128Vector::new);
3680 
3681     /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
3682     public static final VectorSpecies<Float> SPECIES_256
3683         = new FloatSpecies(VectorShape.S_256_BIT,
3684                             Float256Vector.class,
3685                             Float256Vector.Float256Mask.class,
3686                             Float256Vector::new);
3687 
3688     /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
3689     public static final VectorSpecies<Float> SPECIES_512
3690         = new FloatSpecies(VectorShape.S_512_BIT,
3691                             Float512Vector.class,
3692                             Float512Vector.Float512Mask.class,
3693                             Float512Vector::new);
3694 
3695     /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
3696     public static final VectorSpecies<Float> SPECIES_MAX
3697         = new FloatSpecies(VectorShape.S_Max_BIT,
3698                             FloatMaxVector.class,
3699                             FloatMaxVector.FloatMaxMask.class,
3700                             FloatMaxVector::new);
3701 
3702     /**
3703      * Preferred species for {@link FloatVector}s.
3704      * A preferred species is a species of maximal bit-size for the platform.
3705      */
3706     public static final VectorSpecies<Float> SPECIES_PREFERRED
3707         = (FloatSpecies) VectorSpecies.ofPreferred(float.class);
3708 
3709 
3710     // ==== JROSE NAME CHANGES ====
3711 
3712     /** Use lanewise(NEG, m). */
3713     @Deprecated
3714     public final FloatVector neg(VectorMask<Float> m) {
3715         return lanewise(NEG, m);
3716     }
3717 
3718     /** Use lanewise(ABS, m). */
3719     @Deprecated
3720     public final FloatVector abs(VectorMask<Float> m) {
3721         return lanewise(ABS, m);
3722     }
3723 
3724     /** Use explicit argument of ByteOrder.LITTLE_ENDIAN */
3725     @Deprecated
3726     public static
3727     FloatVector fromByteBuffer(VectorSpecies<Float> species,
3728                                         ByteBuffer bb, int offset) {
3729         ByteOrder bo = ByteOrder.LITTLE_ENDIAN;
3730         if (bb.order() != bo)  throw new IllegalArgumentException();
3731         return fromByteBuffer(species, bb, offset, bo);
3732     }
3733 
3734     /** Use explicit argument of ByteOrder.LITTLE_ENDIAN */
3735     @Deprecated
3736     public static
3737     FloatVector fromByteBuffer(VectorSpecies<Float> species,
3738                                         ByteBuffer bb, int offset,
3739                                         VectorMask<Float> m) {
3740         ByteOrder bo = ByteOrder.LITTLE_ENDIAN;
3741         if (bb.order() != bo)  throw new IllegalArgumentException();
3742         return fromByteBuffer(species, bb, offset, bo, m);
3743     }
3744 
3745     /** Use fromValues(s, value...) */
3746     @Deprecated
3747     public static
3748     FloatVector scalars(VectorSpecies<Float> species,
3749                                  float... values) {
3750         return fromValues(species, values);
3751     }
3752 
3753     @Deprecated public final float addLanes() { return reduceLanes(ADD); }
3754     @Deprecated public final float addLanes(VectorMask<Float> m) { return reduceLanes(ADD, m); }
3755     @Deprecated public final float mulLanes() { return reduceLanes(MUL); }
3756     @Deprecated public final float mulLanes(VectorMask<Float> m) { return reduceLanes(MUL, m); }
3757     @Deprecated public final float minLanes() { return reduceLanes(MIN); }
3758     @Deprecated public final float minLanes(VectorMask<Float> m) { return reduceLanes(MIN, m); }
3759     @Deprecated public final float maxLanes() { return reduceLanes(MAX); }
3760     @Deprecated public final float maxLanes(VectorMask<Float> m) { return reduceLanes(MAX, m); }
3761     @Deprecated public final float orLanes() { return reduceLanes(OR); }
3762     @Deprecated public final float orLanes(VectorMask<Float> m) { return reduceLanes(OR, m); }
3763     @Deprecated public final float andLanes() { return reduceLanes(AND); }
3764     @Deprecated public final float andLanes(VectorMask<Float> m) { return reduceLanes(AND, m); }
3765     @Deprecated public final float xorLanes() { return reduceLanes(XOR); }
3766     @Deprecated public final float xorLanes(VectorMask<Float> m) { return reduceLanes(XOR, m); }
3767     @Deprecated public final FloatVector sqrt(VectorMask<Float> m) { return lanewise(SQRT, m); }
3768     @Deprecated public final FloatVector tan() { return lanewise(TAN); }
3769     @Deprecated public final FloatVector tan(VectorMask<Float> m) { return lanewise(TAN, m); }
3770     @Deprecated public final FloatVector tanh() { return lanewise(TANH); }
3771     @Deprecated public final FloatVector tanh(VectorMask<Float> m) { return lanewise(TANH, m); }
3772     @Deprecated public final FloatVector sin() { return lanewise(SIN); }
3773     @Deprecated public final FloatVector sin(VectorMask<Float> m) { return lanewise(SIN, m); }
3774     @Deprecated public final FloatVector sinh() { return lanewise(SINH); }
3775     @Deprecated public final FloatVector sinh(VectorMask<Float> m) { return lanewise(SINH, m); }
3776     @Deprecated public final FloatVector cos() { return lanewise(COS); }
3777     @Deprecated public final FloatVector cos(VectorMask<Float> m) { return lanewise(COS, m); }
3778     @Deprecated public final FloatVector cosh() { return lanewise(COSH); }
3779     @Deprecated public final FloatVector cosh(VectorMask<Float> m) { return lanewise(COSH, m); }
3780     @Deprecated public final FloatVector asin() { return lanewise(ASIN); }
3781     @Deprecated public final FloatVector asin(VectorMask<Float> m) { return lanewise(ASIN, m); }
3782     @Deprecated public final FloatVector acos() { return lanewise(ACOS); }
3783     @Deprecated public final FloatVector acos(VectorMask<Float> m) { return lanewise(ACOS, m); }
3784     @Deprecated public final FloatVector atan() { return lanewise(ATAN); }
3785     @Deprecated public final FloatVector atan(VectorMask<Float> m) { return lanewise(ATAN, m); }
3786     @Deprecated public final FloatVector atan2(Vector<Float> v) { return lanewise(ATAN2, v); }
3787     @Deprecated public final FloatVector atan2(float s) { return lanewise(ATAN2, s); }
3788     @Deprecated public final FloatVector atan2(Vector<Float> v, VectorMask<Float> m) { return lanewise(ATAN2, v, m); }
3789     @Deprecated public final FloatVector atan2(float s, VectorMask<Float> m) { return lanewise(ATAN2, s, m); }
3790     @Deprecated public final FloatVector cbrt() { return lanewise(CBRT); }
3791     @Deprecated public final FloatVector cbrt(VectorMask<Float> m) { return lanewise(CBRT, m); }
3792     @Deprecated public final FloatVector log() { return lanewise(LOG); }
3793     @Deprecated public final FloatVector log(VectorMask<Float> m) { return lanewise(LOG, m); }
3794     @Deprecated public final FloatVector log10() { return lanewise(LOG10); }
3795     @Deprecated public final FloatVector log10(VectorMask<Float> m) { return lanewise(LOG10, m); }
3796     @Deprecated public final FloatVector log1p() { return lanewise(LOG1P); }
3797     @Deprecated public final FloatVector log1p(VectorMask<Float> m) { return lanewise(LOG1P, m); }
3798     @Deprecated public final FloatVector pow(Vector<Float> v, VectorMask<Float> m) { return lanewise(POW, v, m); }
3799     @Deprecated public final FloatVector pow(float s, VectorMask<Float> m) { return lanewise(POW, s, m); }
3800     @Deprecated public final FloatVector exp() { return lanewise(EXP); }
3801     @Deprecated public final FloatVector exp(VectorMask<Float> m) { return lanewise(EXP, m); }
3802     @Deprecated public final FloatVector expm1() { return lanewise(EXPM1); }
3803     @Deprecated public final FloatVector expm1(VectorMask<Float> m) { return lanewise(EXPM1, m); }
3804     @Deprecated public final FloatVector hypot(Vector<Float> v) { return lanewise(HYPOT, v); }
3805     @Deprecated public final FloatVector hypot(float s) { return lanewise(HYPOT, s); }
3806     @Deprecated public final FloatVector hypot(Vector<Float> v, VectorMask<Float> m) { return lanewise(HYPOT, v, m); }
3807     @Deprecated public final FloatVector hypot(float s, VectorMask<Float> m) { return lanewise(HYPOT, s, m); }
3808     @Deprecated public final FloatVector and(Vector<Float> v, VectorMask<Float> m) { return lanewise(AND, v, m); }
3809     @Deprecated public final FloatVector and(float s, VectorMask<Float> m) { return lanewise(AND, s, m); }
3810     @Deprecated public final FloatVector or(Vector<Float> v, VectorMask<Float> m) { return lanewise(OR, v, m); }
3811     @Deprecated public final FloatVector or(float s, VectorMask<Float> m) { return lanewise(OR, s, m); }
3812     @Deprecated public final FloatVector xor(Vector<Float> v) { return lanewise(XOR, v); }
3813     @Deprecated public final FloatVector xor(float s) { return lanewise(XOR, s); }
3814     @Deprecated public final FloatVector xor(Vector<Float> v, VectorMask<Float> m) { return lanewise(XOR, v, m); }
3815     @Deprecated public final FloatVector xor(float s, VectorMask<Float> m) { return lanewise(XOR, s, m); }
3816     @Deprecated public final FloatVector not(VectorMask<Float> m) { return lanewise(NOT, m); }
3817     @Deprecated public final FloatVector shiftLeft(int s) { return lanewise(LSHL, (float) s); }
3818     @Deprecated public final FloatVector shiftLeft(int s, VectorMask<Float> m) { return lanewise(LSHL, (float) s, m); }
3819     @Deprecated public final FloatVector shiftLeft(Vector<Float> v) { return lanewise(LSHL, v); }
3820     @Deprecated public final FloatVector shiftLeft(Vector<Float> v, VectorMask<Float> m) { return lanewise(LSHL, v, m); }
3821     @Deprecated public final FloatVector shiftRight(int s) { return lanewise(LSHR, (float) s); }
3822     @Deprecated public final FloatVector shiftRight(int s, VectorMask<Float> m) { return lanewise(LSHR, (float) s, m); }
3823     @Deprecated public final FloatVector shiftRight(Vector<Float> v) { return lanewise(LSHR, v); }
3824     @Deprecated public final FloatVector shiftRight(Vector<Float> v, VectorMask<Float> m) { return lanewise(LSHR, v, m); }
3825     @Deprecated public final FloatVector shiftArithmeticRight(int s) { return lanewise(ASHR, (float) s); }
3826     @Deprecated public final FloatVector shiftArithmeticRight(int s, VectorMask<Float> m) { return lanewise(ASHR, (float) s, m); }
3827     @Deprecated public final FloatVector shiftArithmeticRight(Vector<Float> v) { return lanewise(ASHR, v); }
3828     @Deprecated public final FloatVector shiftArithmeticRight(Vector<Float> v, VectorMask<Float> m) { return lanewise(ASHR, v, m); }
3829     @Deprecated public final FloatVector rotateLeft(int s) { return lanewise(ROL, (float) s); }
3830     @Deprecated public final FloatVector rotateLeft(int s, VectorMask<Float> m) { return lanewise(ROL, (float) s, m); }
3831     @Deprecated public final FloatVector rotateRight(int s) { return lanewise(ROR, (float) s); }
3832     @Deprecated public final FloatVector rotateRight(int s, VectorMask<Float> m) { return lanewise(ROR, (float) s, m); }
3833     @Deprecated @Override public FloatVector rotateLanesLeft(int i) { return (FloatVector) super.rotateLanesLeft(i); }
3834     @Deprecated @Override public FloatVector rotateLanesRight(int i) { return (FloatVector) super.rotateLanesRight(i); }
3835     @Deprecated @Override public FloatVector shiftLanesLeft(int i) { return (FloatVector) super.shiftLanesLeft(i); }
3836     @Deprecated @Override public FloatVector shiftLanesRight(int i) { return (FloatVector) super.shiftLanesRight(i); }
3837     @Deprecated public FloatVector with(int i, float e) { return withLane(i, e); }
3838 }