1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.FloatBuffer;
  29 import java.nio.ByteOrder;
  30 import java.util.Objects;
  31 import java.util.function.IntUnaryOperator;
  32 import java.util.function.Function;
  33 import java.util.concurrent.ThreadLocalRandom;
  34 
  35 import jdk.internal.misc.Unsafe;
  36 import jdk.internal.vm.annotation.ForceInline;
  37 import static jdk.incubator.vector.VectorIntrinsics.*;
  38 
  39 
  40 /**
  41  * A specialized {@link Vector} representing an ordered immutable sequence of
  42  * {@code float} values.
  43  */
  44 @SuppressWarnings("cast")
  45 public abstract class FloatVector extends Vector<Float> {
  46 
  47     FloatVector() {} // package-private: cannot be invoked from outside jdk.incubator.vector
  48 
  49     private static final int ARRAY_SHIFT = 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE); // floor(log2(float[] index scale)); fromArray shifts an element index by this to get a byte offset
  50 
  51     // Unary operator
  52 
  53     interface FUnOp {
             // Maps one float to one float; i is presumably the lane index — confirm against implementations.
  54         float apply(int i, float a);
  55     }
  56 
         // Abstract hooks taking an FUnOp, without and with a mask; the bodies live in subclasses.
  57     abstract FloatVector uOp(FUnOp f);
  58 
  59     abstract FloatVector uOp(VectorMask<Float> m, FUnOp f);
  60 
  61     // Binary operator
  62 
  63     interface FBinOp {
             // Combines two floats into one; i is presumably the lane index — confirm against implementations.
  64         float apply(int i, float a, float b);
  65     }
  66 
         // Abstract hooks taking a second vector and an FBinOp, without and with a mask.
  67     abstract FloatVector bOp(Vector<Float> v, FBinOp f);
  68 
  69     abstract FloatVector bOp(Vector<Float> v, VectorMask<Float> m, FBinOp f);
  70 
  71     // Trinary operator
  72 
  73     interface FTriOp {
             // Combines three floats into one; i is presumably the lane index — confirm against implementations.
  74         float apply(int i, float a, float b, float c);
  75     }
  76 
         // Abstract hooks taking two further vectors and an FTriOp, without and with a mask.
  77     abstract FloatVector tOp(Vector<Float> v1, Vector<Float> v2, FTriOp f);
  78 
  79     abstract FloatVector tOp(Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m, FTriOp f);
  80 
  81     // Reduction operator
  82 
  83     abstract float rOp(float v, FBinOp f); // yields a single float; v is presumably the initial/identity value — confirm
  84 
  85     // Binary test
  86 
  87     interface FBinTest {
             // Predicate over two floats; i is presumably the lane index — confirm against implementations.
  88         boolean apply(int i, float a, float b);
  89     }
  90 
         // Abstract hook that turns an FBinTest against another vector into a mask.
  91     abstract VectorMask<Float> bTest(Vector<Float> v, FBinTest f);
  92 
  93     // Foreach
  94 
  95     interface FUnCon {
             // Consumes one float and returns nothing; i is presumably the lane index — confirm.
  96         void apply(int i, float a);
  97     }
  98 
         // Abstract hooks that hand values to an FUnCon, without and with a mask.
  99     abstract void forEach(FUnCon f);
 100 
 101     abstract void forEach(VectorMask<Float> m, FUnCon f);
 102 
 103     // Static factories
 104 
 105     /**
 106      * Returns a vector where all lane elements are set to the default
 107      * primitive value.
 108      *
 109      * @param species species of desired vector
 110      * @return a zero vector of given species
 111      */
 112     @ForceInline
 113     @SuppressWarnings("unchecked")
 114     public static FloatVector zero(VectorSpecies<Float> species) {
             // Broadcasts the int bit pattern of 0.0f. The lambda rebuilds the same vector lane by lane
             // through FloatSpecies.op (presumably the non-intrinsic path — confirm in VectorIntrinsics).
 115         Class<FloatVector> vectorClass = (Class<FloatVector>) species.vectorType();
 116         return VectorIntrinsics.broadcastCoerced(vectorClass, float.class, species.length(),
 117                 Float.floatToIntBits(0.0f), species,
                 (rawBits, sp) -> ((FloatSpecies) sp).op(lane -> Float.intBitsToFloat((int) rawBits)));
 118     }
 119 
 120     @ForceInline
 121     @SuppressWarnings("unchecked")
 122     static VectorShuffle<Float> shuffleIotaHelper(VectorSpecies<Float> species, int step) {
 123         switch (species.bitSize()) {
 124             case 64: return VectorIntrinsics.shuffleIota(float.class, Float64Vector.Float64Shuffle.class, species,
 125                                                         64 / Float.SIZE, step,
 126                                                         (val, l) -> new Float64Vector.Float64Shuffle(i -> ((i + val) & (l-1))));
 127             case 128: return VectorIntrinsics.shuffleIota(float.class, Float128Vector.Float128Shuffle.class, species,
 128                                                         128/ Float.SIZE, step,
 129                                                         (val, l) -> new Float128Vector.Float128Shuffle(i -> ((i + val) & (l-1))));
 130             case 256: return VectorIntrinsics.shuffleIota(float.class, Float256Vector.Float256Shuffle.class, species,
 131                                                         256/ Float.SIZE, step,
 132                                                         (val, l) -> new Float256Vector.Float256Shuffle(i -> ((i + val) & (l-1))));
 133             case 512: return VectorIntrinsics.shuffleIota(float.class, Float512Vector.Float512Shuffle.class, species,
 134                                                         512 / Float.SIZE, step,
 135                                                         (val, l) -> new Float512Vector.Float512Shuffle(i -> ((i + val) & (l-1))));
 136             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 137         }
 138     }
 139 
 140     /**
 141      * Loads a vector from a byte array starting at an offset.
 142      * <p>
 143      * Bytes are composed into primitive lane elements according to the
 144      * native byte order of the underlying platform.
 145      * <p>
 146      * This method behaves as if it returns the result of calling the
 147      * byte buffer, offset, and mask accepting
 148      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 149      * <pre>{@code
 150      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, VectorMask.allTrue());
 151      * }</pre>
 152      *
 153      * @param species species of desired vector
 154      * @param a the byte array
 155      * @param offset the offset into the array
 156      * @return a vector loaded from a byte array
 157      * @throws IndexOutOfBoundsException if {@code offset < 0} or
 158      * {@code offset > a.length - (species.length() * species.elementSize() / Byte.SIZE)}
 159      */
 160     @ForceInline
 161     @SuppressWarnings("unchecked")
 162     public static FloatVector fromByteArray(VectorSpecies<Float> species, byte[] a, int offset) {
 163         Objects.requireNonNull(a);
             // Range-check the whole vector footprint, measured in bytes, before loading.
 164         offset = VectorIntrinsics.checkIndex(offset, a.length, species.bitSize() / Byte.SIZE);
 165         Class<FloatVector> vectorClass = (Class<FloatVector>) species.vectorType();
 166         long byteOffset = ((long) offset) + Unsafe.ARRAY_BYTE_BASE_OFFSET;
 167         return VectorIntrinsics.load(vectorClass, float.class, species.length(), a, byteOffset, a, offset, species,
 168                 (bytes, start, sp) -> {
                         // View the rest of the array as native-order floats and read them one after another.
 169                     FloatBuffer floats = ByteBuffer.wrap(bytes, start, bytes.length - start)
 170                                                    .order(ByteOrder.nativeOrder())
 171                                                    .asFloatBuffer();
                         return ((FloatSpecies) sp).op(lane -> floats.get());
 172                 });
 173     }
 174 
 175     /**
 176      * Loads a vector from a byte array starting at an offset and using a
 177      * mask.
 178      * <p>
 179      * Bytes are composed into primitive lane elements according to the
 180      * native byte order of the underlying platform.
 181      * <p>
 182      * This method behaves as if it returns the result of calling the
 183      * byte buffer, offset, and mask accepting
 184      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 185      * <pre>{@code
 186      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, m);
 187      * }</pre>
 188      *
 189      * @param species species of desired vector
 190      * @param a the byte array
 191      * @param offset the offset into the array
 192      * @param m the mask
 193      * @return a vector loaded from a byte array
 194      * @throws IndexOutOfBoundsException if {@code offset < 0} or
 195      * for any vector lane index {@code N} where the mask at lane {@code N}
 196      * is set
 197      * {@code offset >= a.length - (N * species.elementSize() / Byte.SIZE)}
 198      */
 199     @ForceInline
 200     public static FloatVector fromByteArray(VectorSpecies<Float> species, byte[] a, int offset, VectorMask<Float> m) {
             // A full unmasked load is done before blending, so its range check applies to
             // every lane, not only to the lanes set in m.
 201         FloatVector zeros = zero(species);
             FloatVector loaded = fromByteArray(species, a, offset);
             return zeros.blend(loaded, m);
 202     }
 203 
 204     /**
 205      * Loads a vector from an array starting at offset.
 206      * <p>
 207      * For each vector lane, where {@code N} is the vector lane index, the
 208      * array element at index {@code offset + N} is placed into the
 209      * resulting vector at lane index {@code N}.
 210      *
 211      * @param species species of desired vector
 212      * @param a the array
 213      * @param offset the offset into the array
 214      * @return the vector loaded from an array
 215      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 216      * {@code offset > a.length - species.length()}
 217      */
 218     @ForceInline
 219     @SuppressWarnings("unchecked")
 220     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int offset) {
 221         Objects.requireNonNull(a);
 222         offset = VectorIntrinsics.checkIndex(offset, a.length, species.length());
 223         Class<FloatVector> vectorClass = (Class<FloatVector>) species.vectorType();
             // Element index scaled to bytes, plus the offset of element 0 within a float[].
 224         long byteOffset = (((long) offset) << ARRAY_SHIFT) + Unsafe.ARRAY_FLOAT_BASE_OFFSET;
 225         return VectorIntrinsics.load(vectorClass, float.class, species.length(), a, byteOffset, a, offset, species,
 226                 (arr, start, sp) -> ((FloatSpecies) sp).op(lane -> arr[start + lane]));
 227     }
 228 
 229 
 230     /**
 231      * Loads a vector from an array starting at offset and using a mask.
 232      * <p>
 233      * For each vector lane, where {@code N} is the vector lane index,
 234      * if the mask lane at index {@code N} is set then the array element at
 235      * index {@code offset + N} is placed into the resulting vector at lane index
 236      * {@code N}, otherwise the default element value is placed into the
 237      * resulting vector at lane index {@code N}.
 238      *
 239      * @param species species of desired vector
 240      * @param a the array
 241      * @param offset the offset into the array
 242      * @param m the mask
 243      * @return the vector loaded from an array
 244      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 245      * for any vector lane index {@code N} where the mask at lane {@code N}
 246      * is set {@code offset >= a.length - N}
 247      */
 248     @ForceInline
 249     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int offset, VectorMask<Float> m) {
             // A full unmasked load is done before blending, so its range check applies to
             // every lane, not only to the lanes set in m.
 250         FloatVector zeros = zero(species);
             FloatVector loaded = fromArray(species, a, offset);
             return zeros.blend(loaded, m);
 251     }
 252 
 253     /**
 254      * Loads a vector from an array using indexes obtained from an index
 255      * map.
 256      * <p>
 257      * For each vector lane, where {@code N} is the vector lane index, the
 258      * array element at index {@code a_offset + indexMap[i_offset + N]} is placed into the
 259      * resulting vector at lane index {@code N}.
 260      *
 261      * @param species species of desired vector
 262      * @param a the array
 263      * @param a_offset the offset into the array, may be negative if relative
 264      * indexes in the index map compensate to produce a value within the
 265      * array bounds
 266      * @param indexMap the index map
 267      * @param i_offset the offset into the index map
 268      * @return the vector loaded from an array
 269      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 270      * {@code i_offset > indexMap.length - species.length()},
 271      * or for any vector lane index {@code N} the result of
 272      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
 273      */
 274     @ForceInline
 275     @SuppressWarnings("unchecked")
 276     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int a_offset, int[] indexMap, int i_offset) {
 277         Objects.requireNonNull(a);
 278         Objects.requireNonNull(indexMap);
 279 
 280         // Absolute element indexes, one per lane k: a_offset + indexMap[i_offset + k]
 281         IntVector laneIndexes = IntVector.fromArray(IntVector.species(species.indexShape()), indexMap, i_offset)
 282                                          .add(a_offset);
 283         // Validate the computed indexes against a.length before the indexed load.
 284         laneIndexes = VectorIntrinsics.checkIndex(laneIndexes, a.length);
 285 
 286         Class<FloatVector> vectorClass = (Class<FloatVector>) species.vectorType();
 287         return VectorIntrinsics.loadWithMap(vectorClass, float.class, species.length(),
 288                 IntVector.species(species.indexShape()).vectorType(), a, Unsafe.ARRAY_FLOAT_BASE_OFFSET, laneIndexes,
 289                 a, a_offset, indexMap, i_offset, species,
 290                 (float[] arr, int base, int[] map, int mapStart, VectorSpecies<Float> sp) ->
                         ((FloatSpecies) sp).op(lane -> arr[base + map[mapStart + lane]]));
 291     }
 292 
 293     /**
 294      * Loads a vector from an array using indexes obtained from an index
 295      * map and using a mask.
 296      * <p>
 297      * For each vector lane, where {@code N} is the vector lane index,
 298      * if the mask lane at index {@code N} is set then the array element at
 299      * index {@code a_offset + indexMap[i_offset + N]} is placed into the resulting vector
 300      * at lane index {@code N}, otherwise the default element value is placed
          * into the resulting vector at lane index {@code N}.
 301      *
 302      * @param species species of desired vector
 303      * @param a the array
 304      * @param a_offset the offset into the array, may be negative if relative
 305      * indexes in the index map compensate to produce a value within the
 306      * array bounds
 307      * @param m the mask
 308      * @param indexMap the index map
 309      * @param i_offset the offset into the index map
 310      * @return the vector loaded from an array
 311      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 312      * {@code i_offset > indexMap.length - species.length()},
 313      * or for any vector lane index {@code N} where the mask at lane
 314      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
 315      * {@code < 0} or {@code >= a.length}
 316      */
 317     @ForceInline
 318     @SuppressWarnings("unchecked")
 319     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int a_offset, VectorMask<Float> m, int[] indexMap, int i_offset) {
 320         // @@@ The unmasked load below validates every lane, so an out-of-bounds index
             //     in an unset mask lane still results in an error.
 321         FloatVector zeros = zero(species);
             FloatVector loaded = fromArray(species, a, a_offset, indexMap, i_offset);
             return zeros.blend(loaded, m);
 322     }
 323 
 324 
 325     /**
 326      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 327      * offset into the byte buffer.
 328      * <p>
 329      * Bytes are composed into primitive lane elements according to the
 330      * native byte order of the underlying platform.
 331      * <p>
 332      * This method behaves as if it returns the result of calling the
 333      * byte buffer, offset, and mask accepting
 334      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 335      * <pre>{@code
 336      *   return fromByteBuffer(species, bb, offset, VectorMask.allTrue())
 337      * }</pre>
 338      *
 339      * @param species species of desired vector
 340      * @param bb the byte buffer
 341      * @param offset the offset into the byte buffer
 342      * @return a vector loaded from a byte buffer
          * @throws NullPointerException if {@code bb} is {@code null}
          * @throws IllegalArgumentException if the byte order of {@code bb} is not
          * the native byte order of the underlying platform
 343      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 344      * or {@code > bb.limit()},
 345      * or if there are fewer than
 346      * {@code species.length() * species.elementSize() / Byte.SIZE} bytes
 347      * remaining in the byte buffer from the given offset
 348      */
 349     @ForceInline
 350     @SuppressWarnings("unchecked")
 351     public static FloatVector fromByteBuffer(VectorSpecies<Float> species, ByteBuffer bb, int offset) {
             // Explicit null check, matching the array-based factories in this class.
             Objects.requireNonNull(bb);
 352         if (bb.order() != ByteOrder.nativeOrder()) {
 353             throw new IllegalArgumentException(
                         "ByteBuffer order is " + bb.order() + " but native order " + ByteOrder.nativeOrder() + " is required");
 354         }
             // Range-check the whole vector footprint, measured in bytes, against the buffer limit.
 355         offset = VectorIntrinsics.checkIndex(offset, bb.limit(), species.bitSize() / Byte.SIZE);
             // Base object and address are read from the buffer through U with BYTE_BUFFER_HB and
             // BUFFER_ADDRESS (presumably Unsafe field offsets supplied by VectorIntrinsics — confirm).
 356         return VectorIntrinsics.load((Class<FloatVector>) species.vectorType(), float.class, species.length(),
 357                                      U.getReference(bb, BYTE_BUFFER_HB), U.getLong(bb, BUFFER_ADDRESS) + offset,
 358                                      bb, offset, species,
 359                                      (c, idx, s) -> {
                                              // Work on a duplicate so the caller's position is left untouched.
 360                                          ByteBuffer bbc = c.duplicate().position(idx).order(ByteOrder.nativeOrder());
 361                                          FloatBuffer tb = bbc.asFloatBuffer();
 362                                          return ((FloatSpecies)s).op(i -> tb.get());
 363                                      });
 364     }
 365 
 366     /**
 367      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 368      * offset into the byte buffer and using a mask.
 369      * <p>
 370      * This method behaves as if the byte buffer is viewed as a primitive
 371      * {@link java.nio.Buffer buffer} for the primitive element type,
 372      * according to the native byte order of the underlying platform, and
 373      * the returned vector is loaded with a mask from a primitive array
 374      * obtained from the primitive buffer.
 375      * The following pseudocode expresses the behaviour, where
 376      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 377      * primitive element type, and {@code ESpecies} is the primitive
 378      * species for {@code e}:
 379      * <pre>{@code
 380      * EBuffer eb = bb.duplicate().
 381      *     order(ByteOrder.nativeOrder()).position(offset).
 382      *     asEBuffer();
 383      * e[] es = new e[species.length()];
 384      * for (int n = 0; n < es.length; n++) {
 385      *     if (m.isSet(n))
 386      *         es[n] = eb.get(n);
 387      * }
 388      * EVector r = EVector.fromArray(species, es, 0, m);
 389      * }</pre>
 390      *
 391      * @param species species of desired vector
 392      * @param bb the byte buffer
 393      * @param offset the offset into the byte buffer
 394      * @param m the mask
 395      * @return a vector loaded from a byte buffer
 396      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 397      * or {@code > bb.limit()},
 398      * or for any vector lane index {@code N} where the mask at lane {@code N}
 399      * is set
 400      * {@code offset >= bb.limit() - (N * species.elementSize() / Byte.SIZE)}
 401      */
 402     @ForceInline
 403     public static FloatVector fromByteBuffer(VectorSpecies<Float> species, ByteBuffer bb, int offset, VectorMask<Float> m) {
             // A full unmasked load is done before blending, so its byte-order and range
             // checks apply regardless of which lanes are set in m.
 404         FloatVector zeros = zero(species);
             FloatVector loaded = fromByteBuffer(species, bb, offset);
             return zeros.blend(loaded, m);
 405     }
 406 
 407     /**
 408      * Returns a vector where all lane elements are set to the primitive
 409      * value {@code e}.
 410      *
 411      * @param species species of the desired vector
 412      * @param e the value to be broadcasted
 413      * @return a vector where all lane elements are set to
 414      * the primitive value {@code e}
 415      */
 416     @ForceInline
 417     @SuppressWarnings("unchecked")
 418     public static FloatVector broadcast(VectorSpecies<Float> species, float e) {
 419         Class<FloatVector> vectorClass = (Class<FloatVector>) species.vectorType();
             // The value is passed as its int bit pattern and decoded back to float per lane in the lambda.
 420         return VectorIntrinsics.broadcastCoerced(vectorClass, float.class, species.length(),
 421                 Float.floatToIntBits(e), species,
 422                 (rawBits, sp) -> ((FloatSpecies) sp).op(lane -> Float.intBitsToFloat((int) rawBits)));
 423     }
 424 
 425     /**
 426      * Returns a vector where each lane element is set to given
 427      * primitive values.
 428      * <p>
 429      * For each vector lane, where {@code N} is the vector lane index, the
 430      * primitive value at index {@code N} is placed into the resulting
 431      * vector at lane index {@code N}.
 432      *
 433      * @param species species of the desired vector
 434      * @param es the given primitive values
 435      * @return a vector where each lane element is set to given primitive
 436      * values
 437      * @throws IndexOutOfBoundsException if {@code es.length < species.length()}
 438      */
 439     @ForceInline
 440     @SuppressWarnings("unchecked")
 441     public static FloatVector scalars(VectorSpecies<Float> species, float... es) {
 442         Objects.requireNonNull(es);
             // The load always starts at index 0; the check covers one full vector of elements.
 443         final int start = VectorIntrinsics.checkIndex(0, es.length, species.length());
 444         Class<FloatVector> vectorClass = (Class<FloatVector>) species.vectorType();
 445         return VectorIntrinsics.load(vectorClass, float.class, species.length(),
 446                 es, Unsafe.ARRAY_FLOAT_BASE_OFFSET, es, start, species,
 447                 (arr, from, sp) -> ((FloatSpecies) sp).op(lane -> arr[from + lane]));
 448     }
 449 
 450     /**
 451      * Returns a vector where the first lane element is set to the primitive
 452      * value {@code e}, all other lane elements are set to the default
 453      * value.
 454      *
 455      * @param species species of the desired vector
 456      * @param e the value
 457      * @return a vector where the first lane element is set to the primitive
 458      * value {@code e}
 459      */
 460     @ForceInline
 461     public static final FloatVector single(VectorSpecies<Float> species, float e) {
             // Start from the zero vector and replace lane 0 only.
 462         FloatVector zeros = zero(species);
             return zeros.with(0, e);
 463     }
 464 
 465     /**
 466      * Returns a vector where each lane element is set to a randomly
 467      * generated primitive value.
 468      *
 469      * The semantics are equivalent to calling
 470      * {@link ThreadLocalRandom#nextFloat()}
 471      *
 472      * @param species species of the desired vector
 473      * @return a vector where each lane element is set to a randomly
 474      * generated primitive value
 475      */
 476     public static FloatVector random(VectorSpecies<Float> species) {
             // Lane values come from nextFloat() on the calling thread's generator.
 477         final ThreadLocalRandom rng = ThreadLocalRandom.current();
 478         FloatSpecies floatSpecies = (FloatSpecies) species;
             return floatSpecies.op(lane -> rng.nextFloat());
 479     }
 480 
 481     // Ops
 482 
 483     /**
 484      * {@inheritDoc}
 485      */
 486     @Override
 487     public abstract FloatVector add(Vector<Float> v);
 488 
 489     /**
 490      * Adds this vector to the broadcast of an input scalar.
 491      * <p>
 492      * This is a lane-wise binary operation which applies the primitive addition operation
 493      * ({@code +}) to each lane.
 494      *
 495      * @param s the input scalar
 496      * @return the result of adding this vector to the broadcast of an input
 497      * scalar
 498      */
 499     public abstract FloatVector add(float s);
 500 
 501     /**
 502      * {@inheritDoc}
 503      */
 504     @Override
 505     public abstract FloatVector add(Vector<Float> v, VectorMask<Float> m);
 506 
 507     /**
 508      * Adds this vector to the broadcast of an input scalar,
 509      * selecting lane elements controlled by a mask.
 510      * <p>
 511      * This is a lane-wise binary operation which applies the primitive addition operation
 512      * ({@code +}) to each lane.
 513      *
 514      * @param s the input scalar
 515      * @param m the mask controlling lane selection
 516      * @return the result of adding this vector to the broadcast of an input
 517      * scalar, with lane selection controlled by the mask
 518      */
 519     public abstract FloatVector add(float s, VectorMask<Float> m);
 520 
 521     /**
 522      * {@inheritDoc}
 523      */
 524     @Override
 525     public abstract FloatVector sub(Vector<Float> v);
 526 
 527     /**
 528      * Subtracts the broadcast of an input scalar from this vector.
 529      * <p>
 530      * This is a lane-wise binary operation which applies the primitive subtraction
 531      * operation ({@code -}) to each lane.
 532      *
 533      * @param s the input scalar
 534      * @return the result of subtracting the broadcast of an input
 535      * scalar from this vector
 536      */
 537     public abstract FloatVector sub(float s);
 538 
 539     /**
 540      * {@inheritDoc}
 541      */
 542     @Override
 543     public abstract FloatVector sub(Vector<Float> v, VectorMask<Float> m);
 544 
 545     /**
 546      * Subtracts the broadcast of an input scalar from this vector, selecting
 547      * lane elements controlled by a mask.
 548      * <p>
 549      * This is a lane-wise binary operation which applies the primitive subtraction
 550      * operation ({@code -}) to each lane.
 551      *
 552      * @param s the input scalar
 553      * @param m the mask controlling lane selection
 554      * @return the result of subtracting the broadcast of an input
 555      * scalar from this vector, with lane selection controlled by the mask
 556      */
 557     public abstract FloatVector sub(float s, VectorMask<Float> m);
 558 
 559     /**
 560      * {@inheritDoc}
 561      */
 562     @Override
 563     public abstract FloatVector mul(Vector<Float> v);
 564 
 565     /**
 566      * Multiplies this vector with the broadcast of an input scalar.
 567      * <p>
 568      * This is a lane-wise binary operation which applies the primitive multiplication
 569      * operation ({@code *}) to each lane.
 570      *
 571      * @param s the input scalar
 572      * @return the result of multiplying this vector with the broadcast of an
 573      * input scalar
 574      */
 575     public abstract FloatVector mul(float s);
 576 
 577     /**
 578      * {@inheritDoc}
 579      */
 580     @Override
 581     public abstract FloatVector mul(Vector<Float> v, VectorMask<Float> m);
 582 
 583     /**
 584      * Multiplies this vector with the broadcast of an input scalar, selecting
 585      * lane elements controlled by a mask.
 586      * <p>
 587      * This is a lane-wise binary operation which applies the primitive multiplication
 588      * operation ({@code *}) to each lane.
 589      *
 590      * @param s the input scalar
 591      * @param m the mask controlling lane selection
 592      * @return the result of multiplying this vector with the broadcast of an
 593      * input scalar, with lane selection controlled by the mask
 594      */
 595     public abstract FloatVector mul(float s, VectorMask<Float> m);
 596 
 597     /**
 598      * {@inheritDoc}
 599      */
 600     @Override
 601     public abstract FloatVector neg(); // re-declared here with FloatVector as the declared return type
 602 
 603     /**
 604      * {@inheritDoc}
 605      */
 606     @Override
 607     public abstract FloatVector neg(VectorMask<Float> m); // masked form, also declared to return FloatVector
 608 
 609     /**
 610      * {@inheritDoc}
 611      */
 612     @Override
 613     public abstract FloatVector abs(); // re-declared here with FloatVector as the declared return type
 614 
 615     /**
 616      * {@inheritDoc}
 617      */
 618     @Override
 619     public abstract FloatVector abs(VectorMask<Float> m); // masked form, also declared to return FloatVector
 620 
 621     /**
 622      * {@inheritDoc}
 623      */
 624     @Override
 625     public abstract FloatVector min(Vector<Float> v);
 626 
 627     /**
 628      * {@inheritDoc}
 629      */
 630     @Override
 631     public abstract FloatVector min(Vector<Float> v, VectorMask<Float> m);
 632 
 633     /**
 634      * Returns the minimum of this vector and the broadcast of an input scalar.
 635      * <p>
 636      * This is a lane-wise binary operation which applies the operation
 637      * {@code (a, b) -> Math.min(a, b)} to each lane.
 638      *
 639      * @param s the input scalar
 640      * @return the minimum of this vector and the broadcast of an input scalar
          * @see #min(Vector)
 641      */
 642     public abstract FloatVector min(float s);
 643 
 644     /**
 645      * {@inheritDoc}
 646      */
 647     @Override
 648     public abstract FloatVector max(Vector<Float> v);
 649 
 650     /**
 651      * {@inheritDoc}
 652      */
 653     @Override
 654     public abstract FloatVector max(Vector<Float> v, VectorMask<Float> m);
 655 
 656     /**
 657      * Returns the maximum of this vector and the broadcast of an input scalar.
 658      * <p>
 659      * This is a lane-wise binary operation which applies the operation
 660      * {@code (a, b) -> Math.max(a, b)} to each lane.
 661      *
 662      * @param s the input scalar
 663      * @return the maximum of this vector and the broadcast of an input scalar
          * @see #max(Vector)
 664      */
 665     public abstract FloatVector max(float s);
 666 
 667     /**
 668      * {@inheritDoc}
 669      */
 670     @Override
 671     public abstract VectorMask<Float> equal(Vector<Float> v);
 672 
 673     /**
 674      * Tests if this vector is equal to the broadcast of an input scalar.
 675      * <p>
 676      * This is a lane-wise binary test operation which applies the primitive equals
 677      * operation ({@code ==}) to each lane.
 678      *
 679      * @param s the input scalar
 680      * @return the result mask of testing if this vector is equal to the
 681      * broadcast of an input scalar
          * @see #equal(Vector)
 682      */
 683     public abstract VectorMask<Float> equal(float s);
 684 
 685     /**
 686      * {@inheritDoc}
 687      */
 688     @Override
 689     public abstract VectorMask<Float> notEqual(Vector<Float> v);
 690 
 691     /**
 692      * Tests if this vector is not equal to the broadcast of an input scalar.
 693      * <p>
 694      * This is a lane-wise binary test operation which applies the primitive not equals
 695      * operation ({@code !=}) to each lane.
 696      *
 697      * @param s the input scalar
 698      * @return the result mask of testing if this vector is not equal to the
 699      * broadcast of an input scalar
          * @see #notEqual(Vector)
 700      */
 701     public abstract VectorMask<Float> notEqual(float s);
 702 
 703     /**
 704      * {@inheritDoc}
 705      */
 706     @Override
 707     public abstract VectorMask<Float> lessThan(Vector<Float> v);
 708 
 709     /**
 710      * Tests if this vector is less than the broadcast of an input scalar.
 711      * <p>
 712      * This is a lane-wise binary test operation which applies the primitive less than
 713      * operation ({@code <}) to each lane.
 714      *
 715      * @param s the input scalar
 716      * @return the mask result of testing if this vector is less than the
 717      * broadcast of an input scalar
          * @see #lessThan(Vector)
 718      */
 719     public abstract VectorMask<Float> lessThan(float s);
 720 
 721     /**
 722      * {@inheritDoc}
 723      */
 724     @Override
 725     public abstract VectorMask<Float> lessThanEq(Vector<Float> v);
 726 
 727     /**
 728      * Tests if this vector is less than or equal to the broadcast of an input scalar.
 729      * <p>
 730      * This is a lane-wise binary test operation which applies the primitive less than
 731      * or equal to operation ({@code <=}) to each lane.
 732      *
 733      * @param s the input scalar
 734      * @return the mask result of testing if this vector is less than or equal
 735      * to the broadcast of an input scalar
          * @see #lessThanEq(Vector)
 736      */
 737     public abstract VectorMask<Float> lessThanEq(float s);
 738 
 739     /**
 740      * {@inheritDoc}
 741      */
 742     @Override
 743     public abstract VectorMask<Float> greaterThan(Vector<Float> v);
 744 
 745     /**
 746      * Tests if this vector is greater than the broadcast of an input scalar.
 747      * <p>
 748      * This is a lane-wise binary test operation which applies the primitive
 749      * greater-than operation ({@code >}) to each lane.
 750      *
 751      * @param s the input scalar, compared against every lane of this vector
 752      * @return the result mask of testing if this vector is greater than the
 753      * broadcast of the input scalar
 754      */
 755     public abstract VectorMask<Float> greaterThan(float s);
 756 
 757     /**
 758      * {@inheritDoc}
 759      */
 760     @Override
 761     public abstract VectorMask<Float> greaterThanEq(Vector<Float> v);
 762 
 763     /**
 764      * Tests if this vector is greater than or equal to the broadcast of an
 765      * input scalar.
 766      * <p>
 767      * This is a lane-wise binary test operation which applies the primitive
 768      * greater-than-or-equal-to operation ({@code >=}) to each lane.
 769      *
 770      * @param s the input scalar, compared against every lane of this vector
 771      * @return the result mask of testing if this vector is greater than or
 772      * equal to the broadcast of the input scalar
 773      */
 774     public abstract VectorMask<Float> greaterThanEq(float s);
 775 
 776     /**
 777      * {@inheritDoc}
 778      */
 779     @Override
 780     public abstract FloatVector blend(Vector<Float> v, VectorMask<Float> m);
 781 
 782     /**
 783      * Blends the lane elements of this vector with those of the broadcast of an
 784      * input scalar, selecting lanes controlled by a mask.
 785      * <p>
 786      * For each lane of the mask, at lane index {@code N}, if the mask lane
 787      * is set then the input scalar (the broadcast lane element at {@code N})
 788      * is selected and placed into the resulting vector at {@code N},
 789      * otherwise the lane element at {@code N} from this vector is
 790      * selected and placed into the resulting vector at {@code N}.
 791      *
 792      * @param s the input scalar
 793      * @param m the mask controlling lane selection
 794      * @return the result of blending the lane elements of this vector with
 795      * those of the broadcast of the input scalar
 796      */
 797     public abstract FloatVector blend(float s, VectorMask<Float> m);
 798 
 799     /**
 800      * {@inheritDoc}
 801      */
 802     @Override
 803     public abstract FloatVector rearrange(Vector<Float> v,
 804                                                       VectorShuffle<Float> s, VectorMask<Float> m);
 805 
 806     /**
 807      * {@inheritDoc}
 808      */
 809     @Override
 810     public abstract FloatVector rearrange(VectorShuffle<Float> m);
 811 
 812     /**
 813      * {@inheritDoc}
 814      */
 815     @Override
 816     public abstract FloatVector reshape(VectorSpecies<Float> s);
 817 
 818     /**
 819      * {@inheritDoc}
 820      */
 821     @Override
 822     public abstract FloatVector rotateLanesLeft(int i);
 823 
 824     /**
 825      * {@inheritDoc}
 826      */
 827     @Override
 828     public abstract FloatVector rotateLanesRight(int i);
 829 
 830     /**
 831      * {@inheritDoc}
 832      */
 833     @Override
 834     public abstract FloatVector shiftLanesLeft(int i);
 835 
 836     /**
 837      * {@inheritDoc}
 838      */
 839     @Override
 840     public abstract FloatVector shiftLanesRight(int i);
 841 
 842     /**
 843      * Divides this vector by an input vector.
 844      * <p>
 845      * This is a lane-wise binary operation which applies the primitive division
 846      * operation ({@code /}) to each lane.
 847      *
 848      * @param v the input vector, supplying the divisor for each lane
 849      * @return the result of dividing this vector by the input vector
 850      */
 851     public abstract FloatVector div(Vector<Float> v);
 852 
 853     /**
 854      * Divides this vector by the broadcast of an input scalar.
 855      * <p>
 856      * This is a lane-wise binary operation which applies the primitive division
 857      * operation ({@code /}) to each lane.
 858      *
 859      * @param s the input scalar, used as the divisor for every lane
 860      * @return the result of dividing this vector by the broadcast of the input
 861      * scalar
 862      */
 863     public abstract FloatVector div(float s);
 864 
 865     /**
 866      * Divides this vector by an input vector, selecting lane elements
 867      * controlled by a mask.
 868      * <p>
 869      * This is a lane-wise binary operation which applies the primitive division
 870      * operation ({@code /}) to each lane.
 871      *
 872      * @param v the input vector, supplying the divisor for each lane
 873      * @param m the mask controlling lane selection
 874      * @return the result of dividing this vector by the input vector
 875      */
 876     public abstract FloatVector div(Vector<Float> v, VectorMask<Float> m);
 877 
 878     /**
 879      * Divides this vector by the broadcast of an input scalar, selecting lane
 880      * elements controlled by a mask.
 881      * <p>
 882      * This is a lane-wise binary operation which applies the primitive division
 883      * operation ({@code /}) to each lane.
 884      *
 885      * @param s the input scalar, used as the divisor for every lane
 886      * @param m the mask controlling lane selection
 887      * @return the result of dividing this vector by the broadcast of the input
 888      * scalar
 889      */
 890     public abstract FloatVector div(float s, VectorMask<Float> m);
 891 
 892     /**
 893      * Calculates the lane-wise square root of this vector.
 894      * <p>
 895      * This is a lane-wise unary operation which applies the {@link Math#sqrt}
 896      * operation to each lane.
 897      *
 898      * @return the square root of this vector
 899      */
 900     public abstract FloatVector sqrt();
 901 
 902     /**
 903      * Computes the square root of this vector, with lane selection
 904      * controlled by a mask.
 905      * <p>
 906      * This is a lane-wise unary operation which applies the {@link Math#sqrt}
 907      * operation to each lane.
 908      *
 909      * @param m the mask controlling lane selection
 910      * @return the square root of this vector
 911      */
 912     public FloatVector sqrt(VectorMask<Float> m) {
 913         return uOp(m, (lane, x) -> (float) Math.sqrt(x));
 914     }
 915 
 916     /**
 917      * Computes the trigonometric tangent of this vector.
 918      * <p>
 919      * This is a lane-wise unary operation with the same semantic definition
 920      * as the {@link Math#tan} operation applied to each lane.
 921      * The implementation is not required to return the same
 922      * results as {@link Math#tan}, but adheres to the rounding, monotonicity,
 923      * and special case semantics defined in the {@link Math#tan}
 924      * specification. The computed result will be within 1 ulp of the
 925      * exact result.
 926      *
 927      * @return the tangent of this vector
 928      */
 929     public FloatVector tan() {
 930         return uOp((lane, x) -> (float) Math.tan(x));
 931     }
 932 
 933     /**
 934      * Computes the trigonometric tangent of this vector, with lane
 935      * selection controlled by a mask.
 936      * <p>
 937      * The rounding, monotonicity, and special case semantics are those
 938      * described in {@link FloatVector#tan}.
 939      *
 940      * @param m the mask controlling lane selection
 941      * @return the tangent of this vector
 942      */
 943     public FloatVector tan(VectorMask<Float> m) {
 944         return uOp(m, (lane, x) -> (float) Math.tan(x));
 945     }
 946 
 947     /**
 948      * Computes the hyperbolic tangent of this vector.
 949      * <p>
 950      * This is a lane-wise unary operation with the same semantic definition
 951      * as the {@link Math#tanh} operation applied to each lane.
 952      * The implementation is not required to return the same
 953      * results as {@link Math#tanh}, but adheres to the rounding, monotonicity,
 954      * and special case semantics defined in the {@link Math#tanh}
 955      * specification. The computed result will be within 2.5 ulps of the
 956      * exact result.
 957      *
 958      * @return the hyperbolic tangent of this vector
 959      */
 960     public FloatVector tanh() {
 961         return uOp((lane, x) -> (float) Math.tanh(x));
 962     }
 963 
 964     /**
 965      * Computes the hyperbolic tangent of this vector, with lane selection
 966      * controlled by a mask.
 967      * <p>
 968      * The rounding, monotonicity, and special case semantics are those
 969      * described in {@link FloatVector#tanh}.
 970      *
 971      * @param m the mask controlling lane selection
 972      * @return the hyperbolic tangent of this vector
 973      */
 974     public FloatVector tanh(VectorMask<Float> m) {
 975         return uOp(m, (lane, x) -> (float) Math.tanh(x));
 976     }
 977 
 978     /**
 979      * Computes the trigonometric sine of this vector.
 980      * <p>
 981      * This is a lane-wise unary operation with the same semantic definition
 982      * as the {@link Math#sin} operation applied to each lane.
 983      * The implementation is not required to return the same
 984      * results as {@link Math#sin}, but adheres to the rounding, monotonicity,
 985      * and special case semantics defined in the {@link Math#sin}
 986      * specification. The computed result will be within 1 ulp of the
 987      * exact result.
 988      *
 989      * @return the sine of this vector
 990      */
 991     public FloatVector sin() {
 992         return uOp((lane, x) -> (float) Math.sin(x));
 993     }
 994 
 995     /**
 996      * Computes the trigonometric sine of this vector, with lane selection
 997      * controlled by a mask.
 998      * <p>
 999      * The rounding, monotonicity, and special case semantics are those
1000      * described in {@link FloatVector#sin}.
1001      *
1002      * @param m the mask controlling lane selection
1003      * @return the sine of this vector
1004      */
1005     public FloatVector sin(VectorMask<Float> m) {
1006         return uOp(m, (lane, x) -> (float) Math.sin(x));
1007     }
1008 
1009     /**
1010      * Computes the hyperbolic sine of this vector.
1011      * <p>
1012      * This is a lane-wise unary operation with the same semantic definition
1013      * as the {@link Math#sinh} operation applied to each lane.
1014      * The implementation is not required to return the same
1015      * results as {@link Math#sinh}, but adheres to the rounding, monotonicity,
1016      * and special case semantics defined in the {@link Math#sinh}
1017      * specification. The computed result will be within 2.5 ulps of the
1018      * exact result.
1019      *
1020      * @return the hyperbolic sine of this vector
1021      */
1022     public FloatVector sinh() {
1023         return uOp((lane, x) -> (float) Math.sinh(x));
1024     }
1025 
1026     /**
1027      * Computes the hyperbolic sine of this vector, with lane selection
1028      * controlled by a mask.
1029      * <p>
1030      * The rounding, monotonicity, and special case semantics are those
1031      * described in {@link FloatVector#sinh}.
1032      *
1033      * @param m the mask controlling lane selection
1034      * @return the hyperbolic sine of this vector
1035      */
1036     public FloatVector sinh(VectorMask<Float> m) {
1037         return uOp(m, (lane, x) -> (float) Math.sinh(x));
1038     }
1039 
1040     /**
1041      * Computes the trigonometric cosine of this vector.
1042      * <p>
1043      * This is a lane-wise unary operation with the same semantic definition
1044      * as the {@link Math#cos} operation applied to each lane.
1045      * The implementation is not required to return the same
1046      * results as {@link Math#cos}, but adheres to the rounding, monotonicity,
1047      * and special case semantics defined in the {@link Math#cos}
1048      * specification. The computed result will be within 1 ulp of the
1049      * exact result.
1050      *
1051      * @return the cosine of this vector
1052      */
1053     public FloatVector cos() {
1054         return uOp((lane, x) -> (float) Math.cos(x));
1055     }
1056 
1057     /**
1058      * Computes the trigonometric cosine of this vector, with lane
1059      * selection controlled by a mask.
1060      * <p>
1061      * The rounding, monotonicity, and special case semantics are those
1062      * described in {@link FloatVector#cos}.
1063      *
1064      * @param m the mask controlling lane selection
1065      * @return the cosine of this vector
1066      */
1067     public FloatVector cos(VectorMask<Float> m) {
1068         return uOp(m, (lane, x) -> (float) Math.cos(x));
1069     }
1070 
1071     /**
1072      * Computes the hyperbolic cosine of this vector.
1073      * <p>
1074      * This is a lane-wise unary operation with the same semantic definition
1075      * as the {@link Math#cosh} operation applied to each lane.
1076      * The implementation is not required to return the same
1077      * results as {@link Math#cosh}, but adheres to the rounding, monotonicity,
1078      * and special case semantics defined in the {@link Math#cosh}
1079      * specification. The computed result will be within 2.5 ulps of the
1080      * exact result.
1081      *
1082      * @return the hyperbolic cosine of this vector
1083      */
1084     public FloatVector cosh() {
1085         return uOp((lane, x) -> (float) Math.cosh(x));
1086     }
1087 
1088     /**
1089      * Computes the hyperbolic cosine of this vector, with lane selection
1090      * controlled by a mask.
1091      * <p>
1092      * The rounding, monotonicity, and special case semantics are those
1093      * described in {@link FloatVector#cosh}.
1094      *
1095      * @param m the mask controlling lane selection
1096      * @return the hyperbolic cosine of this vector
1097      */
1098     public FloatVector cosh(VectorMask<Float> m) {
1099         return uOp(m, (lane, x) -> (float) Math.cosh(x));
1100     }
1101 
1102     /**
1103      * Computes the arc sine of this vector.
1104      * <p>
1105      * This is a lane-wise unary operation with the same semantic definition
1106      * as the {@link Math#asin} operation applied to each lane.
1107      * The implementation is not required to return the same
1108      * results as {@link Math#asin}, but adheres to the rounding, monotonicity,
1109      * and special case semantics defined in the {@link Math#asin}
1110      * specification. The computed result will be within 1 ulp of the
1111      * exact result.
1112      *
1113      * @return the arc sine of this vector
1114      */
1115     public FloatVector asin() {
1116         return uOp((lane, x) -> (float) Math.asin(x));
1117     }
1118 
1119     /**
1120      * Computes the arc sine of this vector, with lane selection
1121      * controlled by a mask.
1122      * <p>
1123      * The rounding, monotonicity, and special case semantics are those
1124      * described in {@link FloatVector#asin}.
1125      *
1126      * @param m the mask controlling lane selection
1127      * @return the arc sine of this vector
1128      */
1129     public FloatVector asin(VectorMask<Float> m) {
1130         return uOp(m, (lane, x) -> (float) Math.asin(x));
1131     }
1132 
1133     /**
1134      * Computes the arc cosine of this vector.
1135      * <p>
1136      * This is a lane-wise unary operation with the same semantic definition
1137      * as the {@link Math#acos} operation applied to each lane.
1138      * The implementation is not required to return the same
1139      * results as {@link Math#acos}, but adheres to the rounding, monotonicity,
1140      * and special case semantics defined in the {@link Math#acos}
1141      * specification. The computed result will be within 1 ulp of the
1142      * exact result.
1143      *
1144      * @return the arc cosine of this vector
1145      */
1146     public FloatVector acos() {
1147         return uOp((lane, x) -> (float) Math.acos(x));
1148     }
1149 
1150     /**
1151      * Computes the arc cosine of this vector, with lane selection
1152      * controlled by a mask.
1153      * <p>
1154      * The rounding, monotonicity, and special case semantics are those
1155      * described in {@link FloatVector#acos}.
1156      *
1157      * @param m the mask controlling lane selection
1158      * @return the arc cosine of this vector
1159      */
1160     public FloatVector acos(VectorMask<Float> m) {
1161         return uOp(m, (lane, x) -> (float) Math.acos(x));
1162     }
1163 
1164     /**
1165      * Computes the arc tangent of this vector.
1166      * <p>
1167      * This is a lane-wise unary operation with the same semantic definition
1168      * as the {@link Math#atan} operation applied to each lane.
1169      * The implementation is not required to return the same
1170      * results as {@link Math#atan}, but adheres to the rounding, monotonicity,
1171      * and special case semantics defined in the {@link Math#atan}
1172      * specification. The computed result will be within 1 ulp of the
1173      * exact result.
1174      *
1175      * @return the arc tangent of this vector
1176      */
1177     public FloatVector atan() {
1178         return uOp((lane, x) -> (float) Math.atan(x));
1179     }
1180 
1181     /**
1182      * Computes the arc tangent of this vector, with lane selection
1183      * controlled by a mask.
1184      * <p>
1185      * The rounding, monotonicity, and special case semantics are those
1186      * described in {@link FloatVector#atan}.
1187      *
1188      * @param m the mask controlling lane selection
1189      * @return the arc tangent of this vector
1190      */
1191     public FloatVector atan(VectorMask<Float> m) {
1192         return uOp(m, (lane, x) -> (float) Math.atan(x));
1193     }
1194 
1195     /**
1196      * Computes the arc tangent of this vector divided by an input vector.
1197      * <p>
1198      * This is a lane-wise binary operation with the same semantic definition
1199      * as the {@link Math#atan2} operation applied to each lane.
1200      * The implementation is not required to return the same
1201      * results as {@link Math#atan2}, but adheres to the rounding, monotonicity,
1202      * and special case semantics defined in the {@link Math#atan2}
1203      * specification. The computed result will be within 2 ulps of the
1204      * exact result.
1205      *
1206      * @param v the input vector
1207      * @return the arc tangent of this vector divided by the input vector
1208      */
1209     public FloatVector atan2(Vector<Float> v) {
1210         return bOp(v, (lane, y, x) -> (float) Math.atan2(y, x));
1211     }
1212 
1213     /**
1214      * Calculates the arc tangent of this vector divided by the broadcast of
1215      * an input scalar.
1216      * <p>
1217      * This is a lane-wise binary operation with the same semantic definition
1218      * as the {@link Math#atan2} operation applied to each lane.
1219      * The implementation is not required to return the same
1220      * results as {@link Math#atan2}, but adheres to rounding, monotonicity,
1221      * and special case semantics as defined in the {@link Math#atan2}
1222      * specifications. The computed result will be within 2 ulps of the
1223      * exact result.
1224      *
1225      * @param s the input scalar
1226      * @return the arc tangent of this vector divided by the broadcast of the input scalar
1227      */
1228     public abstract FloatVector atan2(float s);
1229 
1230     /**
1231      * Computes the arc tangent of this vector divided by an input vector,
1232      * with lane selection controlled by a mask.
1233      * <p>
1234      * The rounding, monotonicity, and special case semantics are those
1235      * described in {@link FloatVector#atan2}.
1236      *
1237      * @param v the input vector
1238      * @param m the mask controlling lane selection
1239      * @return the arc tangent of this vector divided by the input vector
1240      */
1241     public FloatVector atan2(Vector<Float> v, VectorMask<Float> m) {
1242         return bOp(v, m, (lane, y, x) -> (float) Math.atan2(y, x));
1243     }
1244 
1245     /**
1246      * Calculates the arc tangent of this vector divided by the broadcast of
1247      * an input scalar, selecting lane elements controlled by a mask.
1248      * <p>
1249      * Semantics for rounding, monotonicity, and special cases are
1250      * described in {@link FloatVector#atan2}.
1251      *
1252      * @param s the input scalar
1253      * @param m the mask controlling lane selection
1254      * @return the arc tangent of this vector divided by the broadcast of the input scalar
1255      */
1256     public abstract FloatVector atan2(float s, VectorMask<Float> m);
1257 
1258     /**
1259      * Computes the cube root of this vector.
1260      * <p>
1261      * This is a lane-wise unary operation with the same semantic definition
1262      * as the {@link Math#cbrt} operation applied to each lane.
1263      * The implementation is not required to return the same
1264      * results as {@link Math#cbrt}, but adheres to the rounding, monotonicity,
1265      * and special case semantics defined in the {@link Math#cbrt}
1266      * specification. The computed result will be within 1 ulp of the
1267      * exact result.
1268      *
1269      * @return the cube root of this vector
1270      */
1271     public FloatVector cbrt() {
1272         return uOp((lane, x) -> (float) Math.cbrt(x));
1273     }
1274 
1275     /**
1276      * Computes the cube root of this vector, with lane selection
1277      * controlled by a mask.
1278      * <p>
1279      * The rounding, monotonicity, and special case semantics are those
1280      * described in {@link FloatVector#cbrt}.
1281      *
1282      * @param m the mask controlling lane selection
1283      * @return the cube root of this vector
1284      */
1285     public FloatVector cbrt(VectorMask<Float> m) {
1286         return uOp(m, (lane, x) -> (float) Math.cbrt(x));
1287     }
1288 
1289     /**
1290      * Computes the natural logarithm of this vector.
1291      * <p>
1292      * This is a lane-wise unary operation with the same semantic definition
1293      * as the {@link Math#log} operation applied to each lane.
1294      * The implementation is not required to return the same
1295      * results as {@link Math#log}, but adheres to the rounding, monotonicity,
1296      * and special case semantics defined in the {@link Math#log}
1297      * specification. The computed result will be within 1 ulp of the
1298      * exact result.
1299      *
1300      * @return the natural logarithm of this vector
1301      */
1302     public FloatVector log() {
1303         return uOp((lane, x) -> (float) Math.log(x));
1304     }
1305 
1306     /**
1307      * Computes the natural logarithm of this vector, with lane selection
1308      * controlled by a mask.
1309      * <p>
1310      * The rounding, monotonicity, and special case semantics are those
1311      * described in {@link FloatVector#log}.
1312      *
1313      * @param m the mask controlling lane selection
1314      * @return the natural logarithm of this vector
1315      */
1316     public FloatVector log(VectorMask<Float> m) {
1317         return uOp(m, (lane, x) -> (float) Math.log(x));
1318     }
1319 
1320     /**
1321      * Computes the base 10 logarithm of this vector.
1322      * <p>
1323      * This is a lane-wise unary operation with the same semantic definition
1324      * as the {@link Math#log10} operation applied to each lane.
1325      * The implementation is not required to return the same
1326      * results as {@link Math#log10}, but adheres to the rounding, monotonicity,
1327      * and special case semantics defined in the {@link Math#log10}
1328      * specification. The computed result will be within 1 ulp of the
1329      * exact result.
1330      *
1331      * @return the base 10 logarithm of this vector
1332      */
1333     public FloatVector log10() {
1334         return uOp((lane, x) -> (float) Math.log10(x));
1335     }
1336 
1337     /**
1338      * Computes the base 10 logarithm of this vector, with lane selection
1339      * controlled by a mask.
1340      * <p>
1341      * The rounding, monotonicity, and special case semantics are those
1342      * described in {@link FloatVector#log10}.
1343      *
1344      * @param m the mask controlling lane selection
1345      * @return the base 10 logarithm of this vector
1346      */
1347     public FloatVector log10(VectorMask<Float> m) {
1348         return uOp(m, (lane, x) -> (float) Math.log10(x));
1349     }
1350 
1351     /**
1352      * Computes the natural logarithm of the sum of this vector and the
1353      * broadcast of {@code 1}.
1354      * <p>
1355      * This is a lane-wise unary operation with the same semantic definition
1356      * as the {@link Math#log1p} operation applied to each lane.
1357      * The implementation is not required to return the same
1358      * results as {@link Math#log1p}, but adheres to the rounding, monotonicity,
1359      * and special case semantics defined in the {@link Math#log1p}
1360      * specification. The computed result will be within 1 ulp of the
1361      * exact result.
1362      *
1363      * @return the natural logarithm of the sum of this vector and the broadcast
1364      * of {@code 1}
1365      */
1366     public FloatVector log1p() {
1367         return uOp((lane, x) -> (float) Math.log1p(x));
1368     }
1369 
1370     /**
1371      * Computes the natural logarithm of the sum of this vector and the
1372      * broadcast of {@code 1}, with lane selection controlled by a mask.
1373      * <p>
1374      * The rounding, monotonicity, and special case semantics are those
1375      * described in {@link FloatVector#log1p}.
1376      *
1377      * @param m the mask controlling lane selection
1378      * @return the natural logarithm of the sum of this vector and the broadcast
1379      * of {@code 1}
1380      */
1381     public FloatVector log1p(VectorMask<Float> m) {
1382         return uOp(m, (lane, x) -> (float) Math.log1p(x));
1383     }
1384 
1385     /**
1386      * Computes this vector raised to the power of an input vector.
1387      * <p>
1388      * This is a lane-wise binary operation with the same semantic definition
1389      * as the {@link Math#pow} operation applied to each lane.
1390      * The implementation is not required to return the same
1391      * results as {@link Math#pow}, but adheres to the rounding, monotonicity,
1392      * and special case semantics defined in the {@link Math#pow}
1393      * specification. The computed result will be within 1 ulp of the
1394      * exact result.
1395      *
1396      * @param v the input vector
1397      * @return this vector raised to the power of the input vector
1398      */
1399     public FloatVector pow(Vector<Float> v) {
1400         return bOp(v, (lane, base, exponent) -> (float) Math.pow(base, exponent));
1401     }
1402 
1403     /**
1404      * Calculates this vector raised to the power of the broadcast of an input
1405      * scalar.
1406      * <p>
1407      * This is a lane-wise binary operation with the same semantic definition
1408      * as the {@link Math#pow} operation applied to each lane.
1409      * The implementation is not required to return the same
1410      * results as {@link Math#pow}, but adheres to rounding, monotonicity,
1411      * and special case semantics as defined in the {@link Math#pow}
1412      * specifications. The computed result will be within 1 ulp of the
1413      * exact result.
1414      *
1415      * @param s the input scalar, used as the exponent for every lane
1416      * @return this vector raised to the power of the broadcast of the input
1417      * scalar
1418      */
1419     public abstract FloatVector pow(float s);
1420 
1421     /**
1422      * Computes this vector raised to the power of an input vector, with
1423      * lane selection controlled by a mask.
1424      * <p>
1425      * The rounding, monotonicity, and special case semantics are those
1426      * described in {@link FloatVector#pow}.
1427      *
1428      * @param v the input vector
1429      * @param m the mask controlling lane selection
1430      * @return this vector raised to the power of the input vector
1431      */
1432     public FloatVector pow(Vector<Float> v, VectorMask<Float> m) {
1433         return bOp(v, m, (lane, base, exponent) -> (float) Math.pow(base, exponent));
1434     }
1435 
1436     /**
1437      * Calculates this vector raised to the power of the broadcast of an input
1438      * scalar, selecting lane elements controlled by a mask.
1439      * <p>
1440      * Semantics for rounding, monotonicity, and special cases are
1441      * described in {@link FloatVector#pow}.
1442      *
1443      * @param s the input scalar, used as the exponent for every lane
1444      * @param m the mask controlling lane selection
1445      * @return this vector raised to the power of the broadcast of the input
1446      * scalar
1447      */
1448     public abstract FloatVector pow(float s, VectorMask<Float> m);
1449 
1450     /**
1451      * Computes the broadcast of Euler's number {@code e} raised to the power
1452      * of this vector.
1453      * <p>
1454      * This is a lane-wise unary operation with the same semantic definition
1455      * as the {@link Math#exp} operation applied to each lane.
1456      * The implementation is not required to return the same
1457      * results as {@link Math#exp}, but adheres to the rounding, monotonicity,
1458      * and special case semantics defined in the {@link Math#exp}
1459      * specification. The computed result will be within 1 ulp of the
1460      * exact result.
1461      *
1462      * @return the broadcast of Euler's number {@code e} raised to the power of
1463      * this vector
1464      */
1465     public FloatVector exp() {
1466         return uOp((lane, x) -> (float) Math.exp(x));
1467     }
1468 
1469     /**
1470      * Computes the broadcast of Euler's number {@code e} raised to the power
1471      * of this vector, with lane selection controlled by a mask.
1472      * <p>
1473      * The rounding, monotonicity, and special case semantics are those
1474      * described in {@link FloatVector#exp}.
1475      *
1476      * @param m the mask controlling lane selection
1477      * @return the broadcast of Euler's number {@code e} raised to the power of
1478      * this vector
1479      */
1480     public FloatVector exp(VectorMask<Float> m) {
1481         return uOp(m, (lane, x) -> (float) Math.exp(x));
1482     }
1483 
1484     /**
1485      * Computes the broadcast of Euler's number {@code e} raised to the power
1486      * of this vector minus the broadcast of {@code 1}.
1487      * More specifically, as if by the following (ignoring any differences in
1488      * numerical accuracy):
1489      * <pre>{@code
1490      *   this.exp().sub(EVector.broadcast(this.species(), 1))
1491      * }</pre>
1492      * <p>
1493      * This is a lane-wise unary operation with the same semantic definition
1494      * as the {@link Math#expm1} operation applied to each lane.
1495      * The implementation is not required to return the same
1496      * results as {@link Math#expm1}, but adheres to the rounding, monotonicity,
1497      * and special case semantics defined in the {@link Math#expm1}
1498      * specification. The computed result will be within 1 ulp of the
1499      * exact result.
1500      *
1501      * @return the broadcast of Euler's number {@code e} raised to the power of
1502      * this vector minus the broadcast of {@code 1}
1503      */
1504     public FloatVector expm1() {
1505         return uOp((lane, x) -> (float) Math.expm1(x));
1506     }
1507 
1508     /**
1509      * Calculates the broadcast of Euler's number {@code e} raised to the power
1510      * of this vector minus the broadcast of {@code 1}, selecting lane elements
1511      * controlled by a mask.
1512      * More specifically as if the following (ignoring any differences in
1513      * numerical accuracy):
1514      * <pre>{@code
1515      *   this.exp(m).sub(EVector.broadcast(this.species(), 1), m)
1516      * }</pre>
1517      * <p>
1518      * Semantics for rounding, monotonicity, and special cases are
1519      * described in {@link FloatVector#expm1}
1520      *
1521      * @param m the mask controlling lane selection
1522      * @return the broadcast of Euler's number {@code e} raised to the power of
1523      * this vector minus the broadcast of {@code 1}
1524      */
1525     public FloatVector expm1(VectorMask<Float> m) {
1526         return uOp(m, (lane, x) -> (float) Math.expm1(x)); // lane value is widened to double for Math.expm1
1527     }
1528 
1529     /**
1530      * Calculates the product of this vector and a first input vector summed
1531      * with a second input vector.
1532      * More specifically as if the following (ignoring any differences in
1533      * numerical accuracy):
1534      * <pre>{@code
1535      *   this.mul(v1).add(v2)
1536      * }</pre>
1537      * <p>
1538      * This is a lane-wise ternary operation which applies the {@link Math#fma} operation
1539      * to each lane.
1540      *
1541      * @param v1 the first input vector
1542      * @param v2 the second input vector
1543      * @return the product of this vector and the first input vector summed with
1544      * the second input vector
1545      */
1546     public abstract FloatVector fma(Vector<Float> v1, Vector<Float> v2);
1547 
1548     /**
1549      * Calculates the product of this vector and the broadcast of a first input
1550      * scalar summed with the broadcast of a second input scalar.
1551      * More specifically as if the following:
1552      * <pre>{@code
1553      *   this.fma(EVector.broadcast(this.species(), s1), EVector.broadcast(this.species(), s2))
1554      * }</pre>
1555      * <p>
1556      * This is a lane-wise ternary operation which applies the {@link Math#fma} operation
1557      * to each lane.
1558      *
1559      * @param s1 the first input scalar
1560      * @param s2 the second input scalar
1561      * @return the product of this vector and the broadcast of a first input
1562      * scalar summed with the broadcast of a second input scalar
1563      */
1564     public abstract FloatVector fma(float s1, float s2);
1565 
1566     /**
1567      * Calculates the product of this vector and a first input vector summed
1568      * with a second input vector, selecting lane elements controlled by a mask.
1569      * More specifically as if the following (ignoring any differences in
1570      * numerical accuracy):
1571      * <pre>{@code
1572      *   this.mul(v1, m).add(v2, m)
1573      * }</pre>
1574      * <p>
1575      * This is a lane-wise ternary operation which applies the {@link Math#fma} operation
1576      * to each lane.
1577      *
1578      * @param v1 the first input vector
1579      * @param v2 the second input vector
1580      * @param m the mask controlling lane selection
1581      * @return the product of this vector and the first input vector summed with
1582      * the second input vector
1583      */
1584     public FloatVector fma(Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
1585         return tOp(v1, v2, m, (lane, x, y, z) -> Math.fma(x, y, z)); // x * y + z per lane
1586     }
1587 
1588     /**
1589      * Calculates the product of this vector and the broadcast of a first input
1590      * scalar summed with the broadcast of a second input scalar, selecting lane
1591      * elements controlled by a mask.
1592      * More specifically as if the following:
1593      * <pre>{@code
1594      *   this.fma(EVector.broadcast(this.species(), s1), EVector.broadcast(this.species(), s2), m)
1595      * }</pre>
1596      * <p>
1597      * This is a lane-wise ternary operation which applies the {@link Math#fma} operation
1598      * to each lane.
1599      *
1600      * @param s1 the first input scalar
1601      * @param s2 the second input scalar
1602      * @param m the mask controlling lane selection
1603      * @return the product of this vector and the broadcast of a first input
1604      * scalar summed with the broadcast of a second input scalar
1605      */
1606     public abstract FloatVector fma(float s1, float s2, VectorMask<Float> m);
1607 
1608     /**
1609      * Calculates square root of the sum of the squares of this vector and an
1610      * input vector.
1611      * More specifically as if the following (ignoring any differences in
1612      * numerical accuracy):
1613      * <pre>{@code
1614      *   this.mul(this).add(v.mul(v)).sqrt()
1615      * }</pre>
1616      * <p>
1617      * This is a lane-wise binary operation with same semantic definition as
1618      * {@link Math#hypot} operation applied to each lane.
1619      * The implementation is not required to return same
1620      * results as {@link Math#hypot}, but adheres to rounding, monotonicity,
1621      * and special case semantics as defined in the {@link Math#hypot}
1622      * specifications. The computed result will be within 1 ulp of the
1623      * exact result.
1624      *
1625      * @param v the input vector
1626      * @return square root of the sum of the squares of this vector and an input
1627      * vector
1628      */
1629     public FloatVector hypot(Vector<Float> v) {
1630         return bOp(v, (lane, x, y) -> (float) Math.hypot(x, y)); // both lane values are widened to double
1631     }
1632 
1633     /**
1634      * Calculates square root of the sum of the squares of this vector and the
1635      * broadcast of an input scalar.
1636      * More specifically as if the following (ignoring any differences in
1637      * numerical accuracy):
1638      * <pre>{@code
1639      *   this.mul(this).add(EVector.broadcast(this.species(), s * s)).sqrt()
1640      * }</pre>
1641      * <p>
1642      * This is a lane-wise binary operation with same semantic definition as
1643      * {@link Math#hypot} operation applied to each lane.
1644      * The implementation is not required to return same
1645      * results as {@link Math#hypot}, but adheres to rounding, monotonicity,
1646      * and special case semantics as defined in the {@link Math#hypot}
1647      * specifications. The computed result will be within 1 ulp of the
1648      * exact result.
1649      *
1650      * @param s the input scalar
1651      * @return square root of the sum of the squares of this vector and the
1652      * broadcast of an input scalar
1653      */
1654     public abstract FloatVector hypot(float s);
1655 
1656     /**
1657      * Calculates square root of the sum of the squares of this vector and an
1658      * input vector, selecting lane elements controlled by a mask.
1659      * More specifically as if the following (ignoring any differences in
1660      * numerical accuracy):
1661      * <pre>{@code
1662      *   this.mul(this, m).add(v.mul(v), m).sqrt(m)
1663      * }</pre>
1664      * <p>
1665      * Semantics for rounding, monotonicity, and special cases are
1666      * described in {@link FloatVector#hypot}
1667      *
1668      * @param v the input vector
1669      * @param m the mask controlling lane selection
1670      * @return square root of the sum of the squares of this vector and an input
1671      * vector
1672      */
1673     public FloatVector hypot(Vector<Float> v, VectorMask<Float> m) {
1674         return bOp(v, m, (lane, x, y) -> (float) Math.hypot(x, y)); // both lane values are widened to double
1675     }
1676 
1677     /**
1678      * Calculates square root of the sum of the squares of this vector and the
1679      * broadcast of an input scalar, selecting lane elements controlled by a
1680      * mask.
1681      * More specifically as if the following (ignoring any differences in
1682      * numerical accuracy):
1683      * <pre>{@code
1684      *   this.mul(this, m).add(EVector.broadcast(this.species(), s * s), m).sqrt(m)
1685      * }</pre>
1686      * <p>
1687      * Semantics for rounding, monotonicity, and special cases are
1688      * described in {@link FloatVector#hypot}
1689      *
1690      * @param s the input scalar
1691      * @param m the mask controlling lane selection
1692      * @return square root of the sum of the squares of this vector and the
1693      * broadcast of an input scalar
1694      */
1695     public abstract FloatVector hypot(float s, VectorMask<Float> m);
1696 
1697 
1698     /**
1699      * {@inheritDoc}
1700      */
1701     @Override
1702     public abstract void intoByteArray(byte[] a, int ix);
1703 
1704     /**
1705      * {@inheritDoc}
1706      */
1707     @Override
1708     public abstract void intoByteArray(byte[] a, int ix, VectorMask<Float> m);
1709 
1710     /**
1711      * {@inheritDoc}
1712      */
1713     @Override
1714     public abstract void intoByteBuffer(ByteBuffer bb, int ix);
1715 
1716     /**
1717      * {@inheritDoc}
1718      */
1719     @Override
1720     public abstract void intoByteBuffer(ByteBuffer bb, int ix, VectorMask<Float> m);
1721 
1722 
1723     // Type specific horizontal reductions
1724     /**
1725      * Adds all lane elements of this vector.
1726      * <p>
1727      * This is a cross-lane reduction operation which applies the addition
1728      * operation ({@code +}) to lane elements,
1729      * and the identity value is {@code 0.0}.
1730      *
1731      * <p>The value of a floating-point sum is a function both of the input values as well
1732      * as the order of addition operations. The order of addition operations of this method
1733      * is intentionally not defined to allow for JVM to generate optimal machine
1734      * code for the underlying platform at runtime. If the platform supports a vector
1735      * instruction to add all values in the vector, or if there is some other efficient machine
1736      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1737      * the default implementation of adding vectors sequentially from left to right is used.
1738      * For this reason, the output of this method may vary for the same input values.
1739      *
1740      * @return the addition of all the lane elements of this vector
1741      */
1742     public abstract float addLanes();
1743 
1744     /**
1745      * Adds all lane elements of this vector, selecting lane elements
1746      * controlled by a mask.
1747      * <p>
1748      * This is a cross-lane reduction operation which applies the addition
1749      * operation ({@code +}) to lane elements,
1750      * and the identity value is {@code 0.0}.
1751      *
1752      * <p>The value of a floating-point sum is a function both of the input values as well
1753      * as the order of addition operations. The order of addition operations of this method
1754      * is intentionally not defined to allow for JVM to generate optimal machine
1755      * code for the underlying platform at runtime. If the platform supports a vector
1756      * instruction to add all values in the vector, or if there is some other efficient machine
1757      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1758      * the default implementation of adding vectors sequentially from left to right is used.
1759      * For this reason, the output of this method may vary on the same input values.
1760      *
1761      * @param m the mask controlling lane selection
1762      * @return the addition of the selected lane elements of this vector
1763      */
1764     public abstract float addLanes(VectorMask<Float> m);
1765 
1766     /**
1767      * Multiplies all lane elements of this vector.
1768      * <p>
1769      * This is a cross-lane reduction operation which applies the
1770      * multiplication operation ({@code *}) to lane elements,
1771      * and the identity value is {@code 1.0}.
1772      *
1773      * <p>The order of multiplication operations of this method
1774      * is intentionally not defined to allow for JVM to generate optimal machine
1775      * code for the underlying platform at runtime. If the platform supports a vector
1776      * instruction to multiply all values in the vector, or if there is some other efficient machine
1777      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1778      * the default implementation of multiplying vectors sequentially from left to right is used.
1779      * For this reason, the output of this method may vary on the same input values.
1780      *
1781      * @return the multiplication of all the lane elements of this vector
1782      */
1783     public abstract float mulLanes();
1784 
1785     /**
1786      * Multiplies all lane elements of this vector, selecting lane elements
1787      * controlled by a mask.
1788      * <p>
1789      * This is a cross-lane reduction operation which applies the
1790      * multiplication operation ({@code *}) to lane elements,
1791      * and the identity value is {@code 1.0}.
1792      *
1793      * <p>The order of multiplication operations of this method
1794      * is intentionally not defined to allow for JVM to generate optimal machine
1795      * code for the underlying platform at runtime. If the platform supports a vector
1796      * instruction to multiply all values in the vector, or if there is some other efficient machine
1797      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1798      * the default implementation of multiplying vectors sequentially from left to right is used.
1799      * For this reason, the output of this method may vary on the same input values.
1800      *
1801      * @param m the mask controlling lane selection
1802      * @return the multiplication of the selected lane elements of this vector
1803      */
1804     public abstract float mulLanes(VectorMask<Float> m);
1805 
1806     /**
1807      * Returns the minimum lane element of this vector.
1808      * <p>
1809      * This is an associative cross-lane reduction operation which applies the operation
1810      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1811      * and the identity value is
1812      * {@link Float#POSITIVE_INFINITY}.
1813      *
1814      * @return the minimum lane element of this vector
1815      */
1816     public abstract float minLanes();
1817 
1818     /**
1819      * Returns the minimum lane element of this vector, selecting lane elements
1820      * controlled by a mask.
1821      * <p>
1822      * This is an associative cross-lane reduction operation which applies the operation
1823      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1824      * and the identity value is
1825      * {@link Float#POSITIVE_INFINITY}.
1826      *
1827      * @param m the mask controlling lane selection
1828      * @return the minimum of the selected lane elements of this vector
1829      */
1830     public abstract float minLanes(VectorMask<Float> m);
1831 
1832     /**
1833      * Returns the maximum lane element of this vector.
1834      * <p>
1835      * This is an associative cross-lane reduction operation which applies the operation
1836      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1837      * and the identity value is
1838      * {@link Float#NEGATIVE_INFINITY}.
1839      *
1840      * @return the maximum lane element of this vector
1841      */
1842     public abstract float maxLanes();
1843 
1844     /**
1845      * Returns the maximum lane element of this vector, selecting lane elements
1846      * controlled by a mask.
1847      * <p>
1848      * This is an associative cross-lane reduction operation which applies the operation
1849      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1850      * and the identity value is
1851      * {@link Float#NEGATIVE_INFINITY}.
1852      *
1853      * @param m the mask controlling lane selection
1854      * @return the maximum of the selected lane elements of this vector
1855      */
1856     public abstract float maxLanes(VectorMask<Float> m);
1857 
1858 
1859     // Type specific accessors
1860 
1861     /**
1862      * Gets the lane element at lane index {@code i}.
1863      *
1864      * @param i the lane index
1865      * @return the lane element at lane index {@code i}
1866      * @throws IllegalArgumentException if the index is out of range
1867      * ({@code < 0 || >= length()})
1868      */
1869     public abstract float lane(int i);
1870 
1871     /**
1872      * Replaces the lane element of this vector at lane index {@code i} with
1873      * value {@code e}.
1874      * <p>
1875      * This is a cross-lane operation and behaves as if it returns the result
1876      * of blending this vector with an input vector that is the result of
1877      * broadcasting {@code e} and a mask that has only one lane set at lane
1878      * index {@code i}.
1879      *
1880      * @param i the lane index of the lane element to be replaced
1881      * @param e the value to be placed
1882      * @return the result of replacing the lane element of this vector at lane
1883      * index {@code i} with value {@code e}.
1884      * @throws IllegalArgumentException if the index is out of range
1885      * ({@code < 0 || >= length()})
1886      */
1887     public abstract FloatVector with(int i, float e);
1888 
1889     // Type specific extractors
1890 
1891     /**
1892      * Returns an array containing the lane elements of this vector.
1893      * <p>
1894      * This method behaves as if it {@link #intoArray(float[], int) stores}
1895      * this vector into an allocated array and returns the array as follows:
1896      * <pre>{@code
1897      *   float[] a = new float[this.length()];
1898      *   this.intoArray(a, 0);
1899      *   return a;
1900      * }</pre>
1901      *
1902      * @return an array containing the lane elements of this vector
1903      */
1904     @ForceInline
1905     public final float[] toArray() {
1906         final float[] lanes = new float[species().length()]; // one slot per lane of this vector's species
1907         intoArray(lanes, 0);                                  // store starting at array index 0
1908         return lanes;
1909     }
1910 
1911     /**
1912      * Stores this vector into an array starting at offset.
1913      * <p>
1914      * For each vector lane, where {@code N} is the vector lane index,
1915      * the lane element at index {@code N} is stored into the array at index
1916      * {@code offset + N}.
1917      *
1918      * @param a the array
1919      * @param offset the offset into the array
1920      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
1921      * {@code offset > a.length - this.length()}
1922      */
1923     public abstract void intoArray(float[] a, int offset);
1924 
1925     /**
1926      * Stores this vector into an array starting at offset and using a mask.
1927      * <p>
1928      * For each vector lane, where {@code N} is the vector lane index,
1929      * if the mask lane at index {@code N} is set then the lane element at
1930      * index {@code N} is stored into the array index {@code offset + N}.
1931      *
1932      * @param a the array
1933      * @param offset the offset into the array
1934      * @param m the mask
1935      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
1936      * for any vector lane index {@code N} where the mask at lane {@code N}
1937      * is set {@code offset >= a.length - N}
1938      */
1939     public abstract void intoArray(float[] a, int offset, VectorMask<Float> m);
1940 
1941     /**
1942      * Stores this vector into an array using indexes obtained from an index
1943      * map.
1944      * <p>
1945      * For each vector lane, where {@code N} is the vector lane index, the
1946      * lane element at index {@code N} is stored into the array at index
1947      * {@code a_offset + indexMap[i_offset + N]}.
1948      *
1949      * @param a the array
1950      * @param a_offset the offset into the array, may be negative if relative
1951      * indexes in the index map compensate to produce a value within the
1952      * array bounds
1953      * @param indexMap the index map
1954      * @param i_offset the offset into the index map
1955      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
1956      * {@code i_offset > indexMap.length - this.length()},
1957      * or for any vector lane index {@code N} the result of
1958      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
1959      */
1960     public abstract void intoArray(float[] a, int a_offset, int[] indexMap, int i_offset);
1961 
1962     /**
1963      * Stores this vector into an array using indexes obtained from an index
1964      * map and using a mask.
1965      * <p>
1966      * For each vector lane, where {@code N} is the vector lane index,
1967      * if the mask lane at index {@code N} is set then the lane element at
1968      * index {@code N} is stored into the array at index
1969      * {@code a_offset + indexMap[i_offset + N]}.
1970      *
1971      * @param a the array
1972      * @param a_offset the offset into the array, may be negative if relative
1973      * indexes in the index map compensate to produce a value within the
1974      * array bounds
1975      * @param m the mask
1976      * @param indexMap the index map
1977      * @param i_offset the offset into the index map
1978      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
1979      * {@code i_offset > indexMap.length - this.length()},
1980      * or for any vector lane index {@code N} where the mask at lane
1981      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
1982      * {@code < 0} or {@code >= a.length}
1983      */
1984     public abstract void intoArray(float[] a, int a_offset, VectorMask<Float> m, int[] indexMap, int i_offset);
1985     // Species
1986 
1987     /**
1988      * {@inheritDoc}
1989      */
1990     @Override
1991     public abstract VectorSpecies<Float> species();
1992 
1993     /**
1994      * Class representing {@link FloatVector}s of the same {@link VectorShape VectorShape}.
1995      */
1996     static final class FloatSpecies extends AbstractSpecies<Float> {
             /** Turns an array of lane values into a vector; used by both {@code op} methods. */
1997         final Function<float[], FloatVector> vectorFactory;
1998 
             /**
              * Stores {@code vectorFactory} and forwards every other argument, in the
              * order received, to the {@code AbstractSpecies} constructor together with
              * the fixed element type {@code float.class} and size {@link Float#SIZE}.
              * <p>
              * The two shuffle-factory parameters are named to match their declared
              * types: the {@code Function} over {@link IntUnaryOperator} is the
              * "from op" factory and the {@code fShuffleFromArray} is the "from array"
              * factory.
              */
1999         private FloatSpecies(VectorShape shape,
2000                           Class<?> vectorType,
2001                           Class<?> maskType,
2002                           Function<float[], FloatVector> vectorFactory,
2003                           Function<boolean[], VectorMask<Float>> maskFactory,
2004                           Function<IntUnaryOperator, VectorShuffle<Float>> shuffleFromOpFactory,
2005                           fShuffleFromArray<Float> shuffleFromArrayFactory) {
2006             super(shape, float.class, Float.SIZE, vectorType, maskType, maskFactory,
2007                   shuffleFromOpFactory, shuffleFromArrayFactory);
2008             this.vectorFactory = vectorFactory;
2009         }
2010 
             /** Supplies the value of the lane at index {@code i}. */
             @FunctionalInterface
2011         interface FOp {
2012             float apply(int i);
2013         }
2014 
             /**
              * Builds a vector whose lane {@code i} holds {@code f.apply(i)}.
              *
              * @param f the per-lane value supplier
              * @return the result of {@code vectorFactory} applied to the computed lane values
              */
2015         FloatVector op(FOp f) {
2016             float[] res = new float[length()];
2017             for (int i = 0; i < res.length; i++) { // res.length == length(), evaluated once above
2018                 res[i] = f.apply(i);
2019             }
2020             return vectorFactory.apply(res);
2021         }
2022 
             /**
              * Builds a vector whose lane {@code i} holds {@code f.apply(i)} where the
              * mask is set; lanes where the mask is unset keep the array default
              * {@code 0.0f} and {@code f} is not invoked for them.
              *
              * @param o the mask selecting lanes; it is cast to {@code AbstractMask}
              * @param f the per-lane value supplier
              * @return the result of {@code vectorFactory} applied to the computed lane values
              */
2023         FloatVector op(VectorMask<Float> o, FOp f) {
2024             float[] res = new float[length()];
2025             boolean[] mbits = ((AbstractMask<Float>)o).getBits();
2026             for (int i = 0; i < res.length; i++) { // res.length == length(), evaluated once above
2027                 if (mbits[i]) {
2028                     res[i] = f.apply(i);
2029                 }
2030             }
2031             return vectorFactory.apply(res);
2032         }
2033     }
2034 
2035     /**
2036      * Finds the preferred species for an element type of {@code float}.
2037      * <p>
2038      * A preferred species is a species chosen by the platform that has a
2039      * shape of maximal bit size.  A preferred species for different element
2040      * types will have the same shape, and therefore vectors, masks, and
2041      * shuffles created from such species will be shape compatible.
2042      *
2043      * @return the preferred species for an element type of {@code float}
2044      */
2045     private static FloatSpecies preferredSpecies() {
2046         VectorSpecies<Float> preferred = VectorSpecies.ofPreferred(float.class);
             return (FloatSpecies) preferred; // narrow to the float-specific species type
2047     }
2048 
2049     /**
2050      * Finds a species for an element type of {@code float} and shape.
2051      *
2052      * @param s the shape
2053      * @return a species for an element type of {@code float} and shape
2054      * @throws IllegalArgumentException if no such species exists for the shape
2055      */
2056     static FloatSpecies species(VectorShape s) {
2057         Objects.requireNonNull(s);
2058         switch (s) {
2059             case S_64_BIT: return (FloatSpecies) SPECIES_64;
2060             case S_128_BIT: return (FloatSpecies) SPECIES_128;
2061             case S_256_BIT: return (FloatSpecies) SPECIES_256;
2062             case S_512_BIT: return (FloatSpecies) SPECIES_512;
2063             case S_Max_BIT: return (FloatSpecies) SPECIES_MAX;
2064             default: throw new IllegalArgumentException("Bad shape: " + s);
2065         }
2066     }
2067 
2068     /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
2069     public static final VectorSpecies<Float> SPECIES_64 = new FloatSpecies(VectorShape.S_64_BIT, Float64Vector.class, Float64Vector.Float64Mask.class,
2070                                                                      Float64Vector::new, Float64Vector.Float64Mask::new,
2071                                                                      Float64Vector.Float64Shuffle::new, Float64Vector.Float64Shuffle::new);
2072 
2073     /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
2074     public static final VectorSpecies<Float> SPECIES_128 = new FloatSpecies(VectorShape.S_128_BIT, Float128Vector.class, Float128Vector.Float128Mask.class,
2075                                                                       Float128Vector::new, Float128Vector.Float128Mask::new,
2076                                                                       Float128Vector.Float128Shuffle::new, Float128Vector.Float128Shuffle::new);
2077 
2078     /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
2079     public static final VectorSpecies<Float> SPECIES_256 = new FloatSpecies(VectorShape.S_256_BIT, Float256Vector.class, Float256Vector.Float256Mask.class,
2080                                                                       Float256Vector::new, Float256Vector.Float256Mask::new,
2081                                                                       Float256Vector.Float256Shuffle::new, Float256Vector.Float256Shuffle::new);
2082 
2083     /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
2084     public static final VectorSpecies<Float> SPECIES_512 = new FloatSpecies(VectorShape.S_512_BIT, Float512Vector.class, Float512Vector.Float512Mask.class,
2085                                                                       Float512Vector::new, Float512Vector.Float512Mask::new,
2086                                                                       Float512Vector.Float512Shuffle::new, Float512Vector.Float512Shuffle::new);
2087 
2088     /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
2089     public static final VectorSpecies<Float> SPECIES_MAX = new FloatSpecies(VectorShape.S_Max_BIT, FloatMaxVector.class, FloatMaxVector.FloatMaxMask.class,
2090                                                                       FloatMaxVector::new, FloatMaxVector.FloatMaxMask::new,
2091                                                                       FloatMaxVector.FloatMaxShuffle::new, FloatMaxVector.FloatMaxShuffle::new);
2092 
2093     /**
2094      * Preferred species for {@link FloatVector}s.
2095      * A preferred species is a species of maximal bit size for the platform.
2096      */
2097     public static final VectorSpecies<Float> SPECIES_PREFERRED = preferredSpecies(); // FloatSpecies is assignable to VectorSpecies<Float> without a cast
2098 }