New src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java

   1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.FloatBuffer;
  29 import java.nio.ByteOrder;
  30 import java.util.Objects;
  31 import java.util.function.IntUnaryOperator;
  32 import java.util.function.Function;
  33 import java.util.concurrent.ThreadLocalRandom;
  34 
  35 import jdk.internal.misc.Unsafe;
  36 import jdk.internal.vm.annotation.ForceInline;
  37 import static jdk.incubator.vector.VectorIntrinsics.*;
  38 
  39 
  40 /**
  41  * A specialized {@link Vector} representing an ordered immutable sequence of
  42  * {@code float} values.
  43  */
  44 @SuppressWarnings("cast")
  45 public abstract class FloatVector extends Vector<Float> {
  46 
  47     FloatVector() {}
  48 
  49     private static final int ARRAY_SHIFT = 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_FLOAT_INDEX_SCALE);
  50 
  51     // Unary operator
  52 
  53     interface FUnOp {
  54         float apply(int i, float a);
  55     }
  56 
  57     abstract FloatVector uOp(FUnOp f);
  58 
  59     abstract FloatVector uOp(VectorMask<Float> m, FUnOp f);
  60 
  61     // Binary operator
  62 
  63     interface FBinOp {
  64         float apply(int i, float a, float b);
  65     }
  66 
  67     abstract FloatVector bOp(Vector<Float> v, FBinOp f);
  68 
  69     abstract FloatVector bOp(Vector<Float> v, VectorMask<Float> m, FBinOp f);
  70 
  71     // Trinary operator
  72 
  73     interface FTriOp {
  74         float apply(int i, float a, float b, float c);
  75     }
  76 
  77     abstract FloatVector tOp(Vector<Float> v1, Vector<Float> v2, FTriOp f);
  78 
  79     abstract FloatVector tOp(Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m, FTriOp f);
  80 
  81     // Reduction operator
  82 
  83     abstract float rOp(float v, FBinOp f);
  84 
  85     // Binary test
  86 
  87     interface FBinTest {
  88         boolean apply(int i, float a, float b);
  89     }
  90 
  91     abstract VectorMask<Float> bTest(Vector<Float> v, FBinTest f);
  92 
  93     // Foreach
  94 
  95     interface FUnCon {
  96         void apply(int i, float a);
  97     }
  98 
  99     abstract void forEach(FUnCon f);
 100 
 101     abstract void forEach(VectorMask<Float> m, FUnCon f);
 102 
 103     // Static factories
 104 
 105     /**
 106      * Returns a vector where all lane elements are set to the default
 107      * primitive value ({@code 0.0f}).
 108      *
 109      * @param species species of desired vector
 110      * @return a zero vector of the given species
 111      */
 112     @ForceInline
 113     @SuppressWarnings("unchecked")
 114     public static FloatVector zero(VectorSpecies<Float> species) {
 115         return VectorIntrinsics.broadcastCoerced((Class<FloatVector>) species.boxType(), float.class, species.length(),
 116                                                  Float.floatToIntBits(0.0f), species,
 117                                                  ((bits, s) -> ((FloatSpecies)s).op(i -> Float.intBitsToFloat((int)bits))));
 118     }
 119 
 120     /**
 121      * Loads a vector from a byte array starting at an offset.
 122      * <p>
 123      * Bytes are composed into primitive lane elements according to the
 124      * native byte order of the underlying platform.
 125      * <p>
 126      * This method behaves as if it returns the result of calling the
 127      * byte buffer, offset, and mask accepting
 128      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 129      * <pre>{@code
 130      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, VectorMask.allTrue());
 131      * }</pre>
 132      *
 133      * @param species species of desired vector
 134      * @param a the byte array
 135      * @param offset the offset into the array
 136      * @return a vector loaded from a byte array
 137      * @throws IndexOutOfBoundsException if {@code offset < 0} or
 138      * {@code offset > a.length - (species.length() * species.elementSize() / Byte.SIZE)}
 139      */
 140     @ForceInline
 141     @SuppressWarnings("unchecked")
 142     public static FloatVector fromByteArray(VectorSpecies<Float> species, byte[] a, int offset) {
 143         Objects.requireNonNull(a);
 144         offset = VectorIntrinsics.checkIndex(offset, a.length, species.bitSize() / Byte.SIZE);
 145         return VectorIntrinsics.load((Class<FloatVector>) species.boxType(), float.class, species.length(),
 146                                      a, ((long) offset) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 147                                      a, offset, species,
 148                                      (c, idx, s) -> {
 149                                          ByteBuffer bbc = ByteBuffer.wrap(c, idx, c.length - idx).order(ByteOrder.nativeOrder());
 150                                          FloatBuffer tb = bbc.asFloatBuffer();
 151                                          return ((FloatSpecies)s).op(i -> tb.get());
 152                                      });
 153     }
 154 
 155     /**
 156      * Loads a vector from a byte array starting at an offset and using a
 157      * mask.
 158      * <p>
 159      * Bytes are composed into primitive lane elements according to the
 160      * native byte order of the underlying platform.
 161      * <p>
 162      * This method behaves as if it returns the result of calling the
 163      * byte buffer, offset, and mask accepting
 164      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 165      * <pre>{@code
 166      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, m);
 167      * }</pre>
 168      *
 169      * @param species species of desired vector
 170      * @param a the byte array
 171      * @param offset the offset into the array
 172      * @param m the mask
 173      * @return a vector loaded from a byte array
 174      * @throws IndexOutOfBoundsException if {@code offset < 0} or
 175      * for any vector lane index {@code N} where the mask at lane {@code N}
 176      * is set
 177      * {@code offset >= a.length - (N * species.elementSize() / Byte.SIZE)}
 178      */
 179     @ForceInline
 180     public static FloatVector fromByteArray(VectorSpecies<Float> species, byte[] a, int offset, VectorMask<Float> m) {
 181         return zero(species).blend(fromByteArray(species, a, offset), m);
 182     }
 183 
 184     /**
 185      * Loads a vector from an array starting at offset.
 186      * <p>
 187      * For each vector lane, where {@code N} is the vector lane index, the
 188      * array element at index {@code offset + N} is placed into the
 189      * resulting vector at lane index {@code N}.
 190      *
 191      * @param species species of desired vector
 192      * @param a the array
 193      * @param offset the offset into the array
 194      * @return the vector loaded from an array
 195      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 196      * {@code offset > a.length - species.length()}
 197      */
 198     @ForceInline
 199     @SuppressWarnings("unchecked")
 200     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int offset){
 201         Objects.requireNonNull(a);
 202         offset = VectorIntrinsics.checkIndex(offset, a.length, species.length());
 203         return VectorIntrinsics.load((Class<FloatVector>) species.boxType(), float.class, species.length(),
 204                                      a, (((long) offset) << ARRAY_SHIFT) + Unsafe.ARRAY_FLOAT_BASE_OFFSET,
 205                                      a, offset, species,
 206                                      (c, idx, s) -> ((FloatSpecies)s).op(n -> c[idx + n]));
 207     }
 208 
 209 
 210     /**
 211      * Loads a vector from an array starting at offset and using a mask.
 212      * <p>
 213      * For each vector lane, where {@code N} is the vector lane index,
 214      * if the mask lane at index {@code N} is set then the array element at
 215      * index {@code offset + N} is placed into the resulting vector at lane index
 216      * {@code N}, otherwise the default element value is placed into the
 217      * resulting vector at lane index {@code N}.
 218      *
 219      * @param species species of desired vector
 220      * @param a the array
 221      * @param offset the offset into the array
 222      * @param m the mask
 223      * @return the vector loaded from an array
 224      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 225      * for any vector lane index {@code N} where the mask at lane {@code N}
 226      * is set {@code offset > a.length - N}
 227      */
 228     @ForceInline
 229     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int offset, VectorMask<Float> m) {
 230         return zero(species).blend(fromArray(species, a, offset), m);
 231     }
 232 
 233     /**
 234      * Loads a vector from an array using indexes obtained from an index
 235      * map.
 236      * <p>
 237      * For each vector lane, where {@code N} is the vector lane index, the
 238      * array element at index {@code a_offset + indexMap[i_offset + N]} is placed into the
 239      * resulting vector at lane index {@code N}.
 240      *
 241      * @param species species of desired vector
 242      * @param a the array
 243      * @param a_offset the offset into the array, may be negative if relative
 244      * indexes in the index map compensate to produce a value within the
 245      * array bounds
 246      * @param indexMap the index map
 247      * @param i_offset the offset into the index map
 248      * @return the vector loaded from an array
 249      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 250      * {@code i_offset > indexMap.length - species.length()},
 251      * or for any vector lane index {@code N} the result of
 252      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
 253      */
 254     @ForceInline
 255     @SuppressWarnings("unchecked")
 256     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int a_offset, int[] indexMap, int i_offset) {
 257         Objects.requireNonNull(a);
 258         Objects.requireNonNull(indexMap);
 259 
 260         // Gather load: lane k reads a[a_offset + indexMap[i_offset + k]].
 261         // Index vector: vix[0:n] = k -> a_offset + indexMap[i_offset + k]
 262         IntVector vix = IntVector.fromArray(IntVector.species(species.indexShape()), indexMap, i_offset).add(a_offset);
 263         // NOTE(review): checkIndex presumably enforces 0 <= index < a.length for every lane - confirm.
 264         vix = VectorIntrinsics.checkIndex(vix, a.length);
 265         // The trailing lambda expresses the same gather in scalar form: lane n reads c[idx + iMap[idy + n]].
 266         return VectorIntrinsics.loadWithMap((Class<FloatVector>) species.boxType(), float.class, species.length(),
 267                                             IntVector.species(species.indexShape()).boxType(), a, Unsafe.ARRAY_FLOAT_BASE_OFFSET, vix,
 268                                             a, a_offset, indexMap, i_offset, species,
 269                                             (float[] c, int idx, int[] iMap, int idy, VectorSpecies<Float> s) ->
 270                                                 ((FloatSpecies)s).op(n -> c[idx + iMap[idy+n]]));
 271         }
 272 
 273     /**
 274      * Loads a vector from an array using indexes obtained from an index
 275      * map and using a mask.
 276      * <p>
 277      * For each vector lane, where {@code N} is the vector lane index,
 278      * if the mask lane at index {@code N} is set then the array element at
 279      * index {@code a_offset + indexMap[i_offset + N]} is placed into the resulting vector
 280      * at lane index {@code N}, otherwise the default element value is placed there.
 281      *
 282      * @param species species of desired vector
 283      * @param a the array
 284      * @param a_offset the offset into the array, may be negative if relative
 285      * indexes in the index map compensate to produce a value within the
 286      * array bounds
 287      * @param m the mask
 288      * @param indexMap the index map
 289      * @param i_offset the offset into the index map
 290      * @return the vector loaded from an array
 291      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 292      * {@code i_offset > indexMap.length - species.length()},
 293      * or for any vector lane index {@code N} where the mask at lane
 294      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
 295      * {@code < 0} or {@code >= a.length}
 296      */
 297     @ForceInline
 298     @SuppressWarnings("unchecked")
 299     public static FloatVector fromArray(VectorSpecies<Float> species, float[] a, int a_offset, VectorMask<Float> m, int[] indexMap, int i_offset) {
 300         // @@@ This can result in out of bounds errors for unset mask lanes
 301         return zero(species).blend(fromArray(species, a, a_offset, indexMap, i_offset), m);
 302     }
 303 
 304 
 305     /**
 306      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 307      * offset into the byte buffer.
 308      * <p>
 309      * Bytes are composed into primitive lane elements according to the
 310      * native byte order of the underlying platform.
 311      * <p>
 312      * This method behaves as if it returns the result of calling the
 313      * byte buffer, offset, and mask accepting
 314      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 315      * <pre>{@code
 316      *   return fromByteBuffer(species, bb, offset, VectorMask.allTrue())
 317      * }</pre>
 318      *
 319      * @param species species of desired vector
 320      * @param bb the byte buffer
 321      * @param offset the offset into the byte buffer
 322      * @return a vector loaded from a byte buffer
 323      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 324      * or {@code > bb.limit()}, or if there are fewer than
 325      * {@code species.length() * species.elementSize() / Byte.SIZE} bytes
 326      * remaining in the byte buffer from the given offset
 327      * @throws IllegalArgumentException if the byte order of {@code bb} is not the native byte order
 328      */
 329     @ForceInline
 330     @SuppressWarnings("unchecked")
 331     public static FloatVector fromByteBuffer(VectorSpecies<Float> species, ByteBuffer bb, int offset) {
 332         if (bb.order() != ByteOrder.nativeOrder()) {
 333             throw new IllegalArgumentException("ByteBuffer must use native byte order");
 334         }
 335         offset = VectorIntrinsics.checkIndex(offset, bb.limit(), species.bitSize() / Byte.SIZE);
 336         return VectorIntrinsics.load((Class<FloatVector>) species.boxType(), float.class, species.length(),
 337                                      U.getReference(bb, BYTE_BUFFER_HB), U.getLong(bb, BUFFER_ADDRESS) + offset,
 338                                      bb, offset, species,
 339                                      (c, idx, s) -> {
 340                                          ByteBuffer bbc = c.duplicate().position(idx).order(ByteOrder.nativeOrder());
 341                                          FloatBuffer tb = bbc.asFloatBuffer();
 342                                          return ((FloatSpecies)s).op(i -> tb.get());
 343                                      });
 344     }
 345 
 346     /**
 347      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 348      * offset into the byte buffer and using a mask.
 349      * <p>
 350      * This method behaves as if the byte buffer is viewed as a primitive
 351      * {@link java.nio.Buffer buffer} for the primitive element type,
 352      * according to the native byte order of the underlying platform, and
 353      * the returned vector is loaded with a mask from a primitive array
 354      * obtained from the primitive buffer.
 355      * The following pseudocode expresses the behaviour, where
 356      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 357      * primitive element type, and {@code ESpecies} is the primitive
 358      * species for {@code e}:
 359      * <pre>{@code
 360      * EBuffer eb = bb.duplicate().
 361      *     order(ByteOrder.nativeOrder()).position(offset).
 362      *     asEBuffer();
 363      * e[] es = new e[species.length()];
 364      * for (int n = 0; n < es.length; n++) {
 365      *     if (m.isSet(n))
 366      *         es[n] = eb.get(n);
 367      * }
 368      * EVector r = EVector.fromArray(species, es, 0, m);
 369      * }</pre>
 370      *
 371      * @param species species of desired vector
 372      * @param bb the byte buffer
 373      * @param offset the offset into the byte buffer
 374      * @param m the mask
 375      * @return a vector loaded from a byte buffer
 376      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 377      * or {@code > bb.limit()}, or
 378      * for any vector lane index {@code N} where the mask at lane {@code N}
 379      * is set
 380      * {@code offset >= bb.limit() - (N * species.elementSize() / Byte.SIZE)}
 381      */
 382     @ForceInline
 383     public static FloatVector fromByteBuffer(VectorSpecies<Float> species, ByteBuffer bb, int offset, VectorMask<Float> m) {
 384         return zero(species).blend(fromByteBuffer(species, bb, offset), m);
 385     }
 386 
 387     /**
 388      * Returns a vector where all lane elements are set to the primitive
 389      * value {@code e}.
 390      *
 391      * @param species species of the desired vector
 392      * @param e the value
 393      * @return a vector where all lane elements are set to
 394      * the primitive value {@code e}
 395      */
 396     @ForceInline
 397     @SuppressWarnings("unchecked")
 398     public static FloatVector broadcast(VectorSpecies<Float> species, float e) {
 399         return VectorIntrinsics.broadcastCoerced(
 400             (Class<FloatVector>) species.boxType(), float.class, species.length(),
 401             Float.floatToIntBits(e), species,
 402             ((bits, sp) -> ((FloatSpecies)sp).op(i -> Float.intBitsToFloat((int)bits))));
 403     }
 404 
 405     /**
 406      * Returns a vector where each lane element is set to the given
 407      * primitive values.
 408      * <p>
 409      * For each vector lane, where {@code N} is the vector lane index,
 410      * the primitive value at index {@code N} is placed into the resulting
 411      * vector at lane index {@code N}.
 412      *
 413      * @param species species of the desired vector
 414      * @param es the given primitive values
 415      * @return a vector where each lane element is set to the given primitive
 416      * values
 417      * @throws IndexOutOfBoundsException if {@code es.length < species.length()}
 418      */
 419     @ForceInline
 420     @SuppressWarnings("unchecked")
 421     public static FloatVector scalars(VectorSpecies<Float> species, float... es) {
 422         Objects.requireNonNull(es);
 423         int ix = VectorIntrinsics.checkIndex(0, es.length, species.length());
 424         return VectorIntrinsics.load((Class<FloatVector>) species.boxType(), float.class, species.length(),
 425                                      es, Unsafe.ARRAY_FLOAT_BASE_OFFSET,
 426                                      es, ix, species,
 427                                      (c, idx, sp) -> ((FloatSpecies)sp).op(n -> c[idx + n]));
 428     }
 429 
 430     /**
 431      * Returns a vector where the first lane element is set to the primitive
 432      * value {@code e}, and all other lane elements are set to the default
 433      * value.
 434      *
 435      * @param species species of the desired vector
 436      * @param e the value
 437      * @return a vector where the first lane element is set to the primitive
 438      * value {@code e}
 439      */
 440     @ForceInline
 441     public static final FloatVector single(VectorSpecies<Float> species, float e) {
 442         return zero(species).with(0, e);
 443     }
 444 
 445     /**
 446      * Returns a vector where each lane element is set to a randomly
 447      * generated primitive value.
 448      * <p>
 449      * The semantics are equivalent to calling
 450      * {@link ThreadLocalRandom#nextFloat()} for each lane.
 451      *
 452      * @param species species of the desired vector
 453      * @return a vector where each lane element is set to a randomly
 454      * generated primitive value
 455      */
 456     public static FloatVector random(VectorSpecies<Float> species) {
 457         ThreadLocalRandom r = ThreadLocalRandom.current();
 458         return ((FloatSpecies)species).op(i -> r.nextFloat());
 459     }
 460 
 461     // Ops
 462 
 463     @Override
 464     public abstract FloatVector add(Vector<Float> v);
 465 
 466     /**
 467      * Adds this vector to the broadcast of an input scalar.
 468      * <p>
 469      * This is a lane-wise binary operation which applies the primitive addition operation
 470      * ({@code +}) to each lane.
 471      *
 472      * @param s the input scalar
 473      * @return the result of adding this vector to the broadcast of an input
 474      * scalar
 475      */
 476     public abstract FloatVector add(float s);
 477 
 478     @Override
 479     public abstract FloatVector add(Vector<Float> v, VectorMask<Float> m);
 480 
 481     /**
 482      * Adds this vector to the broadcast of an input scalar,
 483      * selecting lane elements controlled by a mask.
 484      * <p>
 485      * This is a lane-wise binary operation which applies the primitive addition operation
 486      * ({@code +}) to each lane.
 487      *
 488      * @param s the input scalar
 489      * @param m the mask controlling lane selection
 490      * @return the result of adding this vector to the broadcast of an input
 491      * scalar
 492      */
 493     public abstract FloatVector add(float s, VectorMask<Float> m);
 494 
 495     @Override
 496     public abstract FloatVector sub(Vector<Float> v);
 497 
 498     /**
 499      * Subtracts the broadcast of an input scalar from this vector.
 500      * <p>
 501      * This is a lane-wise binary operation which applies the primitive subtraction
 502      * operation ({@code -}) to each lane.
 503      *
 504      * @param s the input scalar
 505      * @return the result of subtracting the broadcast of an input
 506      * scalar from this vector
 507      */
 508     public abstract FloatVector sub(float s);
 509 
 510     @Override
 511     public abstract FloatVector sub(Vector<Float> v, VectorMask<Float> m);
 512 
 513     /**
 514      * Subtracts the broadcast of an input scalar from this vector, selecting
 515      * lane elements controlled by a mask.
 516      * <p>
 517      * This is a lane-wise binary operation which applies the primitive subtraction
 518      * operation ({@code -}) to each lane.
 519      *
 520      * @param s the input scalar
 521      * @param m the mask controlling lane selection
 522      * @return the result of subtracting the broadcast of an input
 523      * scalar from this vector
 524      */
 525     public abstract FloatVector sub(float s, VectorMask<Float> m);
 526 
 527     @Override
 528     public abstract FloatVector mul(Vector<Float> v);
 529 
 530     /**
 531      * Multiplies this vector with the broadcast of an input scalar.
 532      * <p>
 533      * This is a lane-wise binary operation which applies the primitive multiplication
 534      * operation ({@code *}) to each lane.
 535      *
 536      * @param s the input scalar
 537      * @return the result of multiplying this vector with the broadcast of an
 538      * input scalar
 539      */
 540     public abstract FloatVector mul(float s);
 541 
 542     @Override
 543     public abstract FloatVector mul(Vector<Float> v, VectorMask<Float> m);
 544 
 545     /**
 546      * Multiplies this vector with the broadcast of an input scalar, selecting
 547      * lane elements controlled by a mask.
 548      * <p>
 549      * This is a lane-wise binary operation which applies the primitive multiplication
 550      * operation ({@code *}) to each lane.
 551      *
 552      * @param s the input scalar
 553      * @param m the mask controlling lane selection
 554      * @return the result of multiplying this vector with the broadcast of an
 555      * input scalar
 556      */
 557     public abstract FloatVector mul(float s, VectorMask<Float> m);
 558 
 559     @Override
 560     public abstract FloatVector neg();
 561 
 562     @Override
 563     public abstract FloatVector neg(VectorMask<Float> m);
 564 
 565     @Override
 566     public abstract FloatVector abs();
 567 
 568     @Override
 569     public abstract FloatVector abs(VectorMask<Float> m);
 570 
 571     @Override
 572     public abstract FloatVector min(Vector<Float> v);
 573 
 574     @Override
 575     public abstract FloatVector min(Vector<Float> v, VectorMask<Float> m);
 576 
 577     /**
 578      * Returns the minimum of this vector and the broadcast of an input scalar.
 579      * <p>
 580      * This is a lane-wise binary operation which applies the operation
 581      * {@code (a, b) -> Math.min(a, b)} to each lane.
 582      *
 583      * @param s the input scalar
 584      * @return the minimum of this vector and the broadcast of an input scalar
 585      */
 586     public abstract FloatVector min(float s);
 587 
 588     @Override
 589     public abstract FloatVector max(Vector<Float> v);
 590 
 591     @Override
 592     public abstract FloatVector max(Vector<Float> v, VectorMask<Float> m);
 593 
 594     /**
 595      * Returns the maximum of this vector and the broadcast of an input scalar.
 596      * <p>
 597      * This is a lane-wise binary operation which applies the operation
 598      * {@code (a, b) -> Math.max(a, b)} to each lane.
 599      *
 600      * @param s the input scalar
 601      * @return the maximum of this vector and the broadcast of an input scalar
 602      */
 603     public abstract FloatVector max(float s);
 604 
 605     @Override
 606     public abstract VectorMask<Float> equal(Vector<Float> v);
 607 
 608     /**
 609      * Tests if this vector is equal to the broadcast of an input scalar.
 610      * <p>
 611      * This is a lane-wise binary test operation which applies the primitive equals
 612      * operation ({@code ==}) to each lane.
 613      *
 614      * @param s the input scalar
 615      * @return the result mask of testing if this vector is equal to the
 616      * broadcast of an input scalar
 617      */
 618     public abstract VectorMask<Float> equal(float s);
 619 
 620     @Override
 621     public abstract VectorMask<Float> notEqual(Vector<Float> v);
 622 
 623     /**
 624      * Tests if this vector is not equal to the broadcast of an input scalar.
 625      * <p>
 626      * This is a lane-wise binary test operation which applies the primitive not equals
 627      * operation ({@code !=}) to each lane.
 628      *
 629      * @param s the input scalar
 630      * @return the result mask of testing if this vector is not equal to the
 631      * broadcast of an input scalar
 632      */
 633     public abstract VectorMask<Float> notEqual(float s);
 634 
 635     @Override
 636     public abstract VectorMask<Float> lessThan(Vector<Float> v);
 637 
 638     /**
 639      * Tests if this vector is less than the broadcast of an input scalar.
 640      * <p>
 641      * This is a lane-wise binary test operation which applies the primitive less than
 642      * operation ({@code <}) to each lane.
 643      *
 644      * @param s the input scalar
 645      * @return the mask result of testing if this vector is less than the
 646      * broadcast of an input scalar
 647      */
 648     public abstract VectorMask<Float> lessThan(float s);
 649 
 650     @Override
 651     public abstract VectorMask<Float> lessThanEq(Vector<Float> v);
 652 
 653     /**
 654      * Tests if this vector is less than or equal to the broadcast of an input scalar.
 655      * <p>
 656      * This is a lane-wise binary test operation which applies the primitive less than
 657      * or equal to operation ({@code <=}) to each lane.
 658      *
 659      * @param s the input scalar
 660      * @return the mask result of testing if this vector is less than or equal
 661      * to the broadcast of an input scalar
 662      */
 663     public abstract VectorMask<Float> lessThanEq(float s);
 664 
 665     @Override
 666     public abstract VectorMask<Float> greaterThan(Vector<Float> v);
 667 
 668     /**
 669      * Tests if this vector is greater than the broadcast of an input scalar.
 670      * <p>
 671      * This is a lane-wise binary test operation which applies the primitive greater than
 672      * operation ({@code >}) to each lane.
 673      *
 674      * @param s the input scalar
 675      * @return the mask result of testing if this vector is greater than the
 676      * broadcast of an input scalar
 677      */
 678     public abstract VectorMask<Float> greaterThan(float s);
 679 
 680     @Override
 681     public abstract VectorMask<Float> greaterThanEq(Vector<Float> v);
 682 
 683     /**
 684      * Tests if this vector is greater than or equal to the broadcast of an
 685      * input scalar.
 686      * <p>
 687      * This is a lane-wise binary test operation which applies the primitive greater than
 688      * or equal to operation ({@code >=}) to each lane.
 689      *
 690      * @param s the input scalar
 691      * @return the mask result of testing if this vector is greater than or
 692      * equal to the broadcast of an input scalar
 693      */
 694     public abstract VectorMask<Float> greaterThanEq(float s);
 695 
 696     @Override
 697     public abstract FloatVector blend(Vector<Float> v, VectorMask<Float> m);
 698 
 699     /**
 700      * Blends the lane elements of this vector with those of the broadcast of an
 701      * input scalar, selecting lanes controlled by a mask.
 702      * <p>
 703      * For each lane of the mask, at lane index {@code N}, if the mask lane
 704      * is set then the input scalar is
 705      * selected and placed into the resulting vector at {@code N},
 706      * otherwise the lane element at {@code N} from this vector is
 707      * selected and placed into the resulting vector at {@code N}.
 708      *
 709      * @param s the input scalar
 710      * @param m the mask controlling lane selection
 711      * @return the result of blending the lane elements of this vector with
 712      * those of the broadcast of an input scalar
 713      */
 714     public abstract FloatVector blend(float s, VectorMask<Float> m);
 715 
 716     @Override
 717     public abstract FloatVector rearrange(Vector<Float> v,
 718                                                       VectorShuffle<Float> s, VectorMask<Float> m);
 719 
 720     @Override // NOTE(review): the parameter named m is a VectorShuffle, not a mask
 721     public abstract FloatVector rearrange(VectorShuffle<Float> m);
 722 
 723     @Override // re-declared here with a FloatVector return type
 724     public abstract FloatVector reshape(VectorSpecies<Float> s);
 725 
 726     @Override // re-declared here with a FloatVector return type
 727     public abstract FloatVector rotateEL(int i);
 728 
 729     @Override // re-declared here with a FloatVector return type
 730     public abstract FloatVector rotateER(int i);
 731 
 732     @Override // re-declared here with a FloatVector return type
 733     public abstract FloatVector shiftEL(int i);
 734 
 735     @Override // re-declared here with a FloatVector return type
 736     public abstract FloatVector shiftER(int i);
 737 
 738     /**
 739      * Divides this vector by an input vector.
 740      * <p>
 741      * This is a lane-wise binary operation which applies the primitive division
 742      * operation ({@code /}) to each lane.
 743      *
 744      * @param v the input vector
 745      * @return the lane-wise result of dividing this vector by the input vector
 746      */
 747     public abstract FloatVector div(Vector<Float> v);
 748 
 749     /**
 750      * Divides this vector by the broadcast of an input scalar.
 751      * <p>
 752      * This is a lane-wise binary operation which applies the primitive division
 753      * operation ({@code /}) to each lane.
 754      *
 755      * @param s the input scalar
 756      * @return the result of dividing this vector by the broadcast of the input
 757      * scalar
 758      */
 759     public abstract FloatVector div(float s);
 760 
 761     /**
 762      * Divides this vector by an input vector, selecting lane elements
 763      * controlled by a mask.
 764      * <p>
 765      * This is a lane-wise binary operation which applies the primitive division
 766      * operation ({@code /}) to each lane.
 767      *
 768      * @param v the input vector
 769      * @param m the mask controlling lane selection
 770      * @return the masked result of dividing this vector by the input vector
 771      */
 772     public abstract FloatVector div(Vector<Float> v, VectorMask<Float> m);
 773 
 774     /**
 775      * Divides this vector by the broadcast of an input scalar, selecting lane
 776      * elements controlled by a mask.
 777      * <p>
 778      * This is a lane-wise binary operation which applies the primitive division
 779      * operation ({@code /}) to each lane.
 780      *
 781      * @param s the input scalar
 782      * @param m the mask controlling lane selection
 783      * @return the masked result of dividing this vector by the broadcast of the
 784      * input scalar
 785      */
 786     public abstract FloatVector div(float s, VectorMask<Float> m);
 787 
 788     /**
 789      * Calculates the square root of this vector.
 790      * <p>
 791      * This is a lane-wise unary operation which applies the {@link Math#sqrt}
 792      * operation to each lane.
 793      *
 794      * @return the square root of this vector
 795      */
 796     public abstract FloatVector sqrt();
 797 
 798     /**
 799      * Computes the square root of this vector, with lane selection governed
 800      * by a mask.
 801      * <p>
 802      * This lane-wise unary operation applies {@link Math#sqrt}
 803      * to each lane.
 804      *
 805      * @param m the mask governing lane selection
 806      * @return the square root of this vector
 807      */
 808     public FloatVector sqrt(VectorMask<Float> m) {
 809         return uOp(m, (lane, x) -> (float) Math.sqrt(x));
 810     }
 811 
 812     /**
 813      * Computes the trigonometric tangent of each lane of this vector.
 814      * <p>
 815      * This lane-wise unary operation has the same semantic definition as
 816      * {@link Math#tan} applied to every lane element.
 817      * Results need not be identical to those returned by
 818      * {@link Math#tan}, but the rounding, monotonicity, and
 819      * special case semantics of the {@link Math#tan}
 820      * specification are honored. The computed result will be within 1 ulp of
 821      * the exact result.
 822      *
 823      * @return the tangent of this vector, lane by lane
 824      */
 825     public FloatVector tan() {
 826         return uOp((lane, x) -> (float) Math.tan(x));
 827     }
 828 
 829     /**
 830      * Computes the trigonometric tangent of this vector, with lane
 831      * selection governed by a mask.
 832      * <p>
 833      * Rounding, monotonicity, and special case semantics are those
 834      * documented for {@link FloatVector#tan}.
 835      *
 836      * @param m the mask governing lane selection
 837      * @return the tangent of this vector
 838      */
 839     public FloatVector tan(VectorMask<Float> m) {
 840         return uOp(m, (lane, x) -> (float) Math.tan(x));
 841     }
 842 
 843     /**
 844      * Computes the hyperbolic tangent of each lane of this vector.
 845      * <p>
 846      * This lane-wise unary operation has the same semantic definition as
 847      * {@link Math#tanh} applied to every lane element.
 848      * Results need not be identical to those returned by
 849      * {@link Math#tanh}, but the rounding, monotonicity, and
 850      * special case semantics of the {@link Math#tanh}
 851      * specification are honored. The computed result will be within 2.5 ulps
 852      * of the exact result.
 853      *
 854      * @return the hyperbolic tangent of this vector, lane by lane
 855      */
 856     public FloatVector tanh() {
 857         return uOp((lane, x) -> (float) Math.tanh(x));
 858     }
 859 
 860     /**
 861      * Computes the hyperbolic tangent of this vector, with lane selection
 862      * governed by a mask.
 863      * <p>
 864      * Rounding, monotonicity, and special case semantics are those
 865      * documented for {@link FloatVector#tanh}.
 866      *
 867      * @param m the mask governing lane selection
 868      * @return the hyperbolic tangent of this vector
 869      */
 870     public FloatVector tanh(VectorMask<Float> m) {
 871         return uOp(m, (lane, x) -> (float) Math.tanh(x));
 872     }
 873 
 874     /**
 875      * Computes the trigonometric sine of each lane of this vector.
 876      * <p>
 877      * This lane-wise unary operation has the same semantic definition as
 878      * {@link Math#sin} applied to every lane element.
 879      * Results need not be identical to those returned by
 880      * {@link Math#sin}, but the rounding, monotonicity, and
 881      * special case semantics of the {@link Math#sin}
 882      * specification are honored. The computed result will be within 1 ulp of
 883      * the exact result.
 884      *
 885      * @return the sine of this vector, lane by lane
 886      */
 887     public FloatVector sin() {
 888         return uOp((lane, x) -> (float) Math.sin(x));
 889     }
 890 
 891     /**
 892      * Computes the trigonometric sine of this vector, with lane selection
 893      * governed by a mask.
 894      * <p>
 895      * Rounding, monotonicity, and special case semantics are those
 896      * documented for {@link FloatVector#sin}.
 897      *
 898      * @param m the mask governing lane selection
 899      * @return the sine of this vector
 900      */
 901     public FloatVector sin(VectorMask<Float> m) {
 902         return uOp(m, (lane, x) -> (float) Math.sin(x));
 903     }
 904 
 905     /**
 906      * Computes the hyperbolic sine of each lane of this vector.
 907      * <p>
 908      * This lane-wise unary operation has the same semantic definition as
 909      * {@link Math#sinh} applied to every lane element.
 910      * Results need not be identical to those returned by
 911      * {@link Math#sinh}, but the rounding, monotonicity, and
 912      * special case semantics of the {@link Math#sinh}
 913      * specification are honored. The computed result will be within 2.5 ulps
 914      * of the exact result.
 915      *
 916      * @return the hyperbolic sine of this vector, lane by lane
 917      */
 918     public FloatVector sinh() {
 919         return uOp((lane, x) -> (float) Math.sinh(x));
 920     }
 921 
 922     /**
 923      * Computes the hyperbolic sine of this vector, with lane selection
 924      * governed by a mask.
 925      * <p>
 926      * Rounding, monotonicity, and special case semantics are those
 927      * documented for {@link FloatVector#sinh}.
 928      *
 929      * @param m the mask governing lane selection
 930      * @return the hyperbolic sine of this vector
 931      */
 932     public FloatVector sinh(VectorMask<Float> m) {
 933         return uOp(m, (lane, x) -> (float) Math.sinh(x));
 934     }
 935 
 936     /**
 937      * Computes the trigonometric cosine of each lane of this vector.
 938      * <p>
 939      * This lane-wise unary operation has the same semantic definition as
 940      * {@link Math#cos} applied to every lane element.
 941      * Results need not be identical to those returned by
 942      * {@link Math#cos}, but the rounding, monotonicity, and
 943      * special case semantics of the {@link Math#cos}
 944      * specification are honored. The computed result will be within 1 ulp of
 945      * the exact result.
 946      *
 947      * @return the cosine of this vector, lane by lane
 948      */
 949     public FloatVector cos() {
 950         return uOp((lane, x) -> (float) Math.cos(x));
 951     }
 952 
 953     /**
 954      * Computes the trigonometric cosine of this vector, with lane
 955      * selection governed by a mask.
 956      * <p>
 957      * Rounding, monotonicity, and special case semantics are those
 958      * documented for {@link FloatVector#cos}.
 959      *
 960      * @param m the mask governing lane selection
 961      * @return the cosine of this vector
 962      */
 963     public FloatVector cos(VectorMask<Float> m) {
 964         return uOp(m, (lane, x) -> (float) Math.cos(x));
 965     }
 966 
 967     /**
 968      * Computes the hyperbolic cosine of each lane of this vector.
 969      * <p>
 970      * This lane-wise unary operation has the same semantic definition as
 971      * {@link Math#cosh} applied to every lane element.
 972      * Results need not be identical to those returned by
 973      * {@link Math#cosh}, but the rounding, monotonicity, and
 974      * special case semantics of the {@link Math#cosh}
 975      * specification are honored. The computed result will be within 2.5 ulps
 976      * of the exact result.
 977      *
 978      * @return the hyperbolic cosine of this vector, lane by lane
 979      */
 980     public FloatVector cosh() {
 981         return uOp((lane, x) -> (float) Math.cosh(x));
 982     }
 983 
 984     /**
 985      * Computes the hyperbolic cosine of this vector, with lane selection
 986      * governed by a mask.
 987      * <p>
 988      * Rounding, monotonicity, and special case semantics are those
 989      * documented for {@link FloatVector#cosh}.
 990      *
 991      * @param m the mask governing lane selection
 992      * @return the hyperbolic cosine of this vector
 993      */
 994     public FloatVector cosh(VectorMask<Float> m) {
 995         return uOp(m, (lane, x) -> (float) Math.cosh(x));
 996     }
 997 
 998     /**
 999      * Computes the arc sine of each lane of this vector.
1000      * <p>
1001      * This lane-wise unary operation has the same semantic definition as
1002      * {@link Math#asin} applied to every lane element.
1003      * Results need not be identical to those returned by
1004      * {@link Math#asin}, but the rounding, monotonicity, and
1005      * special case semantics of the {@link Math#asin}
1006      * specification are honored. The computed result will be within 1 ulp of
1007      * the exact result.
1008      *
1009      * @return the arc sine of this vector, lane by lane
1010      */
1011     public FloatVector asin() {
1012         return uOp((lane, x) -> (float) Math.asin(x));
1013     }
1014 
1015     /**
1016      * Computes the arc sine of this vector, with lane selection governed
1017      * by a mask.
1018      * <p>
1019      * Rounding, monotonicity, and special case semantics are those
1020      * documented for {@link FloatVector#asin}.
1021      *
1022      * @param m the mask governing lane selection
1023      * @return the arc sine of this vector
1024      */
1025     public FloatVector asin(VectorMask<Float> m) {
1026         return uOp(m, (lane, x) -> (float) Math.asin(x));
1027     }
1028 
1029     /**
1030      * Computes the arc cosine of each lane of this vector.
1031      * <p>
1032      * This lane-wise unary operation has the same semantic definition as
1033      * {@link Math#acos} applied to every lane element.
1034      * Results need not be identical to those returned by
1035      * {@link Math#acos}, but the rounding, monotonicity, and
1036      * special case semantics of the {@link Math#acos}
1037      * specification are honored. The computed result will be within 1 ulp of
1038      * the exact result.
1039      *
1040      * @return the arc cosine of this vector, lane by lane
1041      */
1042     public FloatVector acos() {
1043         return uOp((lane, x) -> (float) Math.acos(x));
1044     }
1045 
1046     /**
1047      * Computes the arc cosine of this vector, with lane selection governed
1048      * by a mask.
1049      * <p>
1050      * Rounding, monotonicity, and special case semantics are those
1051      * documented for {@link FloatVector#acos}.
1052      *
1053      * @param m the mask governing lane selection
1054      * @return the arc cosine of this vector
1055      */
1056     public FloatVector acos(VectorMask<Float> m) {
1057         return uOp(m, (lane, x) -> (float) Math.acos(x));
1058     }
1059 
1060     /**
1061      * Computes the arc tangent of each lane of this vector.
1062      * <p>
1063      * This lane-wise unary operation has the same semantic definition as
1064      * {@link Math#atan} applied to every lane element.
1065      * Results need not be identical to those returned by
1066      * {@link Math#atan}, but the rounding, monotonicity, and
1067      * special case semantics of the {@link Math#atan}
1068      * specification are honored. The computed result will be within 1 ulp of
1069      * the exact result.
1070      *
1071      * @return the arc tangent of this vector, lane by lane
1072      */
1073     public FloatVector atan() {
1074         return uOp((lane, x) -> (float) Math.atan(x));
1075     }
1076 
1077     /**
1078      * Computes the arc tangent of this vector, with lane selection governed
1079      * by a mask.
1080      * <p>
1081      * Rounding, monotonicity, and special case semantics are those
1082      * documented for {@link FloatVector#atan}.
1083      *
1084      * @param m the mask governing lane selection
1085      * @return the arc tangent of this vector
1086      */
1087     public FloatVector atan(VectorMask<Float> m) {
1088         return uOp(m, (lane, x) -> (float) Math.atan(x));
1089     }
1090 
1091     /**
1092      * Computes the arc tangent of this vector divided by an input vector.
1093      * <p>
1094      * This lane-wise binary operation has the same semantic definition as
1095      * {@link Math#atan2} applied to every pair of lane elements.
1096      * Results need not be identical to those returned by
1097      * {@link Math#atan2}, but the rounding, monotonicity, and
1098      * special case semantics of the {@link Math#atan2}
1099      * specification are honored. The computed result will be within 2 ulps of
1100      * the exact result.
1101      *
1102      * @param v the input vector
1103      * @return the arc tangent of this vector divided by the input vector
1104      */
1105     public FloatVector atan2(Vector<Float> v) {
1106         return bOp(v, (lane, y, x) -> (float) Math.atan2(y, x));
1107     }
1108 
1109     /**
1110      * Calculates the arc tangent of this vector divided by the broadcast of
1111      * an input scalar.
1112      * <p>
1113      * This is a lane-wise binary operation with same semantic definition as
1114      * {@link Math#atan2} operation applied to each lane.
1115      * The implementation is not required to return same
1116      * results as {@link Math#atan2}, but adheres to rounding, monotonicity,
1117      * and special case semantics as defined in the {@link Math#atan2}
1118      * specifications. The computed result will be within 2 ulps of the
1119      * exact result.
1120      *
1121      * @param s the input scalar
1122      * @return the arc tangent of this vector over the broadcast of the input scalar
1123      */
1124     public abstract FloatVector atan2(float s);
1125 
1126     /**
1127      * Computes the arc tangent of this vector divided by an input vector,
1128      * with lane selection governed by a mask.
1129      * <p>
1130      * Rounding, monotonicity, and special case semantics are those
1131      * documented for {@link FloatVector#atan2}.
1132      *
1133      * @param v the input vector
1134      * @param m the mask governing lane selection
1135      * @return the arc tangent of this vector divided by the input vector
1136      */
1137     public FloatVector atan2(Vector<Float> v, VectorMask<Float> m) {
1138         return bOp(v, m, (lane, y, x) -> (float) Math.atan2(y, x));
1139     }
1140 
1141     /**
1142      * Calculates the arc tangent of this vector divided by the broadcast of
1143      * an input scalar, selecting lane elements controlled by a mask.
1144      * <p>
1145      * Semantics for rounding, monotonicity, and special cases are
1146      * described in {@link FloatVector#atan2}.
1147      *
1148      * @param s the input scalar
1149      * @param m the mask controlling lane selection
1150      * @return the arc tangent of this vector over the broadcast of the input scalar
1151      */
1152     public abstract FloatVector atan2(float s, VectorMask<Float> m);
1153 
1154     /**
1155      * Computes the cube root of each lane of this vector.
1156      * <p>
1157      * This lane-wise unary operation has the same semantic definition as
1158      * {@link Math#cbrt} applied to every lane element.
1159      * Results need not be identical to those returned by
1160      * {@link Math#cbrt}, but the rounding, monotonicity, and
1161      * special case semantics of the {@link Math#cbrt}
1162      * specification are honored. The computed result will be within 1 ulp of
1163      * the exact result.
1164      *
1165      * @return the cube root of this vector, lane by lane
1166      */
1167     public FloatVector cbrt() {
1168         return uOp((lane, x) -> (float) Math.cbrt(x));
1169     }
1170 
1171     /**
1172      * Computes the cube root of this vector, with lane selection governed
1173      * by a mask.
1174      * <p>
1175      * Rounding, monotonicity, and special case semantics are those
1176      * documented for {@link FloatVector#cbrt}.
1177      *
1178      * @param m the mask governing lane selection
1179      * @return the cube root of this vector
1180      */
1181     public FloatVector cbrt(VectorMask<Float> m) {
1182         return uOp(m, (lane, x) -> (float) Math.cbrt(x));
1183     }
1184 
1185     /**
1186      * Computes the natural logarithm of each lane of this vector.
1187      * <p>
1188      * This lane-wise unary operation has the same semantic definition as
1189      * {@link Math#log} applied to every lane element.
1190      * Results need not be identical to those returned by
1191      * {@link Math#log}, but the rounding, monotonicity, and
1192      * special case semantics of the {@link Math#log}
1193      * specification are honored. The computed result will be within 1 ulp of
1194      * the exact result.
1195      *
1196      * @return the natural logarithm of this vector, lane by lane
1197      */
1198     public FloatVector log() {
1199         return uOp((lane, x) -> (float) Math.log(x));
1200     }
1201 
1202     /**
1203      * Computes the natural logarithm of this vector, with lane selection
1204      * governed by a mask.
1205      * <p>
1206      * Rounding, monotonicity, and special case semantics are those
1207      * documented for {@link FloatVector#log}.
1208      *
1209      * @param m the mask governing lane selection
1210      * @return the natural logarithm of this vector
1211      */
1212     public FloatVector log(VectorMask<Float> m) {
1213         return uOp(m, (lane, x) -> (float) Math.log(x));
1214     }
1215 
1216     /**
1217      * Computes the base 10 logarithm of each lane of this vector.
1218      * <p>
1219      * This lane-wise unary operation has the same semantic definition as
1220      * {@link Math#log10} applied to every lane element.
1221      * Results need not be identical to those returned by
1222      * {@link Math#log10}, but the rounding, monotonicity, and
1223      * special case semantics of the {@link Math#log10}
1224      * specification are honored. The computed result will be within 1 ulp of
1225      * the exact result.
1226      *
1227      * @return the base 10 logarithm of this vector, lane by lane
1228      */
1229     public FloatVector log10() {
1230         return uOp((lane, x) -> (float) Math.log10(x));
1231     }
1232 
1233     /**
1234      * Computes the base 10 logarithm of this vector, with lane selection
1235      * governed by a mask.
1236      * <p>
1237      * Rounding, monotonicity, and special case semantics are those
1238      * documented for {@link FloatVector#log10}.
1239      *
1240      * @param m the mask governing lane selection
1241      * @return the base 10 logarithm of this vector
1242      */
1243     public FloatVector log10(VectorMask<Float> m) {
1244         return uOp(m, (lane, x) -> (float) Math.log10(x));
1245     }
1246 
1247     /**
1248      * Computes the natural logarithm of the sum of this vector and the
1249      * broadcast of {@code 1}, lane by lane.
1250      * <p>
1251      * This lane-wise unary operation has the same semantic definition as
1252      * {@link Math#log1p} applied to every lane element.
1253      * Results need not be identical to those returned by
1254      * {@link Math#log1p}, but the rounding, monotonicity, and
1255      * special case semantics of the {@link Math#log1p}
1256      * specification are honored. The computed result will be within 1 ulp of
1257      * the exact result.
1258      *
1259      * @return the natural logarithm of the sum of this vector and the
1260      * broadcast of {@code 1}
1261      */
1262     public FloatVector log1p() {
1263         return uOp((lane, x) -> (float) Math.log1p(x));
1264     }
1265 
1266     /**
1267      * Computes the natural logarithm of the sum of this vector and the
1268      * broadcast of {@code 1}, with lane selection governed by a mask.
1269      * <p>
1270      * Rounding, monotonicity, and special case semantics are those
1271      * documented for {@link FloatVector#log1p}.
1272      *
1273      * @param m the mask governing lane selection
1274      * @return the natural logarithm of the sum of this vector and the
1275      * broadcast of {@code 1}
1276      */
1277     public FloatVector log1p(VectorMask<Float> m) {
1278         return uOp(m, (lane, x) -> (float) Math.log1p(x));
1279     }
1280 
1281     /**
1282      * Computes this vector raised to the power of an input vector.
1283      * <p>
1284      * This lane-wise binary operation has the same semantic definition as
1285      * {@link Math#pow} applied to every pair of lane elements.
1286      * Results need not be identical to those returned by
1287      * {@link Math#pow}, but the rounding, monotonicity, and
1288      * special case semantics of the {@link Math#pow}
1289      * specification are honored. The computed result will be within 1 ulp of
1290      * the exact result.
1291      *
1292      * @param v the input vector
1293      * @return this vector raised to the power of the input vector
1294      */
1295     public FloatVector pow(Vector<Float> v) {
1296         return bOp(v, (lane, base, exponent) -> (float) Math.pow(base, exponent));
1297     }
1298 
1299     /**
1300      * Calculates this vector raised to the power of the broadcast of an input
1301      * scalar.
1302      * <p>
1303      * This is a lane-wise binary operation with same semantic definition as
1304      * {@link Math#pow} operation applied to each lane.
1305      * The implementation is not required to return same
1306      * results as {@link Math#pow}, but adheres to rounding, monotonicity,
1307      * and special case semantics as defined in the {@link Math#pow}
1308      * specifications. The computed result will be within 1 ulp of the
1309      * exact result.
1310      *
1311      * @param s the input scalar
1312      * @return this vector raised to the power of the broadcast of the input
1313      * scalar
1314      */
1315     public abstract FloatVector pow(float s);
1316 
1317     /**
1318      * Computes this vector raised to the power of an input vector, with
1319      * lane selection governed by a mask.
1320      * <p>
1321      * Rounding, monotonicity, and special case semantics are those
1322      * documented for {@link FloatVector#pow}.
1323      *
1324      * @param v the input vector
1325      * @param m the mask governing lane selection
1326      * @return this vector raised to the power of the input vector
1327      */
1328     public FloatVector pow(Vector<Float> v, VectorMask<Float> m) {
1329         return bOp(v, m, (lane, base, exponent) -> (float) Math.pow(base, exponent));
1330     }
1331 
1332     /**
1333      * Calculates this vector raised to the power of the broadcast of an input
1334      * scalar, selecting lane elements controlled by a mask.
1335      * <p>
1336      * Semantics for rounding, monotonicity, and special cases are
1337      * described in {@link FloatVector#pow}.
1338      *
1339      * @param s the input scalar
1340      * @param m the mask controlling lane selection
1341      * @return this vector raised to the power of the broadcast of the input
1342      * scalar
1343      */
1344     public abstract FloatVector pow(float s, VectorMask<Float> m);
1345 
1346     /**
1347      * Computes the broadcast of Euler's number {@code e} raised to the power
1348      * of this vector, lane by lane.
1349      * <p>
1350      * This lane-wise unary operation has the same semantic definition as
1351      * {@link Math#exp} applied to every lane element.
1352      * Results need not be identical to those returned by
1353      * {@link Math#exp}, but the rounding, monotonicity, and
1354      * special case semantics of the {@link Math#exp}
1355      * specification are honored. The computed result will be within 1 ulp of
1356      * the exact result.
1357      *
1358      * @return the broadcast of Euler's number {@code e} raised to the power of
1359      * this vector
1360      */
1361     public FloatVector exp() {
1362         return uOp((lane, x) -> (float) Math.exp(x));
1363     }
1364 
1365     /**
1366      * Computes the broadcast of Euler's number {@code e} raised to the power
1367      * of this vector, with lane selection governed by a mask.
1368      * <p>
1369      * Rounding, monotonicity, and special case semantics are those
1370      * documented for {@link FloatVector#exp}.
1371      *
1372      * @param m the mask governing lane selection
1373      * @return the broadcast of Euler's number {@code e} raised to the power of
1374      * this vector
1375      */
1376     public FloatVector exp(VectorMask<Float> m) {
1377         return uOp(m, (lane, x) -> (float) Math.exp(x));
1378     }
1379 
1380     /**
1381      * Computes the broadcast of Euler's number {@code e} raised to the power
1382      * of this vector minus the broadcast of {@code 1}.
1383      * The result is as if computed by the following (ignoring any
1384      * differences in numerical accuracy):
1385      * <pre>{@code
1386      *   this.exp().sub(EVector.broadcast(this.species(), 1))
1387      * }</pre>
1388      * <p>
1389      * This lane-wise unary operation has the same semantic definition as
1390      * {@link Math#expm1} applied to every lane element.
1391      * Results need not be identical to those returned by
1392      * {@link Math#expm1}, but the rounding, monotonicity, and
1393      * special case semantics of the {@link Math#expm1}
1394      * specification are honored. The computed result will be within 1 ulp of
1395      * the exact result.
1396      *
1397      * @return the broadcast of Euler's number {@code e} raised to the power of
1398      * this vector minus the broadcast of {@code 1}
1399      */
1400     public FloatVector expm1() {
1401         return uOp((lane, x) -> (float) Math.expm1(x));
1402     }
1403 
1404     /**
1405      * Computes the broadcast of Euler's number {@code e} raised to the power
1406      * of this vector minus the broadcast of {@code 1}, with lane selection
1407      * governed by a mask.
1408      * The result is as if computed by the following (ignoring any
1409      * differences in numerical accuracy):
1410      * <pre>{@code
1411      *   this.exp(m).sub(EVector.broadcast(this.species(), 1), m)
1412      * }</pre>
1413      * <p>
1414      * Rounding, monotonicity, and special case semantics are those
1415      * documented for {@link FloatVector#expm1}.
1416      *
1417      * @param m the mask governing lane selection
1418      * @return the broadcast of Euler's number {@code e} raised to the power of
1419      * this vector minus the broadcast of {@code 1}
1420      */
1421     public FloatVector expm1(VectorMask<Float> m) {
1422         return uOp(m, (lane, x) -> (float) Math.expm1(x));
1423     }
1424 
1425     /**
1426      * Calculates the product of this vector and a first input vector summed
1427      * with a second input vector.
1428      * More specifically as if the following (ignoring any differences in
1429      * numerical accuracy):
1430      * <pre>{@code
1431      *   this.mul(v1).add(v2)
1432      * }</pre>
1433      * <p>
1434      * This is a lane-wise ternary operation which applies the {@link Math#fma}
1435      * operation to each lane.
1436      *
1437      * @param v1 the first input vector
1438      * @param v2 the second input vector
1439      * @return the product of this vector and the first input vector summed with
1440      * the second input vector
1441      */
1442     public abstract FloatVector fma(Vector<Float> v1, Vector<Float> v2);
1443 
1444     /**
1445      * Calculates the product of this vector and the broadcast of a first input
1446      * scalar summed with the broadcast of a second input scalar.
1447      * More specifically as if the following:
1448      * <pre>{@code
1449      *   this.fma(EVector.broadcast(this.species(), s1), EVector.broadcast(this.species(), s2))
1450      * }</pre>
1451      * <p>
1452      * This is a lane-wise ternary operation which applies the {@link Math#fma}
1453      * operation to each lane.
1454      *
1455      * @param s1 the first input scalar
1456      * @param s2 the second input scalar
1457      * @return the product of this vector and the broadcast of the first input
1458      * scalar summed with the broadcast of the second input scalar
1459      */
1460     public abstract FloatVector fma(float s1, float s2);
1461 
1462     /**
1463      * Computes the product of this vector and a first input vector, summed
1464      * with a second input vector, with lane selection governed by a mask.
1465      * The result is as if computed by the following (ignoring any
1466      * differences in numerical accuracy):
1467      * <pre>{@code
1468      *   this.mul(v1, m).add(v2, m)
1469      * }</pre>
1470      * <p>
1471      * This lane-wise ternary operation applies {@link Math#fma}
1472      * to each lane.
1473      *
1474      * @param v1 the first input vector
1475      * @param v2 the second input vector
1476      * @param m the mask governing lane selection
1477      * @return the product of this vector and the first input vector summed with
1478      * the second input vector
1479      */
1480     public FloatVector fma(Vector<Float> v1, Vector<Float> v2, VectorMask<Float> m) {
1481         return tOp(v1, v2, m, (lane, x, y, z) -> Math.fma(x, y, z));
1482     }
1483 
1484     /**
1485      * Calculates the product of this vector and the broadcast of a first input
1486      * scalar summed with the broadcast of a second input scalar, selecting lane
1487      * elements controlled by a mask.
1488      * More specifically as if the following:
1489      * <pre>{@code
1490      *   this.fma(FloatVector.broadcast(this.species(), s1), FloatVector.broadcast(this.species(), s2), m)
1491      * }</pre>
1492      * <p>
1493      * This is a lane-wise ternary operation which applies the {@link Math#fma} operation
1494      * to each lane.
1495      *
1496      * @param s1 the first input scalar
1497      * @param s2 the second input scalar
1498      * @param m the mask controlling lane selection
1499      * @return the product of this vector and the broadcast of the first input
1500      * scalar summed with the broadcast of the second input scalar, as selected by the mask
1501      */
1502     public abstract FloatVector fma(float s1, float s2, VectorMask<Float> m);
1503 
1504     /**
1505      * Computes, lane by lane, the square root of the sum of the squares of
1506      * this vector and an input vector.
1507      * The result is as if computed by the following (ignoring any differences
1508      * in numerical accuracy):
1509      * <pre>{@code
1510      *   this.mul(this).add(v.mul(v)).sqrt()
1511      * }</pre>
1512      * <p>
1513      * This is a lane-wise binary operation with the same semantic definition
1514      * as the {@link Math#hypot} operation applied to each lane.
1515      * The implementation is not required to return the same
1516      * results as {@link Math#hypot}, but adheres to the rounding, monotonicity,
1517      * and special case semantics defined in the {@link Math#hypot}
1518      * specification. The computed result will be within 1 ulp of the
1519      * exact result.
1520      *
1521      * @param v the input vector
1522      * @return the square root of the sum of the squares of this vector and
1523      * the input vector
1524      */
1525     public FloatVector hypot(Vector<Float> v) {
1526         return bOp(v, (lane, x, y) -> (float) Math.hypot(x, y));
1527     }
1528 
1529     /**
1530      * Calculates square root of the sum of the squares of this vector and the
1531      * broadcast of an input scalar.
1532      * More specifically as if the following (ignoring any differences in
1533      * numerical accuracy):
1534      * <pre>{@code
1535      *   this.mul(this).add(FloatVector.broadcast(this.species(), s * s)).sqrt()
1536      * }</pre>
1537      * <p>
1538      * This is a lane-wise binary operation with the same semantic definition as
1539      * the {@link Math#hypot} operation applied to each lane.
1540      * The implementation is not required to return the same
1541      * results as {@link Math#hypot}, but adheres to rounding, monotonicity,
1542      * and special case semantics as defined in the {@link Math#hypot}
1543      * specifications. The computed result will be within 1 ulp of the
1544      * exact result.
1545      *
1546      * @param s the input scalar
1547      * @return square root of the sum of the squares of this vector and the
1548      * broadcast of an input scalar
1549      */
1550     public abstract FloatVector hypot(float s);
1551 
1552     /**
1553      * Computes, lane by lane, the square root of the sum of the squares of
1554      * this vector and an input vector, with lane selection controlled by a mask.
1555      * The result is as if computed by the following (ignoring any differences
1556      * in numerical accuracy):
1557      * <pre>{@code
1558      *   this.mul(this, m).add(v.mul(v), m).sqrt(m)
1559      * }</pre>
1560      * <p>
1561      * Semantics for rounding, monotonicity, and special cases are
1562      * described in {@link #hypot(Vector)}.
1563      *
1564      * @param v the input vector
1565      * @param m the mask controlling lane selection
1566      * @return the square root of the sum of the squares of this vector and
1567      * the input vector, as selected by the mask
1568      */
1569     public FloatVector hypot(Vector<Float> v, VectorMask<Float> m) {
1570         return bOp(v, m, (lane, x, y) -> (float) Math.hypot(x, y));
1571     }
1572 
1573     /**
1574      * Calculates square root of the sum of the squares of this vector and the
1575      * broadcast of an input scalar, selecting lane elements controlled by a
1576      * mask.
1577      * More specifically as if the following (ignoring any differences in
1578      * numerical accuracy):
1579      * <pre>{@code
1580      *   this.mul(this, m).add(FloatVector.broadcast(this.species(), s * s), m).sqrt(m)
1581      * }</pre>
1582      * <p>
1583      * Semantics for rounding, monotonicity, and special cases are
1584      * described in {@link #hypot(Vector)}.
1585      *
1586      * @param s the input scalar
1587      * @param m the mask controlling lane selection
1588      * @return square root of the sum of the squares of this vector and the
1589      * broadcast of an input scalar, as selected by the mask
1590      */
1591     public abstract FloatVector hypot(float s, VectorMask<Float> m);
1592 
1593 
1594     @Override // redeclared abstract: concrete subclasses must supply the byte-array store
1595     public abstract void intoByteArray(byte[] a, int ix);
1596 
1597     @Override // overload of the byte-array store that additionally takes a mask
1598     public abstract void intoByteArray(byte[] a, int ix, VectorMask<Float> m);
1599 
1600     @Override // redeclared abstract: concrete subclasses must supply the ByteBuffer store
1601     public abstract void intoByteBuffer(ByteBuffer bb, int ix);
1602 
1603     @Override // overload of the ByteBuffer store that additionally takes a mask
1604     public abstract void intoByteBuffer(ByteBuffer bb, int ix, VectorMask<Float> m);
1605 
1606 
1607     // Type specific horizontal reductions
1608     /**
1609      * Adds all lane elements of this vector.
1610      * <p>
1611      * This is a cross-lane reduction operation which applies the addition
1612      * operation ({@code +}) to lane elements,
1613      * and the identity value is {@code 0.0}.
1614      *
1615      * <p>The value of a floating-point sum is a function of both the input values and
1616      * the order of addition operations. The order of addition operations of this method
1617      * is intentionally not defined to allow the JVM to generate optimal machine
1618      * code for the underlying platform at runtime. If the platform supports a vector
1619      * instruction to add all values in the vector, or if there is some other efficient machine
1620      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1621      * the default implementation of adding vectors sequentially from left to right is used.
1622      * For this reason, the output of this method may vary for the same input values.
1623      *
1624      * @return the sum of all the lane elements of this vector
1625      */
1626     public abstract float addAll();
1627 
1628     /**
1629      * Adds all lane elements of this vector, selecting lane elements
1630      * controlled by a mask.
1631      * <p>
1632      * This is a cross-lane reduction operation which applies the addition
1633      * operation ({@code +}) to lane elements,
1634      * and the identity value is {@code 0.0}.
1635      *
1636      * <p>The value of a floating-point sum is a function of both the input values and
1637      * the order of addition operations. The order of addition operations of this method
1638      * is intentionally not defined to allow the JVM to generate optimal machine
1639      * code for the underlying platform at runtime. If the platform supports a vector
1640      * instruction to add all values in the vector, or if there is some other efficient machine
1641      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1642      * the default implementation of adding vectors sequentially from left to right is used.
1643      * For this reason, the output of this method may vary for the same input values.
1644      *
1645      * @param m the mask controlling lane selection
1646      * @return the sum of the selected lane elements of this vector
1647      */
1648     public abstract float addAll(VectorMask<Float> m);
1649 
1650     /**
1651      * Multiplies all lane elements of this vector.
1652      * <p>
1653      * This is a cross-lane reduction operation which applies the
1654      * multiplication operation ({@code *}) to lane elements,
1655      * and the identity value is {@code 1.0}.
1656      *
1657      * <p>The order of multiplication operations of this method
1658      * is intentionally not defined to allow the JVM to generate optimal machine
1659      * code for the underlying platform at runtime. If the platform supports a vector
1660      * instruction to multiply all values in the vector, or if there is some other efficient machine
1661      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1662      * the default implementation of multiplying vectors sequentially from left to right is used.
1663      * For this reason, the output of this method may vary for the same input values.
1664      *
1665      * @return the product of all the lane elements of this vector
1666      */
1667     public abstract float mulAll();
1668 
1669     /**
1670      * Multiplies all lane elements of this vector, selecting lane elements
1671      * controlled by a mask.
1672      * <p>
1673      * This is a cross-lane reduction operation which applies the
1674      * multiplication operation ({@code *}) to lane elements,
1675      * and the identity value is {@code 1.0}.
1676      *
1677      * <p>The order of multiplication operations of this method
1678      * is intentionally not defined to allow the JVM to generate optimal machine
1679      * code for the underlying platform at runtime. If the platform supports a vector
1680      * instruction to multiply all values in the vector, or if there is some other efficient machine
1681      * code sequence, then the JVM has the option of generating this machine code. Otherwise,
1682      * the default implementation of multiplying vectors sequentially from left to right is used.
1683      * For this reason, the output of this method may vary for the same input values.
1684      *
1685      * @param m the mask controlling lane selection
1686      * @return the product of the selected lane elements of this vector
1687      */
1688     public abstract float mulAll(VectorMask<Float> m);
1689 
1690     /**
1691      * Returns the minimum lane element of this vector.
1692      * <p>
1693      * This is an associative cross-lane reduction operation which applies the operation
1694      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1695      * and the identity value is
1696      * {@link Float#POSITIVE_INFINITY}.
1697      *
1698      * @return the minimum lane element of this vector, as determined by {@code Math.min}
1699      */
1700     public abstract float minAll();
1701 
1702     /**
1703      * Returns the minimum lane element of this vector, selecting lane elements
1704      * controlled by a mask.
1705      * <p>
1706      * This is an associative cross-lane reduction operation which applies the operation
1707      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1708      * and the identity value is
1709      * {@link Float#POSITIVE_INFINITY}.
1710      *
1711      * @param m the mask controlling lane selection
1712      * @return the minimum of the selected lane elements of this vector
1713      */
1714     public abstract float minAll(VectorMask<Float> m);
1715 
1716     /**
1717      * Returns the maximum lane element of this vector.
1718      * <p>
1719      * This is an associative cross-lane reduction operation which applies the operation
1720      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1721      * and the identity value is
1722      * {@link Float#NEGATIVE_INFINITY}.
1723      *
1724      * @return the maximum lane element of this vector, as determined by {@code Math.max}
1725      */
1726     public abstract float maxAll();
1727 
1728     /**
1729      * Returns the maximum lane element of this vector, selecting lane elements
1730      * controlled by a mask.
1731      * <p>
1732      * This is an associative cross-lane reduction operation which applies the operation
1733      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1734      * and the identity value is
1735      * {@link Float#NEGATIVE_INFINITY}.
1736      *
1737      * @param m the mask controlling lane selection
1738      * @return the maximum of the selected lane elements of this vector
1739      */
1740     public abstract float maxAll(VectorMask<Float> m);
1741 
1742 
1743     // Type specific accessors
1744 
1745     /**
1746      * Gets the lane element at lane index {@code i}.
1747      *
1748      * @param i the lane index
1749      * @return the lane element at lane index {@code i}
1750      * @throws IllegalArgumentException if the index is out of range
1751      * ({@code < 0 || >= length()})
1752      */
1753     public abstract float lane(int i);
1754 
1755     /**
1756      * Replaces the lane element of this vector at lane index {@code i} with
1757      * value {@code e}.
1758      * <p>
1759      * This is a cross-lane operation and behaves as if it returns the result
1760      * of blending this vector with an input vector that is the result of
1761      * broadcasting {@code e} and a mask that has only one lane set at lane
1762      * index {@code i}.
1763      *
1764      * @param i the lane index of the lane element to be replaced
1765      * @param e the value to be placed
1766      * @return the result of replacing the lane element of this vector at lane
1767      * index {@code i} with value {@code e}
1768      * @throws IllegalArgumentException if the index is out of range
1769      * ({@code < 0 || >= length()})
1770      */
1771     public abstract FloatVector with(int i, float e);
1772 
1773     // Type specific extractors
1774 
1775     /**
1776      * Returns a newly allocated array holding the lane elements of this vector.
1777      * <p>
1778      * This method behaves as if it {@link #intoArray(float[], int) stores}
1779      * this vector into an allocated array and returns the array as follows:
1780      * <pre>{@code
1781      *   float[] a = new float[this.length()];
1782      *   this.intoArray(a, 0);
1783      *   return a;
1784      * }</pre>
1785      *
1786      * @return an array containing the lane elements of this vector
1787      */
1788     @ForceInline
1789     public final float[] toArray() {
1790         final float[] lanes = new float[species().length()];
1791         intoArray(lanes, 0);
1792         return lanes;
1793     }
1794 
1795     /**
1796      * Stores this vector into an array starting at the given offset.
1797      * <p>
1798      * For each vector lane, where {@code N} is the vector lane index,
1799      * the lane element at index {@code N} is stored into the array at index
1800      * {@code offset + N}.
1801      *
1802      * @param a the array to store into
1803      * @param offset the offset into the array
1804      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
1805      * {@code offset > a.length - this.length()}
1806      */
1807     public abstract void intoArray(float[] a, int offset);
1808 
1809     /**
1810      * Stores this vector into an array starting at the given offset and using a mask.
1811      * <p>
1812      * For each vector lane, where {@code N} is the vector lane index,
1813      * if the mask lane at index {@code N} is set then the lane element at
1814      * index {@code N} is stored into the array at index {@code offset + N}.
1815      *
1816      * @param a the array to store into
1817      * @param offset the offset into the array
1818      * @param m the mask
1819      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
1820      * for any vector lane index {@code N} where the mask at lane {@code N}
1821      * is set, {@code offset >= a.length - N}
1822      */
1823     public abstract void intoArray(float[] a, int offset, VectorMask<Float> m);
1824 
1825     /**
1826      * Stores this vector into an array using indexes obtained from an index
1827      * map.
1828      * <p>
1829      * For each vector lane, where {@code N} is the vector lane index, the
1830      * lane element at index {@code N} is stored into the array at index
1831      * {@code a_offset + indexMap[i_offset + N]}.
1832      *
1833      * @param a the array to store into
1834      * @param a_offset the offset into the array, may be negative if relative
1835      * indexes in the index map compensate to produce a value within the
1836      * array bounds
1837      * @param indexMap the index map
1838      * @param i_offset the offset into the index map
1839      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
1840      * {@code i_offset > indexMap.length - this.length()},
1841      * or for any vector lane index {@code N} the result of
1842      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
1843      */
1844     public abstract void intoArray(float[] a, int a_offset, int[] indexMap, int i_offset);
1845 
1846     /**
1847      * Stores this vector into an array using indexes obtained from an index
1848      * map and using a mask.
1849      * <p>
1850      * For each vector lane, where {@code N} is the vector lane index,
1851      * if the mask lane at index {@code N} is set then the lane element at
1852      * index {@code N} is stored into the array at index
1853      * {@code a_offset + indexMap[i_offset + N]}.
1854      *
1855      * @param a the array to store into
1856      * @param a_offset the offset into the array, may be negative if relative
1857      * indexes in the index map compensate to produce a value within the
1858      * array bounds
1859      * @param m the mask
1860      * @param indexMap the index map
1861      * @param i_offset the offset into the index map
1862      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
1863      * {@code i_offset > indexMap.length - this.length()},
1864      * or for any vector lane index {@code N} where the mask at lane
1865      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
1866      * {@code < 0} or {@code >= a.length}
1867      */
1868     public abstract void intoArray(float[] a, int a_offset, VectorMask<Float> m, int[] indexMap, int i_offset);
1869     // Species: abstract accessor, the FloatSpecies implementation, lookup helpers and constants
1870 
1871     @Override
1872     public abstract VectorSpecies<Float> species();
1873 
1874     /** Species of {@link FloatVector}s that all share the same {@link VectorShape VectorShape}. */
1875     static final class FloatSpecies extends AbstractSpecies<Float> {
1876         /** Receives the filled lane array built by the {@code op} methods and yields the vector. */
1877         final Function<float[], FloatVector> vectorFactory;
1878 
1879         private FloatSpecies(VectorShape shape, Class<?> boxType, Class<?> maskType,
1880                              Function<float[], FloatVector> vectorFactory,
1881                              Function<boolean[], VectorMask<Float>> maskFactory,
1882                              Function<IntUnaryOperator, VectorShuffle<Float>> shuffleFromArrayFactory,
1883                              fShuffleFromArray<Float> shuffleFromOpFactory) {
1884             super(shape, float.class, Float.SIZE, boxType, maskType, maskFactory,
1885                   shuffleFromArrayFactory, shuffleFromOpFactory);
1886             this.vectorFactory = vectorFactory;
1887         }
1888 
1889         /** Supplies the {@code float} value for the lane index passed to {@code apply}. */
1890         interface FOp {
1891             float apply(int i);
1892         }
1893 
1894         /** Fills a {@code float[]} with {@code f.apply(i)} at each index and passes it to the factory. */
1895         FloatVector op(FOp f) {
1896             float[] lanes = new float[length()];
1897             for (int lane = 0; lane < lanes.length; lane++) {
1898                 lanes[lane] = f.apply(lane);
1899             }
1900             return vectorFactory.apply(lanes);
1901         }
1902 
1903         /** As {@link #op(FOp)}, but array entries whose mask bit is unset are left at {@code 0.0f}. */
1904         FloatVector op(VectorMask<Float> o, FOp f) {
1905             float[] lanes = new float[length()];
1906             boolean[] selected = ((AbstractMask<Float>) o).getBits();
1907             for (int lane = 0; lane < lanes.length; lane++) {
1908                 if (selected[lane]) {
1909                     lanes[lane] = f.apply(lane);
1910                 }
1911             }
1912             return vectorFactory.apply(lanes);
1913         }
1914     }
1915 
1916     /**
1917      * Looks up the preferred species for an element type of {@code float}.
1918      * <p>
1919      * A preferred species is a species chosen by the platform that has a
1920      * shape of maximal bit size.  A preferred species for different element
1921      * types will have the same shape, and therefore vectors, masks, and
1922      * shuffles created from such species will be shape compatible.
1923      * @return the preferred species for an element type of {@code float}
1924      */
1925     private static FloatSpecies preferredSpecies() {
1926         FloatSpecies preferred = (FloatSpecies) VectorSpecies.ofPreferred(float.class);
1927         return preferred;
1928     }
1929 
1930     /**
1931      * Finds a species for an element type of {@code float} and shape.
1932      *
1933      * @param s the shape
1934      * @return a species for an element type of {@code float} and shape
1935      * @throws IllegalArgumentException if no such species exists for the shape
1936      */
1937     static FloatSpecies species(VectorShape s) {
1938         Objects.requireNonNull(s);
1939         switch (s) {
1940             case S_64_BIT: return (FloatSpecies) SPECIES_64;
1941             case S_128_BIT: return (FloatSpecies) SPECIES_128;
1942             case S_256_BIT: return (FloatSpecies) SPECIES_256;
1943             case S_512_BIT: return (FloatSpecies) SPECIES_512;
1944             case S_Max_BIT: return (FloatSpecies) SPECIES_MAX;
1945             default: throw new IllegalArgumentException("Bad shape: " + s);
1946         }
1947     }
1948 
1949     /** Species representing {@link FloatVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
1950     public static final VectorSpecies<Float> SPECIES_64 = new FloatSpecies(VectorShape.S_64_BIT, Float64Vector.class, Float64Vector.Float64Mask.class,
1951                                                                      Float64Vector::new, Float64Vector.Float64Mask::new,
1952                                                                      Float64Vector.Float64Shuffle::new, Float64Vector.Float64Shuffle::new);
1953 
1954     /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
1955     public static final VectorSpecies<Float> SPECIES_128 = new FloatSpecies(VectorShape.S_128_BIT, Float128Vector.class, Float128Vector.Float128Mask.class,
1956                                                                       Float128Vector::new, Float128Vector.Float128Mask::new,
1957                                                                       Float128Vector.Float128Shuffle::new, Float128Vector.Float128Shuffle::new);
1958 
1959     /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
1960     public static final VectorSpecies<Float> SPECIES_256 = new FloatSpecies(VectorShape.S_256_BIT, Float256Vector.class, Float256Vector.Float256Mask.class,
1961                                                                       Float256Vector::new, Float256Vector.Float256Mask::new,
1962                                                                       Float256Vector.Float256Shuffle::new, Float256Vector.Float256Shuffle::new);
1963 
1964     /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
1965     public static final VectorSpecies<Float> SPECIES_512 = new FloatSpecies(VectorShape.S_512_BIT, Float512Vector.class, Float512Vector.Float512Mask.class,
1966                                                                       Float512Vector::new, Float512Vector.Float512Mask::new,
1967                                                                       Float512Vector.Float512Shuffle::new, Float512Vector.Float512Shuffle::new);
1968 
1969     /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
1970     public static final VectorSpecies<Float> SPECIES_MAX = new FloatSpecies(VectorShape.S_Max_BIT, FloatMaxVector.class, FloatMaxVector.FloatMaxMask.class,
1971                                                                       FloatMaxVector::new, FloatMaxVector.FloatMaxMask::new,
1972                                                                       FloatMaxVector.FloatMaxShuffle::new, FloatMaxVector.FloatMaxShuffle::new);
1973 
1974     /**
1975      * The species preferred by the platform for {@link FloatVector}s.
1976      * A preferred species is a species of maximal bit size for the platform.
1977      */
1978     public static final VectorSpecies<Float> SPECIES_PREFERRED = preferredSpecies();
1979 }