Old src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java

   1 /*
   2  * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import jdk.internal.misc.Unsafe;
  28 import jdk.internal.vm.annotation.ForceInline;
  29 import jdk.internal.vm.annotation.Stable;
  30 
  31 import java.lang.reflect.Array;
  32 import java.nio.ByteBuffer;
  33 import java.nio.ByteOrder;
  34 import java.util.Arrays;
  35 import java.util.List;
  36 import java.util.Objects;
  37 import java.util.function.Function;
  38 import java.util.function.IntUnaryOperator;
  39 import java.util.function.UnaryOperator;
  40 
  41 import jdk.incubator.vector.*;
  42 
  43 /**
  44  * A
  45  *
  46  * <!-- The following paragraphs are shared verbatim
  47  *   -- between Vector.java and package-info.java -->
  48  * sequence of a fixed number of <em>lanes</em>,
  49  * all of some fixed
  50  * {@linkplain Vector#elementType() <em>element type</em>}
  51  * such as {@code byte}, {@code long}, or {@code float}.
  52  * Each lane contains an independent value of the element type.
  53  * Operations on vectors are typically
  54  * <a href="Vector.html#lane-wise"><em>lane-wise</em></a>,
  55  * distributing some scalar operator (such as
  56  * {@linkplain Vector#add(Vector) addition})
  57  * across the lanes of the participating vectors,
  58  *
  59  * usually generating a vector result whose lanes contain the various
  60  * scalar results.  When run on a supporting platform, lane-wise
  61  * operations can be executed in parallel by the hardware.  This style
  62  * of parallelism is called <em>Single Instruction Multiple Data</em>
  63  * (SIMD) parallelism.
  64  *
  65  * <p> In the SIMD style of programming, most of the operations within
  66  * a vector lane are unconditional, but the effect of conditional
  67  * execution may be achieved using
  68  * <a href="Vector.html#masking"><em>masked operations</em></a>
  69  * such as {@link Vector#blend(Vector,VectorMask) blend()},
  70  * under the control of an associated {@link VectorMask}.
  71  * Data motion other than strictly lane-wise flow is achieved using
  72  * <a href="Vector.html#cross-lane"><em>cross-lane</em></a>
  73  * operations, often under the control of an associated
  74  * {@link VectorShuffle}.
  75  * Lane data and/or whole vectors can be reformatted using various
  76  * kinds of lane-wise
  77  * {@linkplain Vector#convert(VectorOperators.Conversion,int) conversions},
  78  * and byte-wise reformatting
  79  * {@linkplain Vector#reinterpretShape(VectorSpecies,int) reinterpretations},
  80  * often under the control of a reflective {@link VectorSpecies}
  81  * object which selects an alternative vector format different
  82  * from that of the input vector.
  83  *
  84  * <p> {@code Vector<E>} declares a set of vector operations (methods)
  85  * that are common to all element types.  These common operations
  86  * include generic access to lane values, data selection and movement,
  87  * reformatting, and certain arithmetic and logical operations (such as addition
  88  * or comparison) that are common to all primitive types.
  89  *
  90  * <p> <a href="Vector.html#subtypes">Public subtypes of {@code Vector}</a>
  91  * correspond to specific
  92  * element types.  These declare further operations that are specific
  93  * to that element type, including unboxed access to lane values,
  94  * bitwise operations on values of integral element types, or
  95  * transcendental operations on values of floating point element
  96  * types.
  97  *
  98  * <p>This package contains a public subtype of {@link Vector}
  99  * corresponding to each supported element type:
 100  * {@link ByteVector}, {@link ShortVector},
 101  * {@link IntVector}, {@link LongVector},
 102  * {@link FloatVector}, and {@link DoubleVector}.
 103  *
 104  * <!-- The preceding paragraphs are shared verbatim
 105  *   -- between Vector.java and package-info.java -->
 106  *
 107  * <p> The {@linkplain #elementType element type} of a vector,
 108  * sometimes called {@code ETYPE}, is one of the primitive types
 109  * {@code byte}, {@code short}, {@code int}, {@code long}, {@code
 110  * float}, or {@code double}.
 111  *
 112  * <p> The type {@code E} in {@code Vector<E>} is a generic type
 113  * argument that corresponds to the element type.  In fact, it is the
 114  * <em>boxed</em> version of the primitive element type.  For example,
 115  * in the type {@code Vector<Integer>}, the {@code E} parameter is
 116  * {@code Integer} but the {@code ETYPE} is {@code int}.  In such a
 117  * vector, each lane carries a primitive {@code int} value.  This
 118  * pattern continues for the other primitive types as well.
 119  *
 120  * <p> The {@linkplain #length() length} of a vector is the number of
 121  * lanes it contains.
 122  *
 123  * This number is also called {@code VLENGTH} when the context makes
 124  * clear which vector it belongs to.  Each vector has its own fixed
 125  * {@code VLENGTH} but different instances of vectors may have
 126  * different lengths.  {@code VLENGTH} is an important number, because
 127  * it estimates the SIMD performance gain of a single vector operation
 128  * as compared to scalar execution of the {@code VLENGTH} scalar
 129  * operators which underlie the vector operation.
 130  *
 131  * <h1><a id="species"></a>Shapes and species</h1>
 132  *
 133  * The information capacity of a vector is determined by its
 134  * {@linkplain #shape() <em>vector shape</em>}, also called its
 135  * {@code VSHAPE}.  Each possible {@code VSHAPE} is represented by
 136  * a member of the {@link VectorShape} enumeration, and represents
 137  * an implementation format shared in common by all vectors
 138  * of that shape.  Thus, the {@linkplain #bitSize() size in bits}
 139  * of a vector is determined by appealing to its vector shape.
 140  *
 141  * <p> Some Java platforms give special support to only one shape,
 142  * while others support several.  A typical platform is not likely
 143  * to support all the shapes described by this API.  For this reason,
 144  * most vector operations work on a single input shape and
 145  * produce the same shape on output.  Operations which change
 146  * shape are clearly documented as being <em>shape-changing</em>,
 147  * while the majority of operations are <em>shape-invariant</em>,
 148  * to avoid disadvantaging platforms which support only one shape.
 149  * There are queries to discover, for the current Java platform,
 150  * the {@linkplain VectorShape#preferredShape() preferred shape}
 151  * for general SIMD computation, or the
 152  * {@linkplain VectorShape#largestShapeFor(Class) largest
 153  * available shape} for any given lane type.  To be portable,
 154  * code using this API should start by querying a supported
 155  * shape, and then process all data with shape-invariant
 156  * operations, within the selected shape.
 157  *
 158  * <p> Each unique combination of element type and vector shape
 159  * determines a unique
 160  * {@linkplain #species() <em>vector species</em>}.
 161  * A vector species is represented by a fixed instance of
 162  * {@link VectorSpecies VectorSpecies&lt;E&gt;}
 163  * shared in common by all vectors of the same shape and
 164  * {@code ETYPE}.
 165  *
 166  * <p> Unless otherwise documented, lane-wise vector operations
 167  * require that all vector inputs have exactly the same {@code VSHAPE}
 168  * and {@code VLENGTH}, which is to say that they must have exactly
 169  * the same species.  This allows corresponding lanes to be paired
 170  * unambiguously.  The {@link #check(VectorSpecies) check()} method
 171  * provides an easy way to perform this check explicitly.
 172  *
 173  * <p> Vector shape, {@code VLENGTH}, and {@code ETYPE} are all
 174  * mutually constrained, so that {@code VLENGTH} times the
 175  * {@linkplain #elementSize() bit-size of each lane}
 176  * must always match the bit-size of the vector's shape.
 177  *
 178  * Thus, {@linkplain #reinterpretShape(VectorSpecies,int) reinterpreting} a
 179  * vector via a cast may double its length if and only if it either
 180  * halves the lane size, or else changes the shape.  Likewise,
 181  * reinterpreting a vector may double the lane size if and only if it
 182  * either halves the length, or else changes the shape of the vector.
 183  *
 184  * <h1><a id="subtypes"></a>Vector subtypes</h1>
 185  *
 186  * Vector declares a set of vector operations (methods) that are common to all
 187  * element types (such as addition).  Sub-classes of Vector with a concrete
 188  * element type declare further operations that are specific to that
 189  * element type (such as access to element values in lanes, logical operations
 190  * on values of integral element types, or transcendental operations on values
 191  * of floating point element types).
 192  * There are six abstract sub-classes of Vector corresponding to the supported set
 193  * of element types, {@link ByteVector}, {@link ShortVector},
 194  * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
 195  * {@link DoubleVector}. Along with type-specific operations these classes
 196  * support creation of vector values (instances of Vector).
 197  * They expose static constants corresponding to the supported species,
 198  * and static methods on these types generally take a species as a parameter.
 199  * For example,
 200  * {@link FloatVector#fromArray(VectorSpecies, float[], int) FloatVector.fromArray}
 201  * creates and returns a float vector of the specified species, with elements
 202  * loaded from the specified float array.
 203  * It is recommended that Species instances be held in {@code static final}
 204  * fields for optimal creation and usage of Vector values by the runtime compiler.
 205  *
 206  * <p> The various typed vector classes expose static constants
 207  * corresponding to their supported species, and static methods on these
 208  * types generally take a species as a parameter.  For example, the
 209  * constant {@link FloatVector#SPECIES_256 FloatVector.SPECIES_256}
 210  * is the unique species whose lanes are {@code float}s and whose
 211  * vector size is 256 bits.  Again, the constant
 212  * {@link ShortVector#SPECIES_PREFERRED} is the species which
 213  * best supports processing of {@code short} vector lanes on
 214  * the currently running Java platform.
 215  *
 216  * <p> As another example, a broadcast scalar value of
 217  * {@code (double)0.5} can be obtained by calling
 218  * {@link DoubleVector#broadcast(VectorSpecies,double)
 219  * DoubleVector.broadcast(dsp, 0.5)}, but the argument {@code dsp} is
 220  * required to select the species (and hence the shape and length) of
 221  * the resulting vector.
 222  *
 223  * <h1><a id="lane-wise"></a>Lane-wise operations</h1>
 224  *
 225  * We use the term <em>lanes</em> when defining operations on
 226  * vectors. The number of lanes in a vector is the number of scalar
 227  * elements it holds. For example, a vector of type {@code float} and
 228  * shape {@code S_256_BIT} has eight lanes, since {@code 32*8=256}.
 229  *
 230  * <p> Most operations on vectors are lane-wise, which means the operation
 231  * is composed of an underlying scalar operator, which is repeated for
 232  * each distinct lane of the input vector.  If there are additional
 233  * vector arguments of the same type, their lanes are aligned with the
 234  * lanes of the first input vector.  (They must all have a common
 235  * {@code VLENGTH}.)  The output resulting from a lane-wise operation
 236  * will have a {@code VLENGTH} which is equal to the {@code VLENGTH}
 237  * of the input(s) to the operation.  Thus, lane-wise operations are
 238  * <em>length-invariant</em>, in their basic definitions.
 239  *
 240  * <p> The principle of length-invariance is combined with another
 241  * basic principle, that lane-wise operations are always
 242  * <em>shape-invariant</em>, meaning that the inputs and the output of
 243  * a lane-wise operation will have a common {@code VSHAPE}.  When the
 244  * principles conflict, because a logical result (with an invariant
 245  * {@code VLENGTH}), does not fit into the invariant {@code VSHAPE},
 246  * the resulting expansions and contractions are handled explicitly
 247  * with
 248  * <a href="Vector.html#expansion">special conventions</a>.
 249  *
 250  * <p> Vector operations can be grouped into various categories and
 251  * their behavior can be generally specified in terms of underlying
 252  * scalar operators.  In the examples below, {@code ETYPE} is the
 253  * element type of the operation (such as {@code int.class}) and
 254  * {@code EVector} is the corresponding concrete vector type (such as
 255  * {@code IntVector.class}).
 256  *
 257  * <ul>
 258  * <li>
 259  * A <em>lane-wise unary</em> operation takes one input vector,
 260  * distributing a unary scalar operator across the lanes, 
 261  * and produces a result vector of the same type and shape.
 262  *
 263  * For each lane of the input vector {@code a},
 264  * the underlying scalar operator is applied to the lane value.
 265  * The result is placed into the vector result in the same lane.
 266  * The following pseudocode illustrates the behavior of this operation
 267  * category:
 268  *
 269  * <pre>{@code
 270  * ETYPE scalar_unary_op(ETYPE s);
 271  * EVector a = ...;
 272  * VectorSpecies<E> species = a.species();
 273  * ETYPE[] ar = new ETYPE[a.length()];
 274  * for (int i = 0; i < ar.length; i++) {
 275  *     ar[i] = scalar_unary_op(a.lane(i));
 276  * }
 277  * EVector r = EVector.fromArray(species, ar, 0);
 278  * }</pre>
 279  *
 280  * <li>
 281  * A <em>lane-wise binary</em> operation takes two input vectors,
 282  * distributing a binary scalar operator across the lanes, 
 283  * and produces a result vector of the same type and shape.
 284  * 
 285  * For each lane of the two input vectors {@code a} and {@code b},
 286  * the underlying scalar operator is applied to the lane values.
 287  * The result is placed into the vector result in the same lane.
 288  * The following pseudocode illustrates the behavior of this operation
 289  * category:
 290  *
 291  * <pre>{@code
 292  * ETYPE scalar_binary_op(ETYPE s, ETYPE t);
 293  * EVector a = ...;
 294  * VectorSpecies<E> species = a.species();
 295  * EVector b = ...;
 296  * b.check(species);  // must have same species
 297  * ETYPE[] ar = new ETYPE[a.length()];
 298  * for (int i = 0; i < ar.length; i++) {
 299  *     ar[i] = scalar_binary_op(a.lane(i), b.lane(i));
 300  * }
 301  * EVector r = EVector.fromArray(species, ar, 0);
 302  * }</pre>
 303  * </li>
 304  *
 305  * <li>
 306  * Generalizing from unary and binary operations,
 307  * a <em>lane-wise n-ary</em> operation takes {@code N} input vectors {@code v[j]},
 308  * distributing an n-ary scalar operator across the lanes,
 309  * and produces a result vector of the same type and shape.
 310  * Except for a few ternary operations, this API has no support for
 311  * lane-wise n-ary operations.
 312  *
 313  * For each lane of all of the input vectors {@code v[j]},
 314  * the underlying scalar operator is applied to the lane values.
 315  * The result is placed into the vector result in the same lane.
 316  * The following pseudocode illustrates the behavior of this operation
 317  * category:
 318  *
 319  * <pre>{@code
 320  * ETYPE scalar_nary_op(ETYPE... args);
 321  * EVector[] v = ...;
 322  * int N = v.length;
 323  * VectorSpecies<E> species = v[0].species();
 324  * for (EVector arg : v) {
 325  *     arg.check(species);  // all must have same species
 326  * }
 327  * ETYPE[] ar = new ETYPE[species.length()];
 328  * for (int i = 0; i < ar.length; i++) {
 329  *     ETYPE[] args = new ETYPE[N];
 330  *     for (int j = 0; j < N; j++) {
 331  *         args[j] = v[j].lane(i);
 332  *     }
 333  *     ar[i] = scalar_nary_op(args);
 334  * }
 335  * EVector r = EVector.fromArray(species, ar, 0);
 336  * }</pre>
 337  * </li>
 338  *
 339  * <li>
 340  * A <em>lane-wise conversion</em> operation takes one input vector,
 341  * distributing a unary scalar conversion operator across the lanes,
 342  * and produces a logical result of the converted values.  The logical
 343  * result (or at least a part of it) is presented in a vector of the
 344  * same shape as the input vector.
 345  *
 346  * <p> Unlike other lane-wise operations, conversions can change lane
 347  * type, from the input (domain) type to the output (range) type.  The
 348  * lane size may change along with the type.  In order to manage the
 349  * size changes, lane-wise conversion methods can produce <em>partial
 350  * results</em>, under the control of a {@code part} parameter, which
 351  * is <a href="Vector.html#expansion">explained elsewhere</a>.
 352  *
 353  * <p> The following pseudocode illustrates the behavior of this
 354  * operation category in the specific example of a conversion from
 355  * {@code int} to {@code double}:
 356  *
 357  * <pre>{@code
 358  * IntVector a = ...;
 359  * int VLENGTH = a.length();
 360  * VectorShape VSHAPE = a.shape();
 361  * double[] arlogical = new double[VLENGTH];
 362  * for (int i = 0; i < VLENGTH; i++) {
 363  *     int e = a.lane(i);
 364  *     arlogical[i] = (double) e;
 365  * }
 366  * VectorSpecies<Double> rs = VSHAPE.withLanes(double.class);
 367  * DoubleVector r = DoubleVector.fromArray(rs, arlogical, 0);
 368  * int M = Double.SIZE / Integer.SIZE;  // expansion factor
 369  * assert r.length() == VLENGTH / M;
 370  * }</pre>
 371  * </li>
 372  *
 373  * <li>
 374  * A <em>cross-lane reduction</em> operation operates on all
 375  * the lane elements of an input vector.
 376  * An accumulation function is applied to all the
 377  * lane elements to produce a scalar result.
 378  * If the reduction operation is associative then the result may be accumulated
 379  * by operating on the lane elements in any order using a specified associative
 380  * scalar binary operation and identity value.  Otherwise, the reduction
 381  * operation specifies the order of accumulation.
 382  * The following pseudocode illustrates the behavior of this operation category
 383  * if it is associative:
 384  * <pre>{@code
 385  * ETYPE assoc_scalar_binary_op(ETYPE s, ETYPE t);
 386  * EVector a = ...;
 387  * ETYPE r = <identity value>;
 388  * for (int i = 0; i < a.length(); i++) {
 389  *     r = assoc_scalar_binary_op(r, a.lane(i));
 390  * }
 391  * }</pre>
 392  * </li>
 393  *
 394  * <li>
 395  * A <em>cross-lane movement</em> operation operates on all
 396  * the lane elements of an input vector and moves them
 397  * in a data-dependent manner into <em>different lanes</em>
 398  * in an output vector.
 399  * The movement is steered by an auxiliary datum, such as
 400  * a {@link VectorShuffle} or a scalar index defining the
 401  * origin of the movement.
 402  * The following pseudocode illustrates the behavior of this
 403  * operation category, in the case of a shuffle:
 404  * <pre>{@code
 405  * EVector a = ...;
 406  * VectorShuffle<E> s = ...;
 407  * ETYPE[] ar = new ETYPE[a.length()];
 408  * for (int i = 0; i < ar.length; i++) {
 409  *     int source = s.laneSource(i);
 410  *     ar[i] = a.lane(source);
 411  * }
 412  * EVector r = EVector.fromArray(a.species(), ar, 0);
 413  * }</pre>
 414  * </li>
 415  *
 416  * <li>
 417  * A <em>masked operation</em> is one which is a variation on one of the
 418  * previous operations (either lane-wise or cross-lane), where
 419  * the operation takes an extra trailing {@link VectorMask} argument.
 420  * In lanes where the mask is set, the operation behaves as if the mask
 421  * argument were absent, but in lanes where the mask is unset, the
 422  * underlying scalar operation is suppressed.
 423  * Masked operations are explained in
 424  * <a href="Vector.html#masking">greater detail elsewhere</a>.
 425  *
 426  * <li>
 427  * A very special case of a masked lane-wise binary operation is a
 428  * {@linkplain #blend(Vector,VectorMask) blend}, which operates
 429  * lane-wise on two input vectors {@code a} and {@code b}, selecting lane
 430  * values from one input or the other depending on a mask {@code m}.
 431  * In lanes where {@code m} is set, the corresponding value from
 432  * {@code b} is selected into the result; otherwise the value from
 433  * {@code a} is selected.  Thus, a blend acts as a vectorized version
 434  * of Java's ternary selection expression {@code m?b:a}:
 435  * <pre>{@code
 436  * ETYPE[] ar = new ETYPE[a.length()];
 437  * for (int i = 0; i < ar.length; i++) {
 438  *     boolean isSet = m.laneIsSet(i);
 439  *     ar[i] = isSet ? b.lane(i) : a.lane(i);
 440  * }
 441  * EVector r = EVector.fromArray(a.species(), ar, 0);
 442  * }</pre>
 443  * </li>
 444  *
 445  * <li>
 446  * A <em>lane-wise binary test</em> operation takes two input vectors,
 447  * distributing a binary scalar comparison across the lanes, 
 448  * and produces, not a vector of booleans, but rather a
 449  * {@linkplain VectorMask vector mask}.
 450  *
 451  * For each lane of the two input vectors {@code a} and {@code b},
 452  * the underlying scalar comparison operator is applied to the lane values.
 453  * The resulting boolean is placed into the vector mask result in the same lane.
 454  * The following pseudocode illustrates the behavior of this operation
 455  * category:
 456  * <pre>{@code
 457  * boolean scalar_binary_test_op(ETYPE s, ETYPE t);
 458  * EVector a = ...;
 459  * VectorSpecies<E> species = a.species();
 460  * EVector b = ...;
 461  * b.check(species);  // must have same species
 462  * boolean[] mr = new boolean[a.length()];
 463  * for (int i = 0; i < mr.length; i++) {
 464  *     mr[i] = scalar_binary_test_op(a.lane(i), b.lane(i));
 465  * }
 466  * VectorMask<E> m = VectorMask.fromArray(species, mr, 0);
 467  * }</pre>
 468  * </li>
 469  *
 470  * </ul>
 471  *
 472  * <p>
 473  * If a vector operation does not belong to one of the above categories then
 474  * the method documentation explicitly specifies how it processes the lanes of
 475  * input vectors, and where appropriate illustrates the behavior using
 476  * pseudocode.
 477  *
 478  * <p>
 479  * Most lane-wise binary and comparison operations offer convenience
 480  * overloadings which accept a scalar as the second input, in place of a
 481  * vector.  In this case the scalar value is promoted to a vector by
 482  * {@linkplain Vector#broadcast(long) broadcasting it}
 483  * into the same lane structure as the first input.
 484  *
 485  * For example, to multiply all lanes of a {@code double} vector by
 486  * a scalar value {@code 1.1}, the expression {@code v.mul(1.1)} is
 487  * easier to work with than an equivalent expression with an explicit
 488  * broadcast operation, such as {@code v.mul(v.broadcast(1.1))}
 489  * or {@code v.mul(DoubleVector.broadcast(v.species(), 1.1))}.
 490  *
 491  * Unless otherwise specified the scalar variant always behaves as if
 492  * each scalar value is first transformed to a vector of the same
 493  * species as the first vector input, using the appropriate
 494  * {@code broadcast} operation.
 495  * 
 496  * <h1><a id="masking"></a>Masked operations</h1>
 497  *
 498  * <p> Many vector operations accept an optional
 499  * {@link VectorMask mask} argument, selecting which lanes participate
 500  * in the underlying scalar operator.  If present, the mask argument
 501  * appears at the end of the method argument list.
 502  *
 503  * <p> Each lane of the mask argument is a boolean which is either in
 504  * the <em>set</em> or <em>unset</em> state.  For lanes where the mask
 505  * argument is unset, the underlying scalar operator is suppressed.
 506  * In this way, masks allow vector operations to emulate scalar
 507  * control flow operations, without losing SIMD parallelism, except
 508  * where the mask lane is unset.
 509  *
 510  * <p> An operation suppressed by a mask will never cause an exception
 511  * or side effect of any sort, even if the underlying scalar operator
 512  * can potentially do so.  For example, an unset lane that seems to
 513  * access an out of bounds array element or divide an integral value
 514  * by zero will simply be ignored.  Values in suppressed lanes never
 515  * participate or appear in the result of the overall operation.
 516  *
 517  * <p> Result lanes corresponding to a suppressed operation will be
 518  * filled with a default value which depends on the specific
 519  * operation, as follows:
 520  *
 521  * <ul>
 522  *
 523  * <li>If the masked operation is a unary, binary, or n-ary arithmetic or
 524  * logical operation, suppressed lanes are filled from the first
 525  * vector operand (i.e., the vector receiving the method call), as if
 526  * by a {@linkplain #blend(Vector,VectorMask) blend}.</li>
 527  * 
 528  * <li>If the masked operation is a memory load or a {@code slice()} from
 529  * another vector, suppressed lanes are not loaded, and are filled
 530  * with the default value for the {@code ETYPE}, which in every case
 531  * consists of all zero bits.  An unset lane can never cause an
 532  * exception, even if the hypothetical corresponding memory location
 533  * does not exist (because it is out of an array's index range).</li>
 534  * 
 535  * <li>If the operation is a cross-lane operation with an operand
 536  * which supplies lane indexes (of type {@code VectorShuffle} or
 537  * {@code Vector}), suppressed lanes are not computed, and are filled
 538  * with the zero default value.  Normally, invalid lane indexes elicit
 539  * an {@code IndexOutOfBoundsException}, but if a lane is unset, the
 540  * zero value is quietly substituted, regardless of the index.  This
 541  * rule is similar to the previous rule, for masked memory loads.</li>
 542  *
 543  * <li>If the masked operation is a memory store or an {@code unslice()} into
 544  * another vector, suppressed lanes are not stored, and the
 545  * corresponding memory or vector locations (if any) are unchanged.
 546  *
 547  * <p> (Note: Memory effects such as race conditions never occur for
 548  * suppressed lanes.  That is, implementations will not secretly
 549  * re-write the existing value for unset lanes.  In the Java Memory
 550  * Model, reassigning a memory variable to its current value is not a
 551  * no-op; it may quietly undo a racing store from another
 552  * thread.)</li>
 553  *
 554  * <li>If the masked operation is a reduction, suppressed lanes are ignored
 555  * in the reduction.  If all lanes are suppressed, a suitable neutral
 556  * value is returned, depending on the specific reduction operation,
 557  * and documented by the masked variant of that method.  (This means
 558  * that users can obtain the neutral value programmatically by
 559  * executing the reduction on a dummy vector with an all-unset mask.)
 560  *
 561  * <li>If the masked operation is a comparison operation, suppressed output
 562  * lanes in the resulting mask are themselves unset, as if the
 563  * suppressed comparison operation returned {@code false} regardless
 564  * of the suppressed input values.  In effect, it is as if the
 565  * comparison operation were performed unmasked, and then the
 566  * result intersected with the controlling mask.</li>
 567  *
 568  * <li>In other cases, such as masked
 569  * <a href="Vector.html#cross-lane"><em>cross-lane movements</em></a>,
 570  * the specific effects of masking are documented by the masked
 571  * variant of the method.
 572  *
 573  * </ul>
 574  *
 575  * <p> As an example, a masked binary operation on two input vectors
 576  * {@code a} and {@code b} suppresses the binary operation for lanes
 577  * where the mask is unset, and retains the original lane value from
 578  * {@code a}.  The following pseudocode illustrates this behavior:
 579  * <pre>{@code
 580  * ETYPE scalar_binary_op(ETYPE s, ETYPE t);
 581  * EVector a = ...;
 582  * VectorSpecies<E> species = a.species();
 583  * EVector b = ...;
 584  * b.check(species);  // must have same species
 585  * VectorMask<E> m = ...;
 586  * m.check(species);  // must have same species
 587  * ETYPE[] ar = new ETYPE[a.length()];
 588  * for (int i = 0; i < ar.length; i++) {
 589  *     if (m.laneIsSet(i)) {
 590  *         ar[i] = scalar_binary_op(a.lane(i), b.lane(i));
 591  *     } else {
 592  *         ar[i] = a.lane(i);  // from first input
 593  *     }
 594  * }
 595  * EVector r = EVector.fromArray(species, ar, 0);
 596  * }</pre>
 597  *
 598  * <h1><a id="lane-order"></a>Lane order and byte order</h1>
 599  *
 600  * The number of lane values stored in a given vector is referred to
 601  * as its {@linkplain #length() vector length} or {@code VLENGTH}.
 602  *
 603  * It is useful to consider vector lanes as ordered
 604  * <em>sequentially</em> from first to last, with the first lane
 605  * numbered {@code 0}, the next lane numbered {@code 1}, and so on to
 606  * the last lane numbered {@code VLENGTH-1}.  This is a temporal
 607  * order, where lower-numbered lanes are considered earlier than
 608  * higher-numbered (later) lanes.  This API uses these terms
 609  * in preference to spatial terms such as "left", "right", "high",
 610  * and "low".
 611  *
 612  * <p> Temporal terminology works well for vectors because they
 613  * (usually) represent small fixed-sized segments in a long sequence
 614  * of workload elements, where the workload is conceptually traversed
 615  * in time order from beginning to end.  (This is a mental model: it
 616  * does not exclude multicore divide-and-conquer techniques.)  Thus,
 617  * when a scalar loop is transformed into a vector loop, adjacent
 618  * scalar items (one earlier, one later) in the workload end up as
 619  * adjacent lanes in a single vector (again, one earlier, one later).
 620  * At a vector boundary, the last lane item in the earlier vector is
 621  * adjacent to (and just before) the first lane item in the
 622  * immediately following vector.
 623  *
 624  * <p> Vectors are also sometimes thought of in spatial terms, where
 625  * the first lane is placed at an edge of some virtual paper, and
 626  * subsequent lanes are presented in order next to it.  When using
 627  * spatial terms, all directions are equally plausible: Some vector
 628  * notations present lanes from left to right, and others from right
 629  * to left; still others present from top to bottom or vice versa.
 630  * Using the language of time (before, after, first, last) instead of
 631  * space (left, right, high, low) is often more likely to avoid
 632  * misunderstandings.
 633  *
 634  * <p> A second reason to prefer temporal to spatial language about
 635  * vector lanes is the fact that the terms "left", "right", "high" and
 636  * "low" are widely used to describe the relations between bits in
 637  * scalar values.  The leftmost or highest bit in a given type is
 638  * likely to be a sign bit, while the rightmost or lowest bit is
 639  * likely to be the arithmetically least significant, and so on.
 640  * Applying these terms to vector lanes risks confusion, however,
 641  * because it is relatively rare to find algorithms where, given two
 642  * adjacent vector lanes, one lane is somehow more arithmetically
 643  * significant than its neighbor, and even in those cases, there is no
 644  * general way to know which neighbor is the more significant.
 645  *
 646  * <p> Putting the terms together, we view the information structure
 647  * of a vector as a temporal sequence of lanes ("first", "next",
 648  * "earlier", "later", "last", etc.)  of bit-strings which are
 649  * internally ordered spatially (either "low" to "high" or "right" to
 650  * "left").  The primitive values in the lanes are decoded from these
 651  * bit-strings, in the usual way.  Most vector operations, like most
 652  * Java scalar operators, treat primitive values as atomic values, but
 653  * some operations reveal the internal bit-string structure.
 654  *
 655  * <p> When a vector is loaded from or stored into memory, the order
 656  * of vector lanes is <em>always consistent </em> with the inherent
 657  * ordering of the memory container.  This is true whether or not
 658  * individual lane elements are subject to "byte swapping" due to
 659  * details of byte order.  Thus, while the scalar lane elements of a
 660  * vector might be "byte swapped", the lanes themselves are never
 661  * reordered, except by an explicit method call that performs
 662  * cross-lane reordering.
 663  *
 664  * <p> When vector lane values are stored to Java variables of the
 665  * same type, byte swapping is performed if and only if the
 666  * implementation of the vector hardware requires such swapping.  It
 667  * is therefore unconditional and invisible.
 668  *
 669  * <p> As a useful fiction, this API presents a consistent illusion
 670  * that vector lane bytes are composed into larger lane scalars in
 671  * <em>little endian order</em>.  This means that storing a vector
 672  * into a Java byte array will reveal the successive bytes of the
 673  * vector lane values in little-endian order on all platforms,
 674  * regardless of native memory order, and also regardless of byte
 675  * order (if any) within vector unit registers.
 676  *
 677  * <p> This hypothetical little-endian ordering also appears when a
 678  * {@linkplain #reinterpretShape(VectorSpecies,int) reinterpretation cast} is
 679  * applied in such a way that lane boundaries are discarded and
 680  * redrawn differently, while maintaining vector bits unchanged.  In
 681  * such an operation, two adjacent lanes will contribute bytes to a
 682  * single new lane (or vice versa), and the sequential order of the
 683  * two lanes will determine the arithmetic order of the bytes in the
 684  * single lane.  In this case, the little-endian convention provides
 685  * portable results, so that on all platforms earlier lanes tend to
 686  * contribute lower (rightward) bits, and later lanes tend to
 687  * contribute higher (leftward) bits.  The {@linkplain #reinterpretAsBytes()
 688  * reinterpretation casts} between {@link ByteVector}s and the
 689  * other non-byte vectors use this convention to clarify their
 690  * portable semantics.
 691  *
 692  * <p> The little-endian fiction for relating lane order to per-lane
 693  * byte order is slightly preferable to an equivalent big-endian
 694  * fiction, because some related formulas are much simpler,
 695  * specifically those which renumber bytes after lane structure
 696  * changes.  The earliest byte is invariantly earliest across all lane
 697  * structure changes, but only if little-endian conventions are used.
 698  * The root cause of this is that bytes in scalars are numbered from
 699  * the least significant (rightmost) to the most significant
 700  * (leftmost), and almost never vice-versa.  If we habitually numbered
 701  * sign bits as zero (as on some computers) then this API would reach
 702  * for big-endian fictions to create unified addressing of vector
 703  * bytes.
 704  *
 705  * <h1><a id="memory"></a>Memory operations</h1>
 706  *
 707  * As was already mentioned, vectors can be loaded from memory and
 708  * stored back.  An optional mask can control which individual memory
 709  * locations are read from or written to.  The shape of a vector
 710  * determines how much memory it will occupy.  In the absence of
 711  * masking, the lanes are stored as a dense sequence of back-to-back
 712  * values in memory, the same as a dense (gap-free) series of single
 713  * scalar values in an array of the scalar type.
 714  *
 715  * <p> Memory order corresponds exactly to lane order.  The first vector
 716  * lane value occupies the first position in memory, and so on, up to
 717  * the length of the vector.  Although memory order is not directly
 718  * defined by Java as a separate concept, the memory order of stored
 719  * vector lanes always corresponds to increasing index values in a
 720  * Java array or in a {@link java.nio.ByteBuffer}.
 721  *
 722  * <p> Byte order for lane storage is chosen such that the stored
 723  * vector values can be read or written as single primitive values,
 724  * within the array or buffer that holds the vector, producing the
 725  * same values as the lane-wise values within the vector.
 726  * This fact is independent of the convenient fiction that lane values
 727  * inside of vectors are stored in little-endian order.
 728  *
 729  * <p> For example,
 730  * {@link FloatVector#fromArray(VectorSpecies, float[], int)
 731  *        FloatVector.fromArray(fsp,fa,i)}
 732  * creates and returns a float vector of some particular species {@code fsp},
 733  * with elements loaded from some float array {@code fa}.
 734  * The first lane is loaded from {@code fa[i]} and the last lane
 735  * is loaded from {@code fa[i+VL-1]}, where {@code VL}
 736  * is the length of the vector as derived from the species {@code fsp}.
 737  * Then, {@link FloatVector#add(Vector) fv=fv.add(fv2)}
 738  * will produce another float vector of that species {@code fsp},
 739  * given a vector {@code fv2} of the same species {@code fsp}.
 740  * Next, {@link FloatVector#compare(VectorOperators.Comparison,float)
 741  * mnz=fv.compare(NE, 0.0f)} tests whether the result is zero,
 742  *
 743  * yielding a mask {@code mnz}.  The non-zero lanes (and only those
 744  * lanes) can then be stored back into the original array elements
 745  * using the statement
 746  * {@link FloatVector#intoArray(float[],int,VectorMask) fv.intoArray(fa,i,mnz)}.
 747  * 
 748  * <h1><a id="expansion"></a>Expansions, contractions, and partial results</h1>
 749  *
 750  * Since vectors are fixed in size, occasions often arise where the
 751  * logical result of an operation is not the same as the physical size
 752  * of the proposed output vector.  To encourage user code that is as
 753  * portable and predictable as possible, this API has a systematic
 754  * approach to the design of such <em>resizing</em> vector operations.
 755  *
 756  * <p> As a basic principle, lane-wise operations are
 757  * <em>length-invariant</em>.  Length-invariance simply means that
 758  * if {@code VLENGTH} lanes go into an operation, the same number
 759  * of lanes come out, with nothing discarded and no extra padding.
 760  *
 761  * <p> As a second principle, sometimes in tension with the first,
 762  * lane-wise operations are also <em>shape-invariant</em>, unless
 763  * clearly marked otherwise.
 764  *
 765  * Shape-invariance means that {@code VSHAPE} is constant for typical
 766  * computations.  Keeping the same shape throughout a computation
 767  * helps ensure that scarce vector resources are efficiently used.
 768  * (On some hardware platforms shape changes could cause unwanted
 769  * effects like extra data movement instructions, round trips through
 770  * memory, or pipeline bubbles.)
 771  *
 772  * <p> Tension between these principles arises when an operation
 773  * produces a <em>logical result</em> that is too large for the
 774  * required output {@code VSHAPE}.  In other cases, when a logical
 775  * result is smaller than the capacity of the output {@code VSHAPE},
 776  * the positioning of the logical result is open to question, since
 777  * the physical output vector must contain a mix of logical result and
 778  * padding.
 779  *
 780  * <p> In the first case, of a too-large logical result being crammed
 781  * into a too-small output {@code VSHAPE}, we say that data has
 782  * <em>expanded</em>.  In other words, an <em>expansion operation</em>
 783  * has caused the output shape to overflow.  Symmetrically, in the
 784  * second case of a small logical result fitting into a roomy output
 785  * {@code VSHAPE}, the data has <em>contracted</em>, and the
 786  * <em>contraction operation</em> has required the output shape to pad
 787  * itself with extra zero lanes.
 788  *
 789  * <p> In both cases we can speak of a parameter {@code M} which
 790  * measures the <em>expansion ratio</em> or <em>contraction ratio</em>
 791  * between the logical result size (in bits) and the bit-size of the
 792  * actual output shape.  When vector shapes are changed, and lane
 793  * sizes are not, {@code M} is just the integral ratio of the output
 794  * shape to the logical result.  (With the possible exception of
 795  * the {@linkplain VectorShape#S_Max_BIT maximum shape}, all vector
 796  * sizes are powers of two, and so the ratio {@code M} is always
 797  * an integer.  In the hypothetical case of a non-integral ratio,
 798  * the value {@code M} would be rounded up to the next integer,
 799  * and then the same general considerations would apply.)
 800  *
 801  * <p> If the logical result is larger than the physical output shape,
 802  * such a shape change must inevitably drop result lanes (all but
 803  * {@code 1/M} of the logical result).  If the logical size is smaller
 804  * than the output, the shape change must introduce zero-filled lanes
 805  * of padding (all but {@code 1/M} of the physical output).  The first
 806  * case, with dropped lanes, is an expansion, while the second, with
 807  * padding lanes added, is a contraction.
 808  *
 809  * <p> Similarly, consider a lane-wise conversion operation which
 810  * leaves the shape invariant but changes the lane size by a ratio of
 811  * {@code M}.  If the logical result is larger than the output (or
 812  * input), this conversion must reduce the {@code VLENGTH} lanes of the
 813  * output by {@code M}, dropping all but {@code 1/M} of the logical
 814  * result lanes.  As before, the dropping of lanes is the hallmark of
 815  * an expansion.  A lane-wise operation which contracts lane size by a
 816  * ratio of {@code M} must increase the {@code VLENGTH} by the same
 817  * factor {@code M}, filling the extra lanes with a zero padding
 818  * value; because padding must be added this is a contraction.
 819  *
 820  * <p> It is also possible (though somewhat confusing) to change both
 821  * lane size and container size in one operation which performs both
 822  * lane conversion <em>and</em> reshaping.  If this is done, the same
 823  * rules apply, but the logical result size is the product of the
 824  * input size times any expansion or contraction ratio from the lane
 825  * change size.
 826  *
 827  * <p> For completeness, we can also speak of <em>in-place
 828  * operations</em> for the frequent case when resizing does not occur.
 829  * With an in-place operation, the data is simply copied from logical
 830  * output to its physical container with no truncation or padding.
 831  * The ratio parameter {@code M} in this case is unity.
 832  *
 833  * <p> Note that the classification of contraction vs. expansion
 834  * depends on the relative sizes of the logical result and the
 835  * physical output container.  The size of the input container may be
 836  * larger or smaller than either of the other two values, without
 837  * changing the classification.  For example, a conversion from a
 838  * 128-bit shape to a 256-bit shape will be a contraction in many
 839  * cases, but it would be an expansion if it were combined with a
 840  * conversion from {@code byte} to {@code long}, since in that case
 841  * the logical result would be 1024 bits in size.  This example also
 842  * illustrates that a logical result does not need to correspond to
 843  * any particular platform-supported vector shape.
 844  *
 845  * <p> Although lane-wise masked operations can be viewed as producing
 846  * partial operations, they are not classified (in this API) as
 847  * expansions or contractions.  A masked load from an array surely
 848  * produces a partial vector, but there is no meaningful "logical
 849  * output vector" that this partial result was contracted from.
 850  *
 851  * <p> Some care is required with these terms, because it is the
 852  * <em>data</em>, not the <em>container size</em>, that is expanding
 853  * or contracting, relative to the size of its output container.
 854  * Thus, resizing a 128-bit input into a 512-bit vector has the effect
 855  * of a <em>contraction</em>.  Though the 128 bits of payload hasn't
 856  * changed in size, we can say it "looks smaller" in its new 512-bit
 857  * home, and this will capture the practical details of the situation.
 858  *
 859  * <p> If a vector method might expand its data, it accepts an extra
 860  * {@code int} parameter called {@code part}, or the "part number".
 861  * The part number must be in the range {@code [0..M-1]}, where
 862  * {@code M} is the expansion ratio.  The part number selects one
 863  * of {@code M} contiguous disjoint equally-sized blocks of lanes
 864  * from the logical result and fills the physical output vector
 865  * with this block of lanes.
 866  *
 867  * <p> Specifically, the lanes selected from the logical result of an
 868  * expansion are numbered in the range {@code [R..R+L-1]}, where
 869  * {@code L} is the {@code VLENGTH} of the physical output vector, and
 870  * the origin of the block, {@code R}, is {@code part*L}.
 871  *
 872  * <p> A similar convention applies to any vector method that might
 873  * contract its data.  Such a method also accepts an extra part number
 874  * parameter (again called {@code part}) which steers the contracted
 875  * data lanes into one of {@code M} contiguous disjoint equally-sized
 876  * blocks of lanes in the physical output vector.  The remaining lanes
 877  * are filled with zero, or as specified by the method.
 878  *
 879  * <p> Specifically, the data is steered into the lanes numbered in the
 880  * range {@code [R..R+L-1]}, where {@code L} is the {@code VLENGTH} of
 881  * the logical result vector, and the origin of the block, {@code R},
 882  * is again a multiple of {@code L} selected by the part number,
 883  * specifically {@code |part|*L}.
 884  *
 885  * <p> In the case of a contraction, the part number must be in the
 886  * non-positive range {@code [-M+1..0]}.  This convention is adopted
 887  * because some methods can perform both expansions and contractions,
 888  * in a data-dependent manner, and the extra sign on the part number
 889  * serves as an error check.  If a vector method takes a part number and
 890  * is invoked to perform an in-place operation (neither contracting
 891  * nor expanding), the {@code part} parameter must be exactly zero.
 892  * Part numbers outside the allowed ranges will elicit an indexing
 893  * exception.  Note that in all cases a zero part number is valid, and
 894  * corresponds to an operation which preserves as many lanes as
 895  * possible from the beginning of the logical result, and places them
 896  * into the beginning of the physical output container.  This is
 897  * often a desirable default, so a part number of zero is safe
 898  * in all cases and useful in most cases.
 899  *
 900  * <p> The various resizing operations of this API contract or expand
 901  * their data as follows:
 902  * <ul>
 903  *
 904  * <li>
 905  * {@link Vector#convert(VectorOperators.Conversion,int) Vector.convert()}
 906  * will expand (respectively, contract) its operand by ratio
 907  * {@code M} if the
 908  * {@linkplain #elementSize() element size} of its output is
 909  * larger (respectively, smaller) by a factor of {@code M}.
 910  * If the element sizes of input and output are the same,
 911  * then {@code convert()} is an in-place operation.
 912  *
 913  * <li>
 914  * {@link Vector#convertShape(VectorOperators.Conversion,VectorSpecies,int) Vector.convertShape()}
 915  * will expand (respectively, contract) its operand by ratio
 916  * {@code M} if the bit-size of its logical result is
 917  * larger (respectively, smaller) than the bit-size of its
 918  * output shape.
 919  * The size of the logical result is defined as the
 920  * {@linkplain #elementSize() element size} of the output,
 921  * times the {@code VLENGTH} of its input.
 922  *
 923  * Depending on the ratio of the changed lane sizes, the logical size
 924  * may be (in various cases) either larger or smaller than the input
 925  * vector, independently of whether the operation is an expansion
 926  * or contraction.
 927  *
 928  * <li>
 929  * Since {@link Vector#castShape(VectorSpecies,int) Vector.castShape()}
 930  * is a convenience method for {@code convertShape()}, its classification
 931  * as an expansion or contraction is the same as for {@code convertShape()}.
 932  *
 933  * <li>
 934  * {@link Vector#reinterpretShape(VectorSpecies,int) Vector.reinterpretShape()}
 935  * is an expansion (respectively, contraction) by ratio {@code M} if the
 936  * {@linkplain #bitSize() vector bit-size} of its input is
 937  * crammed into a smaller (respectively, dropped into a larger)
 938  * output container by a factor of {@code M}.
 939  * Otherwise it is an in-place operation.
 940  *
 941  * Since this method is a reinterpretation cast that can erase and
 942  * redraw lane boundaries as well as modify shape, the input vector's
 943  * lane size and lane count are irrelevant to its classification as
 944  * expanding or contracting.
 945  *
 946  * <li>
 947  * The {@link #unslice(int,Vector,int) unslice()} methods expand
 948  * by a ratio of {@code M=2}, because the single input slice is
 949  * positioned and inserted somewhere within two consecutive background
 950  * vectors.  The part number selects the first or second background
 951  * vector, as updated by the inserted slice.
 952  * Note that the corresponding
 953  * {@link #slice(int,Vector) slice()} methods, although inverse
 954  * to the {@code unslice()} methods, do not contract their data
 955  * and thus require no part number.  This is because
 956  * {@code slice()} delivers a slice of exactly {@code VLENGTH}
 957  * lanes extracted from two input vectors.
 958  * </ul>
 959  *
 960  * The method {@link VectorSpecies#partLimit(VectorSpecies,boolean)
 961  * partLimit()} on {@link VectorSpecies} can be used, before any
 962  * expanding or contracting operation is performed, to query the
 963  * limiting value on a part parameter for a proposed expansion
 964  * or contraction.  The value returned from {@code partLimit()} is
 965  * positive for expansions, negative for contractions, and zero for
 966  * in-place operations.  Its absolute value is the parameter {@code
 967  * M}, and so it serves as an exclusive limit on valid part number
 968  * arguments for the relevant methods.  Thus, for expansions, the
 969  * {@code partLimit()} value {@code M} is the exclusive upper limit
 970  * for part numbers, while for contractions the {@code partLimit()}
 971  * value {@code -M} is the exclusive <em>lower</em> limit.
 972  * 
 973  * <h1><a id="cross-lane"></a>Moving data across lane boundaries</h1>
 974  * The cross-lane methods which do not redraw lanes or change species
 975  * are more regularly structured and easier to reason about.
 976  * These operations are:
 977  * <ul>
 978  *
 979  * <li>The {@link #slice(int,Vector) slice()} family of methods,
 980  * which extract a contiguous slice of {@code VLENGTH} fields from
 981  * a given origin point within a concatenated pair of vectors.
 982  * 
 983  * <li>The {@link #unslice(int,Vector,int) unslice()} family of
 984  * methods, which insert a contiguous slice of {@code VLENGTH} fields
 985  * into a concatenated pair of vectors at a given origin point.
 986  *
 987  * <li>The {@link #rearrange(VectorShuffle) rearrange()} family of
 988  * methods, which select an arbitrary set of {@code VLENGTH} lanes
 989  * from one or two input vectors, and assemble them in an arbitrary
 990  * order.  The selection and order of lanes is controlled by a
 991  * {@code VectorShuffle} object, which acts as a routing table
 992  * mapping source lanes to destination lanes.  A {@code VectorShuffle}
 993  * can encode a mathematical permutation as well as many other
 994  * patterns of data movement.
 995  *
 996  * </ul> 
 997  * <p> Some vector operations are not lane-wise, but rather move data
 998  * across lane boundaries.  Such operations are typically rare in SIMD
 999  * code, though they are sometimes necessary for specific algorithms
1000  * that manipulate data formats at a low level, and/or require SIMD
1001  * data to move in complex local patterns.  (Local movement in a small
1002  * window of a large array of data is relatively unusual, although
1003  * some highly patterned algorithms call for it.)  In this API such
1004  * methods are always clearly recognizable, so that simpler lane-wise
1005  * reasoning can be confidently applied to the rest of the code.
1006  *
1007  * <p> In some cases, vector lane boundaries are discarded and
1008  * "redrawn from scratch", so that data in a given input lane might
1009  * appear (in several parts) distributed through several output lanes,
1010  * or (conversely) data from several input lanes might be consolidated
1011  * into a single output lane.  The fundamental method which can redraw
1012  * lane boundaries is
1013  * {@link #reinterpretShape(VectorSpecies,int) reinterpretShape()}.
1014  * Built on top of this method, certain convenience methods such
1015  * as {@link #reinterpretAsBytes() reinterpretAsBytes()} or
1016  * {@link #reinterpretAsInts() reinterpretAsInts()} will
1017  * (potentially) redraw lane boundaries, while retaining the
1018  * same overall vector shape.
1019  *
1020  * <p> Operations which produce or consume a scalar result can be
1021  * viewed as very simple cross-lane operations.  Methods in the
1022  * {@link #reduceLanesToLong(VectorOperators.Associative)
1023  * reduceLanes()} family fold together all lanes (or mask-selected
1024  * lanes) of a vector and return a single result.  As an inverse, the
1025  * {@link #broadcast(long) broadcast} family of methods can be thought
1026  * of as crossing lanes in the other direction, from a scalar to all
1027  * lanes of the output vector.  Single-lane access methods such as
1028  * {@code lane(I)} or {@code withLane(I,E)} might also be regarded as
1029  * very simple cross-lane operations.
1030  *
1031  * <p> Likewise, a method which moves a non-byte vector to or from a
1032  * byte array could be viewed as a cross-lane operation, because the
1033  * vector lanes must be distributed into separate bytes, or (in the
1034  * other direction) consolidated from array bytes.
1035  *
1036  * @implNote
1037  *
1038  * <h1>Hardware platform dependencies</h1>
1039  * 
1040  * The Vector API is designed to accelerate computations in the style of Single
1041  * Instruction Multiple Data (SIMD), using available hardware
1042  * resources such as vector hardware registers and vector hardware
1043  * instructions.  The API is designed to make effective use of
1044  * multiple SIMD hardware platforms.
1045  *
1046  * <p> This API will also work correctly even on Java platforms which
1047  * do not include specialized hardware support for SIMD computations.
1048  * The Vector API is not likely to provide any special performance
1049  * benefit on such platforms.
1050  *
1051  * <h1>No boxing of primitives</h1>
1052  *
1053  * Although a vector type like {@code Vector<Integer>} may seem to
1054  * work with boxed {@code Integer} values, the overheads associated
1055  * with boxing are avoided by having each vector subtype work
1056  * internally on lane values of the actual {@code ETYPE}, such as
1057  * {@code int}.  A few {@linkplain Vector#toList() interoperability
1058  * methods} are specified to work on boxed values.  These are
1059  * documented as <em>not</em> for use in inner loops.
1060  *
1061  * <h1>Value-based classes and identity operations</h1>
1062  *
1063  * {@code Vector}, along with all of its subtypes and many of its
1064  * helper types like {@code VectorMask} and {@code VectorShuffle}, is a
1065  * <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
1066  * class.
1067  *
1068  * <p> Once created, a vector is never mutated, not even if only
1069  * {@linkplain IntVector#withLane(int,int) a single lane is changed}.
1070  * A new vector is always created to hold a new configuration
1071  * of lane values.  The unavailability of mutative methods is a
1072  * necessary consequence of suppressing the object identity of
1073  * all vectors, as value-based classes.
1074  *
1075  * <p> With {@code Vector},
1076  *
1077  * <!-- The following paragraph is shared verbatim
1078  *   -- between Vector.java and package-info.java -->
1079  * identity-sensitive operations such as {@code ==} may yield
1080  * unpredictable results, or reduced performance.  Oddly enough,
1081  * {@link Vector#equals(Object) v.equals(w)} is likely to be faster
1082  * than {@code v==w}, since {@code equals} is <em>not</em> an identity
1083  * sensitive method.  It is also reasonable to use, on vectors, the
1084  * {@code toString} and {@code hashCode} methods of {@code Object}.
1085  *
1086  * Also, these objects can be stored in locals and parameters and as
1087  * {@code static final} constants, but storing them in other Java
1088  * fields or in array elements, while semantically valid, may incur
1089  * performance penalties.
1090  * <!-- The preceding paragraph is shared verbatim
1091  *   -- between Vector.java and package-info.java -->
1092  *
1093  * @param <E> the generic (boxed) version of the vector {@code ETYPE}
1094  * 
1095  */
1096 public abstract class Vector<E> {
1097 
1098     // Package-private constructor: this type is sealed within its
1099     // package, so users cannot roll their own vector types.
1100     Vector() {}
1101 
1102     /**
1103      * Returns the {@linkplain VectorSpecies species} of this vector.
1104      *
1105      * @return the species of this vector
1106      */
1107     public abstract VectorSpecies<E> species();
1108 
1109     /**
1110      * Returns the primitive element type ({@code ETYPE}) of this vector.
1111      * This is the same value as {@link VectorSpecies#elementType() this.species().elementType()}.
1112      *
1113      * @return the primitive element type of this vector
1114      */
1115     public abstract Class<E> elementType();
1116 
1117     /**
1118      * Returns the size of each lane, in bits, of this vector.
1119      * This is the same value as {@link VectorSpecies#elementSize() this.species().elementSize()}.
1120      *
1121      * @return the lane size, in bits, of this vector
1122      */
1123     public abstract int elementSize();
1124 
1125     /**
1126      * Returns the shape of this vector.
1127      * This is the same value as {@link VectorSpecies#vectorShape() this.species().vectorShape()}.
1128      *
1129      * @return the shape of this vector
1130      */
1131     public abstract VectorShape shape();
1132 
1133     /**
1134      * Returns the number of lanes ({@code VLENGTH}) in this vector.
1135      *
1136      * @return the number of lanes in this vector
1137      */
1138     public abstract int length();
1139 
1140     /**
1141      * Returns the total size, in bits, of this vector.
1142      * This is the same value as {@link VectorShape#vectorBitSize() this.shape().vectorBitSize()}.
1143      *
1144      * @return the total size, in bits, of this vector
1145      */
1146     public abstract int bitSize();
1147 
1148     /**
1149      * Returns the total size, in bytes, of this vector.
1150      * This is the same value as {@link #bitSize() this.bitSize()}{@code /Byte.SIZE}.
1151      *
1152      * @return the total size, in bytes, of this vector
1153      */
1154     public abstract int byteSize();
1155 
1156     /// Arithmetic
1157 
1158     /**
1159      * Operates on the lane values of this vector.
1160      *
1161      * This is a lane-wise unary operation which applies
1162      * the selected operation to each lane.
1163      *
1164      * <p>FIXME: Write about the unary operators here.
1165      *
1166      * @apiNote
1167      * Subtypes improve on this method by sharpening
1168      * the method return type.
1169      * @param op the operation used to process lane values
1170      * @return the result of applying the operation lane-wise
1171      *         to the input vector
1172      * @throws UnsupportedOperationException if this vector does
1173      *         not support the requested operation
1174      * @see #lanewise(VectorOperators.Unary,VectorMask)
1175      * @see #lanewise(VectorOperators.Binary,Vector)
1176      * @see #lanewise(VectorOperators.Ternary,Vector)
1177      */
1178     public abstract Vector<E> lanewise(VectorOperators.Unary op);
1179 
1180     /**
1181      * Operates on the lane values of this vector,
1182      * with selection of lane elements controlled by a mask.
1183      *
1184      * This is a lane-wise unary operation which applies
1185      * the selected operation to each lane.
1186      *
1187      * @apiNote
1188      * Subtypes improve on this method by sharpening
1189      * the method return type.
1190      * @param op the operation used to process lane values
1191      * @param m the mask controlling lane selection
1192      * @return the result of applying the operation lane-wise
1193      *         to the input vector
1194      * @throws UnsupportedOperationException if this vector does
1195      *         not support the requested operation
1196      * @see #lanewise(VectorOperators.Unary)
1197      */
1198     public abstract Vector<E> lanewise(VectorOperators.Unary op,
1199                                        VectorMask<E> m);
1200 
1201     /**
1202      * Combines the corresponding lane values of this vector
1203      * with those of a second input vector.
1204      *
1205      * This is a lane-wise binary operation which applies
1206      * the selected operation to each lane.
1207      *
1208      * <p>FIXME: Write about the binary operators here.
1209      * Shift counts are reduced (as unsigned values) modulo
1210      * {@code ESIZE}, so the shift is always in the range
1211      * {@code [0..ESIZE-1]}.
1212      * It is as if the shift value were subjected to a
1213      * bitwise logical {@code AND} operator ({@code &})
1214      * with the mask value {@code ESIZE-1}.
1215      *
1216      * @apiNote
1217      * Subtypes improve on this method by sharpening
1218      * the method return type.
1219      * @param op the operation used to combine lane values
1220      * @param v the input vector
1221      * @return the result of applying the operation lane-wise
1222      *         to the two input vectors
1223      * @throws UnsupportedOperationException if this vector does
1224      *         not support the requested operation
1225      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1226      * @see #lanewise(VectorOperators.Unary)
1227      * @see #lanewise(VectorOperators.Ternary,Vector)
1228      */
1229     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1230                                        Vector<E> v);
1231 
1232     /**
1233      * Combines the corresponding lane values of this vector
1234      * with those of a second input vector,
1235      * with selection of lane elements controlled by a mask.
1236      *
1237      * This is a masked lane-wise binary operation which applies
1238      * the selected operation to each lane.
1239      *
1240      * @apiNote
1241      * Subtypes improve on this method by sharpening the method return type.
1242      *
1243      * @param op the operation used to combine lane values
1244      * @param v the second input vector
1245      * @param m the mask controlling lane selection
1246      * @return the result of applying the operation lane-wise
1247      *         to the two input vectors
1248      * @throws UnsupportedOperationException if this vector does
1249      *         not support the requested operation
1250      * @see #lanewise(VectorOperators.Binary,Vector)
1251      */
1252     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1253                                        Vector<E> v, VectorMask<E> m);
1254 
1255     /**
1256      * Combines the lane values of this vector
1257      * with the value of a broadcast scalar.
1258      *
1259      * This is a lane-wise binary operation which applies
1260      * the selected operation to each lane.
1261      * The return value will be equal to this expression:
1262      * {@code this.lanewise(op, this.broadcast(e))}.
1263      *
1264      * @apiNote
1265      * The {@code long} value {@code e} must be accurately
1266      * representable by the {@code ETYPE} of this vector's species,
1267      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
1268      * by the implicit call to {@code broadcast()}.
1269      * <p>
1270      * Subtypes improve on this method by sharpening
1271      * the method return type and
1272      * the type of the scalar parameter {@code e}.
1273      *
1274      * @param op the operation used to combine lane values
1275      * @param e the input scalar
1276      * @return the result of applying the operation lane-wise
1277      *         to the input vector and the scalar
1278      * @throws UnsupportedOperationException if this vector does
1279      *         not support the requested operation
1280      * @throws IllegalArgumentException if the given {@code long}
1281      *         value cannot be represented by the right operand type
1282      *         of the vector operation
1283      * @see #broadcast(long)
1284      * @see #lanewise(VectorOperators.Binary,long,VectorMask)
1285      */
1286     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1287                                        long e);
1288 
1289     /**
1290      * Combines the lane values of this vector
1291      * with the value of a broadcast scalar,
1292      * with selection of lane elements controlled by a mask.
1293      *
1294      * This is a masked lane-wise binary operation which applies
1295      * the selected operation to each lane.
1296      * The second operand is a broadcast integral value.
1297      * The return value will be equal to this expression:
1298      * {@code this.lanewise(op, this.broadcast(e), m)}.
1299      *
1300      * @apiNote
1301      * The {@code long} value {@code e} must be accurately
1302      * representable by the {@code ETYPE} of this vector's species,
1303      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
1304      * by the implicit call to {@code broadcast()}.
1305      * <p>
1306      * Subtypes improve on this method by sharpening
1307      * the method return type and
1308      * the type of the scalar parameter {@code e}.
1309      *
1310      * @param op the operation used to combine lane values
1311      * @param e the input scalar
1312      * @param m the mask controlling lane selection
1313      * @return the result of applying the operation lane-wise
1314      *         to the input vector and the scalar
1315      * @throws UnsupportedOperationException if this vector does
1316      *         not support the requested operation
1317      * @throws IllegalArgumentException if the given {@code long}
1318      *         value cannot be represented by the right operand type
1319      *         of the vector operation
1320      * @see #broadcast(long)
1321      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1322      */
1323     public abstract Vector<E> lanewise(VectorOperators.Binary op,
1324                                        long e, VectorMask<E> m);
1325 
1326     /**
1327      * Combines the corresponding lane values of this vector
1328      * with the lanes of a second and a third input vector.
1329      *
1330      * This is a lane-wise ternary operation which applies
1331      * the selected operation to each lane.
1332      *
1333      * <p>FIXME: Write about the ternary operators here.
1334      * For now it's only {@code FMA} and {@code BITWISE_BLEND}.
1335      *
1336      * @apiNote
1337      * Subtypes improve on this method by sharpening the method return type.
1338      *
1339      * @param op the operation used to combine lane values
1340      * @param v1 the second input vector
1341      * @param v2 the third input vector
1342      * @return the result of applying the operation lane-wise
1343      *         to the three input vectors
1344      * @throws UnsupportedOperationException if this vector does
1345      *         not support the requested operation
1346      * @see #lanewise(VectorOperators.Unary)
1347      * @see #lanewise(VectorOperators.Binary,Vector)
1348      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1349      */
1350     public abstract Vector<E> lanewise(VectorOperators.Ternary op,
1351                                        Vector<E> v1,
1352                                        Vector<E> v2);
1353 
1354     /**
1355      * Combines the corresponding lane values of this vector
1356      * with the lanes of a second and a third input vector,
1357      * with selection of lane elements controlled by a mask.
1358      *
1359      * This is a masked lane-wise ternary operation which applies
1360      * the selected operation to each lane.
1361      *
1362      * @apiNote
1363      * Subtypes improve on this method by sharpening the method return type.
1364      *
1365      * @param op the operation used to combine lane values
1366      * @param v1 the second input vector
1367      * @param v2 the third input vector
1368      * @param m the mask controlling lane selection
1369      * @return the result of applying the operation lane-wise
1370      *         to the three input vectors
1371      * @throws UnsupportedOperationException if this vector does
1372      *         not support the requested operation
1373      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1374      */
1375     public abstract Vector<E> lanewise(VectorOperators.Ternary op,
1376                                        Vector<E> v1, Vector<E> v2,
1377                                        VectorMask<E> m);
1378 
1379     // Note:  lanewise(Binary) has two rudimentary broadcast
1380     // operations from an approximate scalar type (long).
1381     // We don't bother with that, here, for lanewise(Ternary).
1382     // The vector subtypes supply a full suite of
1383     // broadcasting and masked lanewise operations
1384     // for their specific ETYPEs:
1385     //   lanewise(Unary, [mask])
1386     //   lanewise(Binary, [e | v], [mask])
1387     //   lanewise(Ternary, [e1 | v1], [e2 | v2], [mask])
1388 
1389     /// Full-service binary ops: ADD, SUB, MUL, DIV
1390 
1391     // Full-service functions support all four variations
1392     // of vector vs. broadcast scalar, and mask vs. not.
1393     // The lanewise generic operator is (by this definition)
1394     // also a full-service function.
1395 
1396     // Other named functions handle just the one named
1397     // variation.  Most lanewise operations are *not* named,
1398     // and are reached only by lanewise.
1399 
1400     /**
1401      * Adds this vector to a second input vector.
1402      *
1403      * This is a lane-wise binary operation which applies
1404      * the primitive addition operation ({@code +})
1405      * to each pair of corresponding lane values.
1406      *
1407      * This method is also equivalent to the expression
1408      * {@link #lanewise(VectorOperators.Binary,Vector)
1409      *    lanewise}{@code (}{@link VectorOperators#ADD
1410      *    ADD}{@code , v)}.
1411      *
1412      * <p>
1413      * As a full-service named operation, this method
1414      * comes in masked and unmasked overloadings, and
1415      * (in subclasses) also comes in scalar-broadcast
1416      * overloadings (both masked and unmasked).
1417      *
1418      * @param v the second input vector
1419      * @return the result of adding this vector to the second input vector
1420      * @see #add(Vector,VectorMask)
1421      * @see IntVector#add(int)
1422      * @see VectorOperators#ADD
1423      * @see #lanewise(VectorOperators.Binary,Vector)
1424      * @see IntVector#lanewise(VectorOperators.Binary,int)
1425      */
1426     public abstract Vector<E> add(Vector<E> v);
1427 
1428     /**
1429      * Adds this vector to a second input vector, selecting lanes
1430      * under the control of a mask.
1431      *
1432      * This is a masked lane-wise binary operation which applies
1433      * the primitive addition operation ({@code +})
1434      * to each pair of corresponding lane values.
1435      *
1436      * For any lane unset in the mask, the primitive operation is
1437      * suppressed and this vector retains the original value stored in
1438      * that lane.
1439      *
1440      * This method is also equivalent to the expression
1441      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1442      *    lanewise}{@code (}{@link VectorOperators#ADD
1443      *    ADD}{@code , v, m)}.
1444      *
1445      * <p>
1446      * As a full-service named operation, this method
1447      * comes in masked and unmasked overloadings, and
1448      * (in subclasses) also comes in scalar-broadcast
1449      * overloadings (both masked and unmasked).
1450      *
1451      * @param v the second input vector
1452      * @param m the mask controlling lane selection
1453      * @return the result of adding this vector to the second input vector
1454      * @see #add(Vector)
1455      * @see IntVector#add(int,VectorMask)
1456      * @see VectorOperators#ADD
1457      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1458      * @see IntVector#lanewise(VectorOperators.Binary,int,VectorMask)
1459      */
1460     public abstract Vector<E> add(Vector<E> v, VectorMask<E> m);
1461 
1462     /**
1463      * Subtracts a second input vector from this vector.
1464      *
1465      * This is a lane-wise binary operation which applies
1466      * the primitive subtraction operation ({@code -})
1467      * to each pair of corresponding lane values.
1468      *
1469      * This method is also equivalent to the expression
1470      * {@link #lanewise(VectorOperators.Binary,Vector)
1471      *    lanewise}{@code (}{@link VectorOperators#SUB
1472      *    SUB}{@code , v)}.
1473      *
1474      * <p>
1475      * As a full-service named operation, this method
1476      * comes in masked and unmasked overloadings, and
1477      * (in subclasses) also comes in scalar-broadcast
1478      * overloadings (both masked and unmasked).
1479      *
1480      * @param v the second input vector
1481      * @return the result of subtracting the second input vector from this vector
1482      * @see #sub(Vector,VectorMask)
1483      * @see IntVector#sub(int)
1484      * @see VectorOperators#SUB
1485      * @see #lanewise(VectorOperators.Binary,Vector)
1486      * @see IntVector#lanewise(VectorOperators.Binary,int)
1487      */
1488     public abstract Vector<E> sub(Vector<E> v);
1489 
1490     /**
1491      * Subtracts a second input vector from this vector, selecting lanes
1492      * under the control of a mask.
1493      *
1494      * This is a masked lane-wise binary operation which applies
1495      * the primitive subtraction operation ({@code -})
1496      * to each pair of corresponding lane values.
1497      *
1498      * For any lane unset in the mask, the primitive operation is
1499      * suppressed and this vector retains the original value stored in
1500      * that lane.
1501      *
1502      * This method is also equivalent to the expression
1503      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1504      *    lanewise}{@code (}{@link VectorOperators#SUB
1505      *    SUB}{@code , v, m)}.
1506      *
1507      * <p>
1508      * As a full-service named operation, this method
1509      * comes in masked and unmasked overloadings, and
1510      * (in subclasses) also comes in scalar-broadcast
1511      * overloadings (both masked and unmasked).
1512      *
1513      * @param v the second input vector
1514      * @param m the mask controlling lane selection
1515      * @return the result of subtracting the second input vector from this vector
1516      * @see #sub(Vector)
1517      * @see IntVector#sub(int,VectorMask)
1518      * @see VectorOperators#SUB
1519      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1520      * @see IntVector#lanewise(VectorOperators.Binary,int,VectorMask)
1521      */
1522     public abstract Vector<E> sub(Vector<E> v, VectorMask<E> m);
1523 
1524     /**
1525      * Multiplies this vector by a second input vector.
1526      *
1527      * This is a lane-wise binary operation which applies
1528      * the primitive multiplication operation ({@code *})
1529      * to each pair of corresponding lane values.
1530      *
1531      * This method is also equivalent to the expression
1532      * {@link #lanewise(VectorOperators.Binary,Vector)
1533      *    lanewise}{@code (}{@link VectorOperators#MUL
1534      *    MUL}{@code , v)}.
1535      *
1536      * <p>
1537      * As a full-service named operation, this method
1538      * comes in masked and unmasked overloadings, and
1539      * (in subclasses) also comes in scalar-broadcast
1540      * overloadings (both masked and unmasked).
1541      *
1542      * @param v the second input vector
1543      * @return the result of multiplying this vector by the second input vector
1544      * @see #mul(Vector,VectorMask)
1545      * @see IntVector#mul(int)
1546      * @see VectorOperators#MUL
1547      * @see #lanewise(VectorOperators.Binary,Vector)
1548      * @see IntVector#lanewise(VectorOperators.Binary,int)
1549      */
1550     public abstract Vector<E> mul(Vector<E> v);
1551 
1552     /**
1553      * Multiplies this vector by a second input vector, selecting lanes
1554      * under the control of a mask.
1555      *
1556      * This is a masked lane-wise binary operation which applies
1557      * the primitive multiplication operation ({@code *})
1558      * to each pair of corresponding lane values.
1559      *
1560      * For any lane unset in the mask, the primitive operation is
1561      * suppressed and this vector retains the original value stored in
1562      * that lane.
1563      *
1564      * This method is also equivalent to the expression
1565      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1566      *    lanewise}{@code (}{@link VectorOperators#MUL
1567      *    MUL}{@code , v, m)}.
1568      *
1569      * <p>
1570      * As a full-service named operation, this method
1571      * comes in masked and unmasked overloadings, and
1572      * (in subclasses) also comes in scalar-broadcast
1573      * overloadings (both masked and unmasked).
1574      *
1575      * @param v the second input vector
1576      * @param m the mask controlling lane selection
1577      * @return the result of multiplying this vector by the second input vector
1578      * @see #mul(Vector)
1579      * @see IntVector#mul(int,VectorMask)
1580      * @see VectorOperators#MUL
1581      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1582      * @see IntVector#lanewise(VectorOperators.Binary,int,VectorMask)
1583      */
1584     public abstract Vector<E> mul(Vector<E> v, VectorMask<E> m);
1585 
1586     /**
1587      * Divides this vector by a second input vector.
1588      *
1589      * This is a lane-wise binary operation which applies
1590      * the primitive division operation ({@code /})
1591      * to each pair of corresponding lane values.
1592      *
1593      * This method is also equivalent to the expression
1594      * {@link #lanewise(VectorOperators.Binary,Vector)
1595      *    lanewise}{@code (}{@link VectorOperators#DIV
1596      *    DIV}{@code , v)}.
1597      *
1598      * <p>
1599      * If the underlying scalar operator does not support
1600      * division by zero, but is presented with a zero divisor,
1601      * an {@code ArithmeticException} will be thrown.
1602      *
1603      * <p>
1604      * As a full-service named operation, this method
1605      * comes in masked and unmasked overloadings, and
1606      * (in subclasses) also comes in scalar-broadcast
1607      * overloadings (both masked and unmasked).
1608      *
1609      * @param v the second input vector
1610      * @return the result of dividing this vector by the second input vector
1611      * @throws ArithmeticException if any lane
1612      *         in {@code v} is zero
1613      *         and {@code ETYPE} is not {@code float} or {@code double}.
1614      * @see #div(Vector,VectorMask)
1615      * @see DoubleVector#div(double)
1616      * @see VectorOperators#DIV
1617      * @see #lanewise(VectorOperators.Binary,Vector)
1618      * @see IntVector#lanewise(VectorOperators.Binary,int)
1619      */
1620     public abstract Vector<E> div(Vector<E> v);
1621 
1622     /**
1623      * Divides this vector by a second input vector, selecting lanes
1624      * under the control of a mask.
1625      *
1626      * This is a masked lane-wise binary operation which applies
1627      * the primitive division operation ({@code /})
1628      * to each pair of corresponding lane values.
1629      *
1630      * For any lane unset in the mask, the primitive operation is
1631      * suppressed and this vector retains the original value stored in
1632      * that lane.
1633      *
1634      * This method is also equivalent to the expression
1635      * {@link #lanewise(VectorOperators.Binary,Vector,VectorMask)
1636      *    lanewise}{@code (}{@link VectorOperators#DIV
1637      *    DIV}{@code , v, m)}.
1638      *
1639      * <p>
1640      * If the underlying scalar operator does not support
1641      * division by zero, but is presented with a zero divisor,
1642      * an {@code ArithmeticException} will be thrown.
1643      *
1644      * <p>
1645      * As a full-service named operation, this method
1646      * comes in masked and unmasked overloadings, and
1647      * (in subclasses) also comes in scalar-broadcast
1648      * overloadings (both masked and unmasked).
1649      *
1650      * @param v the second input vector
1651      * @param m the mask controlling lane selection
1652      * @return the result of dividing this vector by the second input vector
1653      * @throws ArithmeticException if any lane selected by {@code m}
1654      *         in {@code v} is zero
1655      *         and {@code ETYPE} is not {@code float} or {@code double}.
1656      * @see #div(Vector)
1657      * @see DoubleVector#div(double,VectorMask)
1658      * @see VectorOperators#DIV
1659      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1660      * @see DoubleVector#lanewise(VectorOperators.Binary,double,VectorMask)
1661      */
1662     public abstract Vector<E> div(Vector<E> v, VectorMask<E> m);
1663 
1664     /// END OF FULL-SERVICE BINARY METHODS
1665 
1666     /// Non-full-service unary ops: NEG, ABS
1667 
1668     /**
1669      * Negates this vector.
1670      *
1671      * This is a lane-wise unary operation which applies
1672      * the primitive negation operation ({@code -x})
1673      * to each input lane.
1674      *
1675      * This method is also equivalent to the expression
1676      * {@link #lanewise(VectorOperators.Unary)
1677      *    lanewise}{@code (}{@link VectorOperators#NEG
1678      *    NEG}{@code )}.
1679      *
1680      * @apiNote
1681      * This method has no masked variant, but the corresponding
1682      * masked operation can be obtained from the
1683      * {@linkplain #lanewise(VectorOperators.Unary,VectorMask)
1684      * lanewise method}.
1685      *
1686      * @return the negation of this vector
1687      * @see VectorOperators#NEG
1688      * @see #lanewise(VectorOperators.Unary)
1689      * @see #lanewise(VectorOperators.Unary,VectorMask)
1690      */
1691     public abstract Vector<E> neg();
1692 
1693     /**
1694      * Returns the absolute value of this vector.
1695      *
1696      * This is a lane-wise unary operation which applies
1697      * the method {@code Math.abs}
1698      * to each input lane.
1699      *
1700      * This method is also equivalent to the expression
1701      * {@link #lanewise(VectorOperators.Unary)
1702      *    lanewise}{@code (}{@link VectorOperators#ABS
1703      *    ABS}{@code )}.
1704      *
1705      * <p>
1706      * This method has no masked variant, but the corresponding
1707      * masked operation can be obtained from the
1708      * {@linkplain #lanewise(VectorOperators.Unary,VectorMask)
1709      * lanewise method}.
1710      *
1711      * @return the absolute value of this vector
1712      * @see VectorOperators#ABS
1713      * @see #lanewise(VectorOperators.Unary)
1714      * @see #lanewise(VectorOperators.Unary,VectorMask)
1715      */
1716     public abstract Vector<E> abs();
1717 
1718     /// Non-full-service binary ops: MIN, MAX
1719 
1720     /**
1721      * Computes the smaller of this vector and a second input vector.
1722      *
1723      * This is a lane-wise binary operation which applies the
1724      * operation {@code (a, b) -> a < b ? a : b} to each pair of
1725      * corresponding lane values.
1726      *
1727      * This method is also equivalent to the expression
1728      * {@link #lanewise(VectorOperators.Binary,Vector)
1729      *    lanewise}{@code (}{@link VectorOperators#MIN
1730      *    MIN}{@code , v)}.
1731      *
1732      * <p>
1733      * This is not a full-service named operation like
1734      * {@link #add(Vector) add()}.  A masked version of
1735      * this operation is not directly available
1736      * but may be obtained via the masked version of
1737      * {@code lanewise}.  Subclasses define an additional
1738      * scalar-broadcast overloading of this method.
1739      *
1740      * @param v the second input vector
1741      * @return the lanewise minimum of this vector and the second input vector
1742      * @see IntVector#min(int)
1743      * @see VectorOperators#MIN
1744      * @see #lanewise(VectorOperators.Binary,Vector)
1745      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1746      */
1747     public abstract Vector<E> min(Vector<E> v);
1748 
1749     /**
1750      * Computes the larger of this vector and a second input vector.
1751      *
1752      * This is a lane-wise binary operation which applies the
1753      * operation {@code (a, b) -> a > b ? a : b} to each pair of
1754      * corresponding lane values.
1755      *
1756      * This method is also equivalent to the expression
1757      * {@link #lanewise(VectorOperators.Binary,Vector)
1758      *    lanewise}{@code (}{@link VectorOperators#MAX
1759      *    MAX}{@code , v)}.
1760      *
1761      * <p>
1762      * This is not a full-service named operation like
1763      * {@link #add(Vector) add()}.  A masked version of
1764      * this operation is not directly available
1765      * but may be obtained via the masked version of
1766      * {@code lanewise}.  Subclasses define an additional
1767      * scalar-broadcast overloading of this method.
1768      *
1769      * @param v the second input vector
1770      * @return the lanewise maximum of this vector and the second input vector
1771      * @see IntVector#max(int)
1772      * @see VectorOperators#MAX
1773      * @see #lanewise(VectorOperators.Binary,Vector)
1774      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1775      */
1776     public abstract Vector<E> max(Vector<E> v);
1777 
1778     // Reductions
1779 
1780     /**
1781      * Returns a value accumulated from all the lanes of this vector.
1782      *
1783      * This is an associative cross-lane reduction operation which
1784      * applies the specified operation to all the lane elements.
1785      * The result is delivered as a {@code long} value, rather
1786      * than the vector's native {@code ETYPE}.
1787      *
1788      * @apiNote
1789      * If the {@code ETYPE} is {@code float} or {@code double},
1790      * this operation can lose precision and/or range, as a
1791      * normal part of casting the result down to {@code long}.
1792      * <p>
1793      * Usually
1794      * {@linkplain IntVector#reduceLanes(VectorOperators.Associative)
1795      * strongly typed access}
1796      * is preferable, if you are working with a vector
1797      * subtype that has a known element type.
1798      *
1799      * @implNote
1800      * The value of a floating-point reduction may be a function
1801      * both of the input values as well as the order of scalar
1802      * operations which combine those values, specifically in the
1803      * case of {@code ADD} and {@code MUL} operations, where
1804      * details of rounding depend on operand order.
1805      * See {@link FloatVector#reduceLanes(VectorOperators.Associative)
1806      * FloatVector.reduceLanes()} for a discussion.
1807      *
1808      * @param op the operation used to combine lane values
1809      * @return the accumulated result, cast to {@code long}
1810      * @throws UnsupportedOperationException if this vector does
1811      *         not support the requested operation
1812      * @see #reduceLanesToLong(VectorOperators.Associative,VectorMask)
1813      * @see IntVector#reduceLanes(VectorOperators.Associative)
1814      * @see FloatVector#reduceLanes(VectorOperators.Associative)
1815      */
1816     public abstract long reduceLanesToLong(VectorOperators.Associative op);
1817 
1818     /**
1819      * Returns a value accumulated from selected lanes of this vector,
1820      * controlled by a mask.
1821      *
1822      * This is an associative cross-lane reduction operation which
1823      * applies the specified operation to the selected lane elements.
1824      * The result is delivered as a {@code long} value, rather
1825      * than the vector's native {@code ETYPE}.
1826      * <p>
1827      * If no elements are selected, an operation-specific identity
1828      * value is returned.
1829      * <ul>
1830      * <li>
1831      * If the operation is {@code ADD}, {@code XOR}, or {@code OR},
1832      * then the identity value is zero.
1833      * <li>
1834      * If the operation is {@code MUL},
1835      * then the identity value is one.
1836      * <li>
1837      * If the operation is {@code AND},
1838      * then the identity value is minus one (all bits set).
1839      * <li>
1840      * If the operation is {@code MAX},
1841      * then the identity value is the {@code MIN_VALUE}
1842      * of the vector's native {@code ETYPE}.
1843      * (In the case of floating point types, the value
1844      * {@code NEGATIVE_INFINITY} is used, and will appear
1845      * after casting as {@code Long.MIN_VALUE}.)
1846      * <li>
1847      * If the operation is {@code MIN},
1848      * then the identity value is the {@code MAX_VALUE}
1849      * of the vector's native {@code ETYPE}.
1850      * (In the case of floating point types, the value
1851      * {@code POSITIVE_INFINITY} is used, and will appear
1852      * after casting as {@code Long.MAX_VALUE}.)
1853      * </ul>
1854      *
1855      * @apiNote
1856      * If the {@code ETYPE} is {@code float} or {@code double},
1857      * this operation can lose precision and/or range, as a
1858      * normal part of casting the result down to {@code long}.
1859      * <p>
1860      * Usually
1861      * {@linkplain IntVector#reduceLanes(VectorOperators.Associative,VectorMask)
1862      * strongly typed access}
1863      * is preferable, if you are working with a vector
1864      * subtype that has a known element type.
1865      *
1866      * @implNote
1867      * The value of a floating-point reduction may be a function
1868      * both of the input values as well as the order of scalar
1869      * operations which combine those values, specifically in the
1870      * case of {@code ADD} and {@code MUL} operations, where
1871      * details of rounding depend on operand order.
1872      * See {@link FloatVector#reduceLanes(VectorOperators.Associative)
1873      * FloatVector.reduceLanes()} for a discussion.
1874      *
1875      * @param op the operation used to combine lane values
1876      * @param m the mask controlling lane selection
1877      * @return the reduced result accumulated from the selected lane values
1878      * @throws UnsupportedOperationException if this vector does
1879      *         not support the requested operation
1880      * @see #reduceLanesToLong(VectorOperators.Associative)
1881      * @see IntVector#reduceLanes(VectorOperators.Associative,VectorMask)
1882      * @see FloatVector#reduceLanes(VectorOperators.Associative,VectorMask)
1883      */
1884     public abstract long reduceLanesToLong(VectorOperators.Associative op,
1885                                            VectorMask<E> m);
1886 
1887 
1888     // Comparisons
1889 
1890     /**
1891      * Tests if this vector is equal to another input vector.
1892      *
1893      * This is a lane-wise binary test operation which applies
1894      * the primitive equals operation ({@code ==})
1895      * to each pair of corresponding lane values.
1896      * The result is the same as {@code compare(VectorOperators.EQ, v)}.
1897      *
1898      * @param v the second input vector
1899      * @return the mask result of testing lane-wise if this vector
1900      *         is equal to the second input vector
1901      * @see #compare(VectorOperators.Comparison,Vector)
1902      * @see VectorOperators#EQ
1903      */
1904     public abstract VectorMask<E> eq(Vector<E> v);
1905 
1906     /**
1907      * Tests if this vector is less than another input vector.
1908      *
1909      * This is a lane-wise binary test operation which applies the primitive
1910      * less-than operation ({@code <}) to each pair of corresponding lane values.
1911      * The result is the same as {@code compare(VectorOperators.LT, v)}.
1912      *
1913      * @param v the second input vector
1914      * @return the mask result of testing lane-wise if this vector
1915      *         is less than the second input vector
1916      * @see #compare(VectorOperators.Comparison,Vector)
1917      * @see VectorOperators#LT
1918      */
1919     public abstract VectorMask<E> lt(Vector<E> v);
1920 
1921     /**
1922      * Tests this vector by comparing it with another input vector,
1923      * according to the given comparison operation.
1924      *
1925      * This is a lane-wise binary test operation which applies the given
1926      * comparison operation to each pair of corresponding lane values.
1927      *
1928      * @param op the operation used to compare lane values
1929      * @param v the second input vector
1930      * @return the mask result of testing lane-wise if this vector compares
1931      *         to the input, according to the selected comparison operator
1932      * @see #eq(Vector)
1933      * @see #lt(Vector)
1934      * @see VectorOperators.Comparison
1935      * @see #compare(VectorOperators.Comparison, Vector, VectorMask)
1936      */
1937     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
1938                                           Vector<E> v);
1939 
1940     /**
1941      * Tests this vector by comparing it with another input vector,
1942      * according to the given comparison operation,
1943      * in lanes selected by a mask.
1944      *
1945      * This is a masked lane-wise binary test operation which applies the
1946      * given comparison to each pair of corresponding lane values.
1947      *
1948      * The returned result is equal to the expression
1949      * {@code compare(op,v).and(m)}.
1950      *
1951      * @param op the operation used to compare lane values
1952      * @param v a second input vector
1953      * @param m the mask controlling lane selection
1954      * @return the mask result of testing lane-wise if this vector compares
1955      *         to the input, according to the selected comparison operator,
1956      *         and only in the lanes selected by the mask
1957      * @see #compare(VectorOperators.Comparison, Vector)
1958      */
1959     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
1960                                           Vector<E> v,
1961                                           VectorMask<E> m);
1962 
1963     /**
1964      * Tests this vector by comparing it with an input scalar,
1965      * according to the given comparison operation.
1966      *
1967      * This is a lane-wise binary test operation which applies the
1968      * given comparison to each pair of corresponding lane values.
1969      *
1970      * <p>
1971      * The result is the same as
1972      * {@code this.compare(op, this.broadcast(e))}.
1973      * That is, the scalar may be regarded as broadcast to
1974      * a vector of the same species, and then compared
1975      * against the original vector, using the selected
1976      * comparison operation.
1977      *
1978      * @apiNote
1979      * The {@code long} value {@code e} must be accurately
1980      * representable by the {@code ETYPE} of this vector's species,
1981      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
1982      * by the implicit call to {@code broadcast()}.
1983      * <p>
1984      * Subtypes improve on this method by sharpening
1985      * the type of the scalar parameter {@code e}.
1986      *
1987      * @param op the operation used to compare lane values
1988      * @param e the input scalar
1989      * @return the mask result of testing lane-wise if this vector
1990      *         compares to the input, according to the selected
1991      *         comparison operator
1992      * @throws IllegalArgumentException
1993      *         if the given {@code long} value cannot
1994      *         be represented by the vector's {@code ETYPE}
1995      * @see #broadcast(long)
1996      * @see #compare(VectorOperators.Comparison,Vector)
1997      */
1998     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
1999                                           long e);
2000 
2001     /**
2002      * Tests this vector by comparing it with an input scalar,
2003      * according to the given comparison operation,
2004      * in lanes selected by a mask.
2005      *
2006      * This is a masked lane-wise binary test operation which applies the
2007      * given comparison to each pair of corresponding lane values.
2008      *
2009      * The returned result is equal to the expression
2010      * {@code compare(op,e).and(m)}.
2011      *
2012      * @apiNote
2013      * The {@code long} value {@code e} must be accurately
2014      * representable by the {@code ETYPE} of this vector's species,
2015      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
2016      * by the implicit call to {@code broadcast()}.
2017      * <p>
2018      * Subtypes improve on this method by sharpening
2019      * the type of the scalar parameter {@code e}.
2020      *
2021      * @param op the operation used to compare lane values
2022      * @param e the input scalar
2023      * @param m the mask controlling lane selection
2024      * @return the mask result of testing lane-wise if this vector compares
2025      *         to the input, according to the selected comparison operator,
2026      *         and only in the lanes selected by the mask
2027      * @throws IllegalArgumentException
2028      *         if the given {@code long} value cannot
2029      *         be represented by the vector's {@code ETYPE}
2030      * @see #broadcast(long)
2031      * @see #compare(VectorOperators.Comparison,Vector)
2032      */
2033     public abstract VectorMask<E> compare(VectorOperators.Comparison op,
2034                                           long e,
2035                                           VectorMask<E> m);
2036 
2037     /**
2038      * Replaces selected lanes of this vector with
2039      * corresponding lanes from a second input vector
2040      * under the control of a mask.
2041      *
2042      * This is a masked lane-wise binary operation which
2043      * selects each lane value from one or the other input.
2044      *
2045      * <ul>
2046      * <li>
2047      * For any lane <em>set</em> in the mask, the new lane value
2048      * is taken from the second input vector, and replaces
2049      * whatever value was in that lane of this vector.
2050      * <li>
2051      * For any lane <em>unset</em> in the mask, the replacement is
2052      * suppressed and this vector retains the original value stored in
2053      * that lane.
2054      * </ul>
2055      *
2056      * The following pseudocode illustrates this behavior:
2057      * <pre>{@code
2058      * Vector<E> a = ...;
2059      * VectorSpecies<E> species = a.species();
2060      * Vector<E> b = ...;
2061      * b.check(species);
2062      * VectorMask<E> m = ...;
2063      * ETYPE[] ar = a.toArray();
2064      * for (int i = 0; i < ar.length; i++) {
2065      *     if (m.laneIsSet(i)) {
2066      *         ar[i] = b.lane(i);
2067      *     }
2068      * }
2069      * return EVector.fromArray(species, ar, 0);
2070      * }</pre>
2071      *
2072      * @param v the second input vector, containing replacement lane values
2073      * @param m the mask controlling lane selection from the second input vector
2074      * @return the result of blending the lane elements of this vector with
2075      *         those of the second input vector
2076      */
2077     public abstract Vector<E> blend(Vector<E> v, VectorMask<E> m);
2078 
2079     /**
2080      * Replaces selected lanes of this vector with a scalar value
2081      * under the control of a mask.
2082      *
2083      * This is a masked lane-wise binary operation which
2084      * selects each lane value from one or the other input.
2085      *
2086      * The result is equal to the expression {@code blend(broadcast(e),m)}.
2087      *
2088      * @apiNote
2089      * The {@code long} value {@code e} must be accurately
2090      * representable by the {@code ETYPE} of this vector's species,
2091      * so that {@code e==(long)(ETYPE)e}.  This rule is enforced
2092      * by the implicit call to {@code broadcast()}.
2093      * <p>
2094      * Subtypes improve on this method by sharpening
2095      * the type of the scalar parameter {@code e}.
2096      *
2097      * @param e the input scalar, containing the replacement lane value
2098      * @param m the mask controlling lane selection of the scalar
2099      * @return the result of blending the lane elements of this vector with
2100      *         the scalar value
2101      * @throws IllegalArgumentException if the given {@code long} value
2102      *         cannot be represented by the vector's {@code ETYPE}
2103      */
2104     public abstract Vector<E> blend(long e, VectorMask<E> m);
2105 
2106     /**
2107      * Adds the lanes of this vector to their corresponding
2108      * lane numbers, scaled by a given constant.
2109      *
2110      * This is a lane-wise unary operation which, for
2111      * each lane {@code N}, computes the scaled index value
2112      * {@code N*scale} and adds it to the value already
2113      * in lane {@code N} of the current vector.
2114      *
2115      * <p> The scale must not be so large, and the element size must
2116      * not be so small, that there would be an overflow when
2117      * computing any of the {@code N*scale} or {@code VLENGTH*scale},
2118      * when the result is represented using the vector
2119      * lane type {@code ETYPE}.
2120      *
2121      * <p>
2122      * The following pseudocode illustrates this behavior:
2123      * <pre>{@code
2124      * Vector<E> a = ...;
2125      * VectorSpecies<E> species = a.species();
2126      * ETYPE[] ar = a.toArray();
2127      * for (int i = 0; i < ar.length; i++) {
2128      *     long d = (long)i * scale;
2129      *     if (d != (ETYPE) d)  throw ...;
2130      *     ar[i] += (ETYPE) d;
2131      * }
2132      * long d = (long)ar.length * scale;
2133      * if (d != (ETYPE) d)  throw ...;
2134      * return EVector.fromArray(species, ar, 0);
2135      * }</pre>
2136      *
2137      * @param scale the number to multiply by each lane index
2138      *        {@code N}, typically {@code 1}
2139      * @return the result of incrementing each lane element by its
2140      *         corresponding lane index {@code N}, scaled by {@code scale}
2141      * @throws IllegalArgumentException
2142      *         if the values in the interval
2143      *         {@code [0..VLENGTH*scale]}
2144      *         are not representable by the {@code ETYPE}
2145      */
2146     public abstract Vector<E> addIndex(int scale);
2147 
2148     // Slicing segments of adjacent lanes
2149 
2150     /**
2151      * Slices a segment of adjacent lanes, starting at a given
2152      * {@code origin} lane in the current vector, and continuing (as
2153      * needed) into an immediately following vector.  The block of
2154      * {@code VLENGTH} lanes is extracted into its own vector and
2155      * returned.
2156      *
2157      * <p> This is a cross-lane operation that shifts lane elements
2158      * to the front, from the current vector and the second vector.
2159      * Both vectors can be viewed as a combined "background" of length
2160      * {@code 2*VLENGTH}, from which a slice is extracted.
2161      *
2162      * The lane numbered {@code N} in the output vector is copied
2163      * from lane {@code origin+N} of the input vector, if that
2164      * lane exists, else from lane {@code origin+N-VLENGTH} of
2165      * the second vector (which is guaranteed to exist).
2166      *
2167      * <p> The {@code origin} value must be in the inclusive range
2168      * {@code 0..VLENGTH}.  As limiting cases, {@code v.slice(0,w)}
2169      * and {@code v.slice(VLENGTH,w)} return {@code v} and {@code w},
2170      * respectively.
2171      *
2172      * @apiNote
2173      *
2174      * This method may be regarded as the inverse of
2175      * {@link #unslice(int,Vector,int) unslice()},
2176      * in that the sliced value could be unsliced back into its
2177      * original position in the two input vectors, without
2178      * disturbing unrelated elements, as in the following
2179      * pseudocode:
2180      * <pre>{@code
2181      * EVector slice = v1.slice(origin, v2);
2182      * EVector w1 = slice.unslice(origin, v1, 0);
2183      * EVector w2 = slice.unslice(origin, v2, 1);
2184      * assert v1.equals(w1);
2185      * assert v2.equals(w2);
2186      * }</pre>
2187      *
2188      * <p> This method also supports a variety of cross-lane shifts and
2189      * rotates as follows:
2190      * <ul>
2191      *
2192      * <li>To shift lanes forward to the front of the vector, supply a
2193      * zero vector for the second operand and specify the shift count
2194      * as the origin.  For example: {@code v.slice(shift, v.broadcast(0))}.
2195      *
2196      * <li>To shift lanes backward to the back of the vector, supply a
2197      * zero vector for the <em>first</em> operand, and specify the
2198      * negative shift count as the origin (modulo {@code VLENGTH}).
2199      * For example: {@code v.broadcast(0).slice(v.length()-shift, v)}.
2200      *
2201      * <li>To rotate lanes forward toward the front end of the vector,
2202      * cycling the earliest lanes around to the back, supply the same
2203      * vector for both operands and specify the rotate count as the
2204      * origin.  For example: {@code v.slice(rotate, v)}.
2205      *
2206      * <li>To rotate lanes backward toward the back end of the vector,
2207      * cycling the latest lanes around to the front, supply the same
2208      * vector for both operands and specify the negative of the rotate
2209      * count (modulo {@code VLENGTH}) as the origin.  For example:
2210      * {@code v.slice(v.length() - rotate, v)}.
2211      *
2212      * <li>
2213      * Since {@code origin} values less than zero or more than
2214      * {@code VLENGTH} will be rejected, if you need to rotate
2215      * by an unpredictable multiple of {@code VLENGTH}, be sure
2216      * to reduce the origin value into the required range.
2217      * The {@link VectorSpecies#loopBound(int) loopBound()}
2218      * method can help with this.  For example:
2219      * {@code v.slice(rotate - v.species().loopBound(rotate), v)}.
2220      *
2221      * </ul>
2222      *
2223      * @param origin the first input lane to transfer into the slice
2224      * @param v1 a second vector logically concatenated with the first,
2225      *        before the slice is taken (if omitted it defaults to zero)
2226      * @return a contiguous slice of {@code VLENGTH} lanes, taken from
2227      *         this vector starting at the indicated origin, and
2228      *         continuing (as needed) into the second vector
2229      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2230      *         is negative or greater than {@code VLENGTH}
2231      * @see #slice(int,Vector,VectorMask)
2232      * @see #slice(int)
2233      * @see #unslice(int,Vector,int)
2234      */
2235     public abstract Vector<E> slice(int origin, Vector<E> v1);
2236 
2237     /**
2238      * Slices a segment of adjacent lanes
2239      * under the control of a mask,
2240      * starting at a given
2241      * {@code origin} lane in the current vector, and continuing (as
2242      * needed) into an immediately following vector.  The block of
2243      * {@code VLENGTH} lanes is extracted into its own vector and
2244      * returned.
2245      *
2246      * The resulting vector will be zero in all lanes unset in the
2247      * given mask.  Lanes set in the mask will contain data copied
2248      * from selected lanes of {@code this} or {@code v1}.
2249      *
2250      * <p> This is a cross-lane operation that shifts lane elements
2251      * to the front, from the current vector and the second vector.
2252      * Both vectors can be viewed as a combined "background" of length
2253      * {@code 2*VLENGTH}, from which a slice is extracted.
2254      *
2255      * The returned result is equal to the expression
2256      * {@code broadcast(0).blend(slice(origin,v1),m)}.
2257      *
2258      * @apiNote
2259      * This method may be regarded as the inverse of
2260      * {@link #unslice(int,Vector,int,VectorMask) unslice()},
2261      * in that the sliced value could be unsliced back into its
2262      * original position in the two input vectors, without
2263      * disturbing unrelated elements, as in the following
2264      * pseudocode:
2265      * <pre>{@code
2266      * EVector slice = v1.slice(origin, v2, m);
2267      * EVector w1 = slice.unslice(origin, v1, 0, m);
2268      * EVector w2 = slice.unslice(origin, v2, 1, m);
2269      * assert v1.equals(w1);
2270      * assert v2.equals(w2);
2271      * }</pre>
2272      *
2273      * @param origin the first input lane to transfer into the slice
2274      * @param v1 a second vector logically concatenated with the first,
2275      *        before the slice is taken (if omitted it defaults to zero)
2276      * @param m the mask controlling lane selection into the resulting vector
2277      * @return a contiguous slice of {@code VLENGTH} lanes, taken from
2278      *         this vector starting at the indicated origin, and
2279      *         continuing (as needed) into the second vector
2280      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2281      *         is negative or greater than {@code VLENGTH}
2282      * @see #slice(int,Vector)
2283      * @see #unslice(int,Vector,int,VectorMask)
2284      */
2285     // FIXME: does this pull its weight?  It's symmetrical with masked unslice.
2286     public abstract Vector<E> slice(int origin, Vector<E> v1, VectorMask<E> m);
2287 
2288     /**
2289      * Slices a segment of adjacent lanes, starting at a given
2290      * {@code origin} lane in the current vector.  A block of
2291      * {@code VLENGTH} lanes, possibly padded with zero lanes, is
2292      * extracted into its own vector and returned.
2293      *
2294      * This is a convenience method which slices from a single
2295      * vector against an extended background of zero lanes.
2296      * It is equivalent to
2297      * {@link #slice(int,Vector) slice}{@code
2298      * (origin, }{@link #broadcast(long) broadcast}{@code (0))}.
2299      * It may also be viewed simply as a cross-lane shift
2300      * from later to earlier lanes, with zeroes filling
2301      * in the vacated lanes at the end of the vector.
2302      * In this view, the shift count is {@code origin}.
2303      *
2304      * @param origin the first input lane to transfer into the slice
2305      * @return the last {@code VLENGTH-origin} input lanes,
2306      *         placed starting in the first lane of the output,
2307      *         padded at the end with zeroes
2308      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2309      *         is negative or greater than {@code VLENGTH}
2310      * @see #slice(int,Vector)
2311      * @see #unslice(int,Vector,int)
2312      */
2313     // FIXME: does this pull its weight?
2314     // It's a one-off and broadcast(0) is easy.  It's here as a teaching aid.
2315     public abstract Vector<E> slice(int origin);
2316 
2317     /**
2318      * Reverses a {@linkplain #slice(int,Vector) slice()}, inserting
2319      * the current vector as a slice within another "background" input
2320      * vector, which is regarded as one or the other input to a
2321      * hypothetical subsequent {@code slice()} operation.
2322      *
2323      * <p> This is a cross-lane operation that permutes the lane
2324      * elements of the current vector toward the back and inserts them
2325      * into a logical pair of background vectors.  Only one of the
2326      * pair will be returned, however.  The background is formed by
2327      * duplicating the second input vector.  (However, the output will
2328      * never contain two duplicates from the same input lane.)
2329      *
2330      * The lane numbered {@code N} in the input vector is copied into
2331      * lane {@code origin+N} of the first background vector, if that
2332      * lane exists, else into lane {@code origin+N-VLENGTH} of the
2333      * second background vector (which is guaranteed to exist).
2334      *
2335      * The first or second background vector, updated with the
2336      * inserted slice, is returned.  The {@code part} number of zero
2337      * or one selects the first or second updated background vector.
2338      *
2339      * <p> The {@code origin} value must be in the inclusive range
2340      * {@code 0..VLENGTH}.  As limiting cases, {@code v.unslice(0,w,0)}
2341      * and {@code v.unslice(VLENGTH,w,1)} both return {@code v}, while
2342      * {@code v.unslice(0,w,1)} and {@code v.unslice(VLENGTH,w,0)}
2343      * both return {@code w}.
2344      *
2345      * @apiNote
2346      * This method supports a variety of cross-lane insertion
2347      * operations as follows:
2348      * <ul>
2349      *
2350      * <li>To insert near the end of a background vector {@code w}
2351      * at some offset, specify the offset as the origin and
2352      * select part zero. For example: {@code v.unslice(offset, w, 0)}.
2353      *
2354      * <li>To insert near the end of a background vector {@code w},
2355      * but capturing the overflow into the next vector {@code x},
2356      * specify the offset as the origin and select part one.
2357      * For example: {@code v.unslice(offset, x, 1)}.
2358      *
2359      * <li>To insert the last {@code N} items near the beginning
2360      * of a background vector {@code w}, supply {@code VLENGTH-N}
2361      * as the origin and select part one.
2362      * For example: {@code v.unslice(v.length()-N, w, 1)}.
2363      *
2364      * </ul>
2365      *
2366      * @param origin the first output lane to receive the slice
2367      * @param w the background vector that (as two copies) will receive
2368      *        the inserted slice
2369      * @param part the part number of the result (either zero or one)
2370      * @return either the first or second part of a pair of
2371      *         background vectors {@code w}, updated by inserting
2372      *         this vector at the indicated origin
2373      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2374      *         is negative or greater than {@code VLENGTH},
2375      *         or if {@code part} is not zero or one
2376      * @see #slice(int,Vector)
2377      * @see #unslice(int,Vector,int,VectorMask)
2378      */
2379     public abstract Vector<E> unslice(int origin, Vector<E> w, int part);
2380 
2381     /**
2382      * Reverses a {@linkplain #slice(int,Vector) slice()}, inserting
2383      * (under the control of a mask)
2384      * the current vector as a slice within another "background" input
2385      * vector, which is regarded as one or the other input to a
2386      * hypothetical subsequent {@code slice()} operation.
2387      *
2388      * <p> This is a cross-lane operation that permutes the lane
2389      * elements of the current vector forward and inserts its lanes
2390      * (when selected by the mask) into a logical pair of background
2391      * vectors.  As with the
2392      * {@linkplain #unslice(int,Vector,int) unmasked version} of this method,
2393      * only one of the pair will be returned, as selected by the
2394      * {@code part} number.
2395      *
2396      * For each lane {@code N} selected by the mask, the lane value
2397      * is copied into
2398      * lane {@code origin+N} of the first background vector, if that
2399      * lane exists, else into lane {@code origin+N-VLENGTH} of the
2400      * second background vector (which is guaranteed to exist).
2401      * Background lanes retain their original values if the
2402      * corresponding input lanes {@code N} are unset in the mask.
2403      *
2404      * The first or second background vector, updated with set lanes
2405      * of the inserted slice, is returned.  The {@code part} number of
2406      * zero or one selects the first or second updated background
2407      * vector.
2408      *
2409      * @param origin the first output lane to receive the slice
2410      * @param w the background vector that (as two copies) will receive
2411      *        the inserted lanes, if they are set in {@code m}
2412      * @param part the part number of the result (either zero or one)
2413      * @param m the mask controlling lane selection from the current vector
2414      * @return either the first or second part of a pair of
2415      *         background vectors {@code w}, updated by inserting
2416      *         selected lanes of this vector at the indicated origin
2417      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2418      *         is negative or greater than {@code VLENGTH},
2419      *         or if {@code part} is not zero or one
2420      * @see #unslice(int,Vector,int)
2421      * @see #slice(int,Vector)
2422      */
2423     public abstract Vector<E> unslice(int origin, Vector<E> w, int part, VectorMask<E> m);
2424 
2425     /**
2426      * Reverses a {@linkplain #slice(int) slice()}, inserting
2427      * the current vector as a slice within a "background" input
2428      * of zero lane values.  Unlike the other {@code unslice()}
2429      * methods, this method returns only the first of the
2430      * pair of background vectors.
2431      *
2432      * This is a convenience method which returns the result of
2433      * {@link #unslice(int,Vector,int) unslice}{@code
2434      * (origin, }{@link #broadcast(long) broadcast}{@code (0), 0)}.
2435      * It may also be viewed simply as a cross-lane shift
2436      * from earlier to later lanes, with zeroes filling
2437      * in the vacated lanes at the beginning of the vector.
2438      * In this view, the shift count is {@code origin}.
2439      *
2440      * @param origin the first output lane to receive the slice
2441      * @return the first {@code VLENGTH-origin} input lanes,
2442      *         placed starting at the given origin,
2443      *         padded at the beginning with zeroes
2444      * @throws ArrayIndexOutOfBoundsException if {@code origin}
2445      *         is negative or greater than {@code VLENGTH}
2446      * @see #unslice(int,Vector,int)
2447      * @see #slice(int)
2448      */
2449     // FIXME: does this pull its weight?
2450     // It's a one-off and broadcast(0) is easy.  It's here as a teaching aid.
2451     public abstract Vector<E> unslice(int origin);
2452 
2453     // ISSUE: Add a slice which uses a mask instead of an origin?
2454     //public abstract Vector<E> slice(VectorMask<E> support);
2455 
2456     // ISSUE: Add some more options for questionable edge conditions?
2457     // We might define enum EdgeOption { ERROR, ZERO, WRAP } for the
2458     // default of throwing AIOOBE, or substituting zeroes, or just
2459     // reducing the out-of-bounds index modulo VLENGTH.  Similar
2460     // concerns also apply to general Shuffle operations.  For now,
2461     // just support ERROR, since that is safest.
2462 
2463     /**
2464      * Rearranges the lane elements of this vector, selecting lanes
2465      * under the control of a specific shuffle.
2466      *
2467      * This is a cross-lane operation that rearranges the lane
2468      * elements of this vector.
2469      *
2470      * For each lane {@code N} of the shuffle, and for each lane
2471      * source index {@code I=s.laneSource(N)} in the shuffle,
2472      * the output lane {@code N} obtains the value from
2473      * the input vector at lane {@code I}.
2474      *
2475      * @param s the shuffle controlling lane index selection
2476      * @return the rearrangement of the lane elements of this vector
2477      * @throws IndexOutOfBoundsException if there are any exceptional
2478      *        source indexes in the shuffle
2479      * @see #rearrange(VectorShuffle,VectorMask)
2480      * @see #rearrange(VectorShuffle,Vector)
2481      * @see VectorShuffle#laneIsValid()
2482      */
2483     public abstract Vector<E> rearrange(VectorShuffle<E> s);
2484 
2485     /**
2486      * Rearranges the lane elements of this vector, selecting lanes
2487      * under the control of a specific shuffle and a mask.
2488      *
2489      * This is a cross-lane operation that rearranges the lane
2490      * elements of this vector.
2491      *
2492      * For each lane {@code N} of the shuffle, and for each lane
2493      * source index {@code I=s.laneSource(N)} in the shuffle,
2494      * the output lane {@code N} obtains the value from
2495      * the input vector at lane {@code I} if the mask is set.
2496      * Otherwise the output lane {@code N} is set to zero.
2497      *
2498      * <p> This method returns the value of this pseudocode:
2499      * <pre>{@code
2500      * Vector<E> r = this.rearrange(s.wrapIndexes());
2501      * return broadcast(0).blend(r, m);
2502      * }</pre>
2503      *
2504      * @param s the shuffle controlling lane index selection
2505      * @param m the mask controlling application of the shuffle
2506      * @return the rearrangement of the lane elements of this vector
2507      * @throws IndexOutOfBoundsException if there are any exceptional
2508      *        source indexes in the shuffle where the mask is set
2509      * @see #rearrange(VectorShuffle)
2510      * @see #rearrange(VectorShuffle,Vector)
2511      * @see VectorShuffle#laneIsValid()
2512      */
2513     public abstract Vector<E> rearrange(VectorShuffle<E> s, VectorMask<E> m);
2514 
2515     /**
2516      * Rearranges the lane elements of two vectors, selecting lanes
2517      * under the control of a specific shuffle, using both normal and
2518      * exceptional indexes in the shuffle to steer data.
2519      *
2520      * This is a cross-lane operation that rearranges the lane
2521      * elements of the two input vectors (the current vector
2522      * and a second vector {@code v}).
2523      *
2524      * For each lane {@code N} of the shuffle, and for each lane
2525      * source index {@code I=s.laneSource(N)} in the shuffle,
2526      * the output lane {@code N} obtains the value from
2527      * the first vector at lane {@code I} if {@code I>=0}.
2528      * Otherwise, the exceptional index {@code I} is wrapped
2529      * by adding {@code VLENGTH} to it and used to index
2530      * the <em>second</em> vector, at index {@code I+VLENGTH}.
2531      *
2532      * <p> This method returns the value of this pseudocode:
2533      * <pre>{@code
2534      * Vector<E> r1 = this.rearrange(s.wrapIndexes());
2535      * // or else: r1 = this.rearrange(s, s.laneIsValid());
2536      * Vector<E> r2 = v.rearrange(s.wrapIndexes());
2537      * return r2.blend(r1,s.laneIsValid());
2538      * }</pre>
2539      *
2540      * @param s the shuffle controlling lane selection from both input vectors
2541      * @param v the second input vector
2542      * @return the rearrangement of lane elements of this vector and
2543      *         a second input vector
2544      * @see #rearrange(VectorShuffle)
2545      * @see #rearrange(VectorShuffle,VectorMask)
2546      * @see VectorShuffle#laneIsValid()
2547      * @see #slice(int,Vector)
2548      */
2549     public abstract Vector<E> rearrange(VectorShuffle<E> s, Vector<E> v);
2550 
2551     /**
2552      * Using index values stored in the lanes of this vector,
2553      * assemble values stored in a second vector {@code v}.
2554      * The second vector thus serves as a table, whose
2555      * elements are selected by indexes in the current vector.
2556      *
2557      * This is a cross-lane operation that rearranges the lane
2558      * elements of the argument vector, under the control of
2559      * this vector.
2560      *
2561      * For each lane {@code N} of this vector, and for each lane
2562      * value {@code I=this.lane(N)} in this vector,
2563      * the output lane {@code N} obtains the value from
2564      * the argument vector at lane {@code I}.
2565      *
2566      * In this way, the result contains only values stored in the
2567      * argument vector {@code v}, but presented in an order which
2568      * depends on the index values in {@code this}.
2569      *
2570      * The result is the same as the expression
2571      * {@code v.rearrange(this.toShuffle())}.
2572      *
2573      * @param v the vector supplying the result values
2574      * @return the rearrangement of the lane elements of {@code v}
2575      * @throws IndexOutOfBoundsException if any invalid
2576      *        source indexes are found in {@code this}
2577      * @see #rearrange(VectorShuffle)
2578      */
2579     public abstract Vector<E> selectFrom(Vector<E> v);
2580 
2581     /**
2582      * Using index values stored in the lanes of this vector,
2583      * assemble values stored in a second vector {@code v},
2584      * under the control of a mask.
2585      *
2586      * <p>
2587      * The second vector thus serves as a table, whose
2588      * elements are selected by indexes in the current vector.
2589      * Lanes that are unset in the mask receive a
2590      * zero rather than a value from the table.
2591      *
2592      * This is a cross-lane operation that rearranges the lane
2593      * elements of the argument vector, under the control of
2594      * this vector and the mask.
2595      *
2596      * The result is the same as the expression
2597      * {@code v.rearrange(this.toShuffle(), m)}.
2598      *
2599      * @param v the vector supplying the result values
2600      * @param m the mask controlling selection from {@code v}
2601      * @return the rearrangement of the lane elements of {@code v}
2602      * @throws IndexOutOfBoundsException if any invalid
2603      *        source indexes are found in {@code this},
2604      *        in a lane which is set in the mask
2605      * @see #selectFrom(Vector)
2606      * @see #rearrange(VectorShuffle,VectorMask)
2607      */
2608     public abstract Vector<E> selectFrom(Vector<E> v, VectorMask<E> m);
2609 
2610     // Conversions
2611 
2612     /**
2613      * Returns a vector of the same species as this one
2614      * where all lane elements are set to
2615      * the primitive value {@code e}.
2616      *
2617      * The contents of the current vector are discarded;
2618      * only the species is relevant to this operation.
2619      *
2620      * <p> This method returns the value of this expression:
2621      * {@code EVector.broadcast(this.species(), (ETYPE)e)}, where
2622      * {@code EVector} is the vector class specific to this
2623      * vector's element type {@code ETYPE}.
2624      *
2625      * <p>
2626      * The {@code long} value {@code e} must be accurately
2627      * representable by the {@code ETYPE} of this vector's species,
2628      * so that {@code e==(long)(ETYPE)e}.
2629      *
2630      * If this rule is violated the problem is not detected
2631      * statically, but an {@code IllegalArgumentException} is thrown
2632      * at run-time.  Thus, this method somewhat weakens the static
2633      * type checking of immediate constants and other scalars, but it
2634      * makes up for this by improving the expressiveness of the
2635      * generic API.  Note that an {@code e} value in the range
2636      * {@code [-128..127]} is always acceptable, since every
2637      * {@code ETYPE} will accept every {@code byte} value.
2638      *
2639      * @apiNote
2640      * Subtypes improve on this method by sharpening
2641      * the method return type and
2642      * the type of the scalar parameter {@code e}.
2643      *
2644      * @param e the value to broadcast
2645      * @return a vector where all lane elements are set to
2646      *         the primitive value {@code e}
2647      * @throws IllegalArgumentException
2648      *         if the given {@code long} value cannot
2649      *         be represented by the vector's {@code ETYPE}
2650      * @see VectorSpecies#broadcast(long)
2651      * @see IntVector#broadcast(int)
2652      * @see FloatVector#broadcast(float)
2653      */
2654     public abstract Vector<E> broadcast(long e);
2655 
2656     /**
2657      * Returns a mask of the same species as this vector,
2658      * where each lane is set or unset according to the given
2659      * single boolean, which is broadcast to all lanes.
2660      * <p>
2661      * This method returns the value of this expression:
2662      * {@code species().maskAll(bit)}.
2663      *
2664      * @param bit the given mask bit to be replicated
2665      * @return a mask where each lane is set or unset according to
2666      *         the given bit
2667      * @see VectorSpecies#maskAll(boolean)
2668      */
2669     public abstract VectorMask<E> maskAll(boolean bit);
2670 
2671     /**
2672      * Converts this vector into a shuffle, converting the lane values
2673      * to {@code int} and regarding them as source indexes.
2674      * <p>
2675      * This method behaves as if it returns the result of creating a shuffle
2676      * given an array of the vector elements, as follows:
2677      * <pre>{@code
2678      * long[] a = this.toLongArray();
2679      * int[] sa = new int[a.length];
2680      * for (int i = 0; i < a.length; i++) {
2681      *     sa[i] = (int) a[i];
2682      * }
2683      * return VectorShuffle.fromValues(this.species(), sa);
2684      * }</pre>
2685      *
2686      * @return a shuffle whose source indexes are this vector's lane values
2687      * @see VectorShuffle#fromValues(VectorSpecies,int...)
2688      */
2689     public abstract VectorShuffle<E> toShuffle();
2690 
2691     // Bitwise preserving
2692 
2693     /**
2694      * Transforms this vector to a vector of the given species of
2695      * element type {@code F}, reinterpreting the bytes of this
2696      * vector without performing any value conversions.
2697      *
2698      * <p> Depending on the selected species, this operation may
2699      * either <a href="Vector.html#expansion">expand or contract</a>
2700      * its logical result, in which case a non-zero {@code part}
2701      * number can further control the selection and steering of the
2702      * logical result into the physical output vector.
2703      *
2704      * <p>
2705      * The underlying bits of this vector are copied to the resulting
2706      * vector without modification, but those bits, before copying,
2707      * may be truncated if this vector's bit-size is greater than the
2708      * desired vector's bit-size, or filled with zero bits if this
2709      * vector's bit-size is less than the desired vector's bit-size.
2710      *
2711      * <p> If the old and new species have different shape, this is a
2712      * <em>shape-changing</em> operation, and may have special
2713      * implementation costs.
2714      *
2715      * <p> The method behaves as if this vector is stored into a byte
2716      * buffer or array using little-endian byte ordering and then the
2717      * desired vector is loaded from the same byte buffer or array
2718      * using the same ordering.
2719      *
2720      * <p> The following pseudocode illustrates the behavior:
2721      * <pre>{@code
2722      * int domSize = this.byteSize();
2723      * int ranSize = species.vectorByteSize();
2724      * int M = (domSize > ranSize ? domSize / ranSize : ranSize / domSize);
2725      * assert Math.abs(part) < M;
2726      * assert (part == 0) || (part > 0) == (domSize > ranSize);
2727      * byte[] ra = new byte[Math.max(domSize, ranSize)];
2728      * if (domSize > ranSize) {  // expansion
2729      *     this.intoByteArray(ra, 0);
2730      *     int origin = part * ranSize;
2731      *     return species.fromByteArray(ra, origin);
2732      * } else {  // contraction or size-invariant
2733      *     int origin = (-part) * domSize;
2734      *     this.intoByteArray(ra, origin);
2735      *     return species.fromByteArray(ra, 0);
2736      * }
2737      * }</pre>
2738      *
2739      * @apiNote Although this method is defined as if the vectors in
2740      * question were loaded or stored into memory, memory semantics
2741      * has little or nothing to do with the actual implementation.
2742      * The appeal to little-endian ordering is simply a shorthand
2743      * for what could otherwise be a large number of detailed rules
2744      * concerning the mapping between lane-structured vectors and
2745      * byte-structured vectors.
2746      *
2747      * @param species the desired vector species
2748      * @param part the <a href="Vector.html#expansion">part number</a>
2749      *        of the result, or zero if neither expanding nor contracting
2750      * @param <F> the boxed element type of the species
2751      * @return a vector transformed, by shape and element type, from this vector
2752      * @see Vector#convertShape(VectorOperators.Conversion,VectorSpecies,int)
2753      * @see Vector#castShape(VectorSpecies,int)
2754      * @see VectorSpecies#partLimit(VectorSpecies,boolean)
2755      */
2756     public abstract <F> Vector<F> reinterpretShape(VectorSpecies<F> species, int part);
2757 
2758     /**
2759      * Views this vector as a vector of the same shape
2760      * and contents but a lane type of {@code byte},
2761      * where the bytes are extracted from the lanes
2762      * according to little-endian order.
2763      * It is a convenience method for the expression
2764      * {@code reinterpretShape(species().withLanes(byte.class), 0)}.
2765      * It may be considered an inverse to the various
2766      * methods which consolidate bytes into larger lanes
2767      * within the same vector, such as
2768      * {@link Vector#reinterpretAsInts()}.
2769      *
2770      * @return a {@code ByteVector} with the same shape and information content
2771      * @see Vector#reinterpretShape(VectorSpecies,int)
2772      * @see ByteVector#toIntArray
2773      * @see ByteVector#toFloatArray
2774      * @see VectorSpecies#withLanes(Class)
2775      */
2776     public abstract ByteVector reinterpretAsBytes();
2777 
2778     /**
2779      * Reinterprets this vector as a vector of the same shape
2780      * and contents but a lane type of {@code short},
2781      * where the lanes are assembled from successive bytes
2782      * according to little-endian order.
2783      * It is a convenience method for the expression
2784      * {@code reinterpretShape(species().withLanes(short.class), 0)}.
2785      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2786      *
2787      * @return a {@code ShortVector} with the same shape and information content
2788      */
2789     public abstract ShortVector reinterpretAsShorts();
2790 
2791     /**
2792      * Reinterprets this vector as a vector of the same shape
2793      * and contents but a lane type of {@code int},
2794      * where the lanes are assembled from successive bytes
2795      * according to little-endian order.
2796      * It is a convenience method for the expression
2797      * {@code reinterpretShape(species().withLanes(int.class), 0)}.
2798      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2799      *
2800      * @return an {@code IntVector} with the same shape and information content
2801      */
2802     public abstract IntVector reinterpretAsInts();
2803 
2804     /**
2805      * Reinterprets this vector as a vector of the same shape
2806      * and contents but a lane type of {@code long},
2807      * where the lanes are assembled from successive bytes
2808      * according to little-endian order.
2809      * It is a convenience method for the expression
2810      * {@code reinterpretShape(species().withLanes(long.class), 0)}.
2811      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2812      *
2813      * @return a {@code LongVector} with the same shape and information content
2814      */
2815     public abstract LongVector reinterpretAsLongs();
2816 
2817     /**
2818      * Reinterprets this vector as a vector of the same shape
2819      * and contents but a lane type of {@code float},
2820      * where the lanes are assembled from successive bytes
2821      * according to little-endian order.
2822      * It is a convenience method for the expression
2823      * {@code reinterpretShape(species().withLanes(float.class), 0)}.
2824      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2825      *
2826      * @return a {@code FloatVector} with the same shape and information content
2827      */
2828     public abstract FloatVector reinterpretAsFloats();
2829 
2830     /**
2831      * Reinterprets this vector as a vector of the same shape
2832      * and contents but a lane type of {@code double},
2833      * where the lanes are assembled from successive bytes
2834      * according to little-endian order.
2835      * It is a convenience method for the expression
2836      * {@code reinterpretShape(species().withLanes(double.class), 0)}.
2837      * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
2838      *
2839      * @return a {@code DoubleVector} with the same shape and information content
2840      */
2841     public abstract DoubleVector reinterpretAsDoubles();
2842 
2843     /**
2844      * Views this vector as a vector of the same shape, length, and
2845      * contents, but a lane type that is not a floating-point type.
2846      *
2847      * This is a lane-wise reinterpretation cast on the lane values.
2848      * As such, this method does not change {@code VSHAPE} or
2849      * {@code VLENGTH}, and there is no change to the bitwise contents
2850      * of the vector.  If the vector's {@code ETYPE} is already an
2851      * integral type, the same vector is returned unchanged.
2852      *
2853      * This method returns the value of this expression:
2854      * {@code convert(conv,0)}, where {@code conv} is
2855      * {@code VectorOperators.Conversion.ofReinterpret(E.class,F.class)},
2856      * and {@code F} is the non-floating-point type of the
2857      * same size as {@code E}.
2858      *
2859      * @apiNote
2860      * Subtypes improve on this method by sharpening
2861      * the return type.
2862      *
2863      * @return the original vector, reinterpreted as non-floating point
2864      * @see VectorOperators.Conversion#ofReinterpret(Class,Class)
2865      * @see Vector#convert(VectorOperators.Conversion,int)
2866      */
2867     public abstract Vector<?> viewAsIntegralLanes();
2868 
2869     /**
2870      * Views this vector as a vector of the same shape, length, and
2871      * contents, but a lane type that is a floating-point type.
2872      *
2873      * This is a lane-wise reinterpretation cast on the lane values.
2874      * As such, this method does not change {@code VSHAPE} or
2875      * {@code VLENGTH}, and there is no change to the bitwise contents
2876      * of the vector.  If the vector's {@code ETYPE} is already a
2877      * floating-point type, the same vector is returned unchanged.
2878      *
2879      * If the vector's element size does not match any floating point
2880      * type size, an {@code IllegalArgumentException} is thrown.
2881      *
2882      * This method returns the value of this expression:
2883      * {@code convert(conv,0)}, where {@code conv} is
2884      * {@code VectorOperators.Conversion.ofReinterpret(E.class,F.class)},
2885      * and {@code F} is the floating-point type of the
2886      * same size as {@code E}, if any.
2887      *
2888      * @apiNote
2889      * Subtypes improve on this method by sharpening
2890      * the return type.
2891      *
2892      * @return the original vector, reinterpreted as floating point
2893      * @throws IllegalArgumentException if there is no floating point
2894      *         type the same size as the lanes of this vector
2895      * @see VectorOperators.Conversion#ofReinterpret(Class,Class)
2896      * @see Vector#convert(VectorOperators.Conversion,int)
2897      */
2898     public abstract Vector<?> viewAsFloatingLanes();
2899 
2900     /**
2901      * Converts this vector to a vector of the same shape and a new
2902      * element type, converting lane values from the current {@code ETYPE}
2903      * to a new lane type (called {@code FTYPE} here) according to the
2904      * indicated {@linkplain VectorOperators.Conversion conversion}.
2905      *
2906      * This is a lane-wise shape-invariant operation which copies
2907      * {@code ETYPE} values from the input vector to corresponding
2908      * {@code FTYPE} values in the result.  Depending on the selected
2909      * conversion, this operation may either
2910      * <a href="Vector.html#expansion">expand or contract</a> its
2911      * logical result, in which case a non-zero {@code part} number
2912      * can further control the selection and steering of the logical
2913      * result into the physical output vector.
2914      *
2915      * <p> Each specific conversion is described by a conversion
2916      * constant in the class {@link VectorOperators}.  Each conversion
2917      * operator has a specified {@linkplain
2918      * VectorOperators.Conversion#domainType() domain type} and
2919      * {@linkplain VectorOperators.Conversion#rangeType() range type}.
2920      * The domain type must exactly match the lane type of the input
2921      * vector, while the range type determines the lane type of the
2922      * output vector.
2923      *
2924      * <p> A conversion operator may be classified as (respectively)
2925      * in-place, expanding, or contracting, depending on whether the
2926      * bit-size of its domain type is (respectively) equal, less than,
2927      * or greater than the bit-size of its range type.
2928      *
2929      * <p> Independently, conversion operations can also be classified
2930      * as reinterpreting or value-transforming, depending on whether
2931      * the conversion copies representation bits unchanged, or changes
2932      * the representation bits in order to retain (part or all of)
2933      * the logical value of the input value.
2934      *
2935      * <p> If a reinterpreting conversion contracts, it will truncate the
2936      * upper bits of the input.  If it expands, it will pad upper bits
2937      * of the output with zero bits, when there are no corresponding
2938      * input bits.
2939      *
2940      * <p> As another variation of behavior, an in-place conversion
2941      * can incorporate an expanding or contracting conversion, while
2942      * retaining the same lane size between input and output.
2943      *
2944      * In the case of a contraction, the lane value is first converted
2945      * to the smaller value, and then zero-padded (as if by a subsequent
2946      * reinterpretation) before storing into the output lane.
2947      *
2948      * In the case of an expansion, the lane value is first truncated
2949      * to the smaller value (as if by an initial reinterpretation),
2950      * and then converted before storing into the output lane.
2951      *
2952      * <p> An expanding conversion such as {@code S2I} ({@code short}
2953      * value to {@code int}) takes a scalar value and represents it
2954      * in a larger format (always with some information redundancy).
2955      *
2956      * A contracting conversion such as {@code D2F} ({@code double}
2957      * value to {@code float}) takes a scalar value and represents it
2958      * in a smaller format (always with some information loss).
2959      *
2960      * Some in-place conversions may also include information loss,
2961      * such as {@code L2D} ({@code long} value to {@code double})
2962      * or {@code F2I}  ({@code float} value to {@code int}).
2963      *
2964      * Reinterpreting in-place conversions are not lossy, unless the
2965      * bitwise value is somehow not legal in the output type.
2966      * Converting the bit-pattern of a {@code NaN} may discard bits
2967      * from the {@code NaN}'s significand.
2968      *
2969      * <p> This classification is important, because, unless otherwise
2970      * documented, conversion operations <em>never change vector
2971      * shape</em>, regardless of how they may change <em>lane sizes</em>.
2972      *
2973      * Therefore an <em>expanding</em> conversion cannot store all of its
2974      * results in its output vector, because the output vector has fewer
2975      * lanes of larger size, in order to have the same overall bit-size as
2976      * its input.
2977      *
2978      * Likewise, a contracting conversion must store its relatively small
2979      * results into a subset of the lanes of the output vector, defaulting
2980      * the unused lanes to zero.
2981      *
2982      * <p> As an example, a conversion from {@code byte} to {@code long}
2983      * ({@code M=8}) will discard 87.5% of the input values in order to
2984      * convert the remaining 12.5% into the roomy {@code long} lanes of
2985      * the output vector. The inverse conversion will convert back all of
2986      * the large results, but will waste 87.5% of the lanes in the output
2987      * vector.
2988      *
2989      * <em>In-place</em> conversions ({@code M=1}) deliver all of
2990      * their results in one output vector, without wasting lanes.
2991      *
2992      * <p> To manage the details of these
2993      * <a href="Vector.html#expansion">expansions and contractions</a>,
2994      * a non-zero {@code part} parameter selects partial results from
2995      * expansions, or steers the results of contractions into
2996      * corresponding locations, as follows:
2997      *
2998      * <ul>
2999      * <li> expanding by {@code M}: {@code part} must be in the range
3000      * {@code [0..M-1]}, and selects the block of {@code VLENGTH/M} input
3001      * lanes starting at the <em>origin lane</em> at {@code part*VLENGTH/M}.
3002      *
3003      * <p> The {@code VLENGTH/M} output lanes represent a partial
3004      * slice of the whole logical result of the conversion, filling
3005      * the entire physical output vector.
3006      *
3007      * <li> contracting by {@code M}: {@code part} must be in the range
3008      * {@code [-M+1..0]}, and steers all {@code VLENGTH} input lanes into
3009      * the output located at the <em>origin lane</em> {@code -part*VLENGTH}.
3010      * There is a total of {@code VLENGTH*M} output lanes, and those not
3011      * holding converted input values are filled with zeroes.
3012      *
3013      * <p> A group of such output vectors, with logical result parts
3014      * steered to disjoint blocks, can be reassembled using the
3015      * {@linkplain VectorOperators#OR bitwise or} or (for floating
3016      * point) the {@link VectorOperators#FIRST_NONZERO FIRST_NONZERO}
3017      * operator.
3018      *
3019      * <li> in-place ({@code M=1}): {@code part} must be zero.
3020      * Both vectors have the same {@code VLENGTH}.  The result is
3021      * always positioned at the <em>origin lane</em> of zero.
3022      *
3023      * </ul>
3024      *
3025      * <p> This method is a restricted version of the more general
3026      * but less frequently used <em>shape-changing</em> method
3027      * {@link #convertShape(VectorOperators.Conversion,VectorSpecies,int)
3028      * convertShape()}.
3029      * The result of this method is the same as the expression
3030      * {@code this.convertShape(conv, rsp, part)},
3031      * where the output species is
3032      * {@code rsp=this.species().withLanes(FTYPE.class)}.
3033      *
3034      * @param conv the desired scalar conversion to apply lane-wise
3035      * @param part the <a href="Vector.html#expansion">part number</a>
3036      *        of the result, or zero if neither expanding nor contracting
3037      * @param <F> the boxed element type of the output species
3038      * @return a vector converted by element type from this vector
3039      * @throws ArrayIndexOutOfBoundsException unless {@code part} is zero,
3040      *         or else the expansion ratio is {@code M} and
3041      *         {@code part} is positive and less than {@code M},
3042      *         or else the contraction ratio is {@code M} and
3043      *         {@code part} is negative and greater than {@code -M}
3044      *
3045      * @see VectorOperators#I2L
3046      * @see VectorOperators.Conversion#ofCast(Class,Class)
3047      * @see VectorSpecies#partLimit(VectorSpecies,boolean)
3048      * @see #viewAsFloatingLanes()
3049      * @see #viewAsIntegralLanes()
3050      * @see #convertShape(VectorOperators.Conversion,VectorSpecies,int)
3051      * @see #reinterpretShape(VectorSpecies,int)
3052      */
3053     public abstract <F> Vector<F> convert(VectorOperators.Conversion<E,F> conv, int part);
3054 
3055     /**
3056      * Converts this vector to a vector of the given species, shape and
3057      * element type, converting lane values from the current {@code ETYPE}
3058      * to a new lane type (called {@code FTYPE} here) according to the
3059      * indicated {@linkplain VectorOperators.Conversion conversion}.
3060      *
3061      * This is a lane-wise operation which copies {@code ETYPE} values
3062      * from the input vector to corresponding {@code FTYPE} values in
3063      * the result.
3064      *
3065      * <p> If the old and new species have the same shape, the behavior
3066      * is exactly the same as the simpler, shape-invariant method
3067      * {@link #convert(VectorOperators.Conversion,int) convert()}.
3068      * In such cases, the simpler method {@code convert()} should be
3069      * used, to make code easier to reason about.
3070      * Otherwise, this is a <em>shape-changing</em> operation, and may
3071      * have special implementation costs.
3072      *
3073      * <p> As a combined effect of shape changes and lane size changes,
3074      * the input and output species may have different lane counts, causing
3075      * <a href="Vector.html#expansion">expansion or contraction</a>.
3076      * In this case a non-zero {@code part} parameter selects
3077      * partial results from an expanded logical result, or steers
3078      * the results of a contracted logical result into a physical
3079      * output vector of the required output species.
3080      *
3081      * <p> The following pseudocode illustrates the behavior of this
3082      * method for in-place, expanding, and contracting conversions.
3083      * (This pseudocode also applies to the shape-invariant method,
3084      * but with shape restrictions on the output species.)
3085      * Note that only one of the three code paths is relevant to any
3086      * particular combination of conversion operator and shapes.
3087      *
3088      * <pre>{@code
3089      * FTYPE scalar_conversion_op(ETYPE s);
3090      * EVector a = ...;
3091      * VectorSpecies<F> rsp = ...;
3092      * int part = ...;
3093      * VectorSpecies<E> dsp = a.species();
3094      * int domlen = dsp.length();
3095      * int ranlen = rsp.length();
3096      * FTYPE[] logical = new FTYPE[domlen];
3097      * for (int i = 0; i < domlen; i++) {
3098      *   logical[i] = scalar_conversion_op(a.lane(i));
3099      * }
3100      * FTYPE[] physical = new FTYPE[ranlen]; // zero-filled
3101      * if (domlen == ranlen) { // in-place
3102      *     assert part == 0; //else AIOOBE
3103      *     physical = logical;
3104      * } else if (domlen > ranlen) { // expanding
3105      *     int M = domlen / ranlen;
3106      *     assert 0 <= part && part < M; //else AIOOBE
3107      *     int origin = part * ranlen;
3108      *     physical = Arrays.copyOfRange(logical, origin, origin + ranlen);
3109      * } else { // (domlen < ranlen) // contracting
3110      *     int M = ranlen / domlen;
3111      *     assert 0 >= part && part > -M; //else AIOOBE
3112      *     int origin = -part * domlen;
3113      *     System.arraycopy(logical, 0, physical, origin, domlen);
3114      * }
3115      * return FVector.fromArray(rsp, physical, 0);
3116      * }</pre>
3117      *
3118      * @param conv the desired scalar conversion to apply lane-wise
3119      * @param rsp the desired output species
3120      * @param part the <a href="Vector.html#expansion">part number</a>
3121      *        of the result, or zero if neither expanding nor contracting
3122      * @param <F> the boxed element type of the output species
3123      * @return a vector converted by shape and element type from this vector
3124      * @see #convert(VectorOperators.Conversion,int)
3125      * @see #castShape(VectorSpecies,int)
3126      * @see #reinterpretShape(VectorSpecies,int)
3127      */
3128     public abstract <F> Vector<F> convertShape(VectorOperators.Conversion<E,F> conv, VectorSpecies<F> rsp, int part);
3129 
3130     /**
3131      * Convenience method for converting a vector from one lane type
3132      * to another, reshaping as needed when lane sizes change.
3133      *
3134      * This method returns the value of this expression:
3135      * {@code convertShape(conv,rsp,part)}, where {@code conv} is
3136      * {@code VectorOperators.Conversion.ofCast(E.class,F.class)}.
3137      *
3138      * <p> If the old and new species have different shape, this is a
3139      * <em>shape-changing</em> operation, and may have special
3140      * implementation costs.
3141      *
3142      * @param rsp the desired output species
3143      * @param part the <a href="Vector.html#expansion">part number</a>
3144      *        of the result, or zero if neither expanding nor contracting
3145      * @param <F> the boxed element type of the output species
3146      * @return a vector converted by shape and element type from this vector
3147      * @see VectorOperators.Conversion#ofCast(Class,Class)
3148      * @see Vector#convertShape(VectorOperators.Conversion,VectorSpecies,int)
3149      */
3150     // FIXME: Does this carry its weight?
3151     public abstract <F> Vector<F> castShape(VectorSpecies<F> rsp, int part);
3152 
3153     /**
3154      * Checks that this vector has the given element type,
3155      * and returns this vector unchanged.
3156      * The effect is similar to this pseudocode:
3157      * {@code elementType == species().elementType()
3158      *        ? this
3159      *        : throw new ClassCastException()}.
3160      *
3161      * @param elementType the required lane type
3162      * @param <F> the boxed element type of the required lane type
3163      * @return this vector, unchanged
3164      * @throws ClassCastException if the vector has the wrong element type
3165      * @see VectorSpecies#check(Class)
3166      * @see VectorMask#check(Class)
3167      * @see Vector#check(VectorSpecies)
3168      * @see VectorShuffle#check(VectorSpecies)
3169      */
3170     public abstract <F> Vector<F> check(Class<F> elementType);
3171 
3172     /**
3173      * Checks that this vector has the given species,
3174      * and returns this vector unchanged.
3175      * The effect is similar to this pseudocode:
3176      * {@code species == species()
3177      *        ? this
3178      *        : throw new ClassCastException()}.
3179      *
3180      * @param species the required species
3181      * @param <F> the boxed element type of the required species
3182      * @return this vector, unchanged
3183      * @throws ClassCastException if the vector has the wrong species
3184      * @see Vector#check(Class)
3185      * @see VectorMask#check(VectorSpecies)
3186      * @see VectorShuffle#check(VectorSpecies)
3187      */
3188     public abstract <F> Vector<F> check(VectorSpecies<F> species);
3189 
3190     //Array stores
3191 
3192     /**
3193      * Stores this vector into a byte array starting at an offset.
3194      * <p>
3195      * Bytes are extracted from primitive lane elements according
3196      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
3197      * The lanes are stored according to their
3198      * <a href="Vector.html#lane-order">memory ordering</a>.
3199      * <p>
3200      * This method behaves as if it calls
3201      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3202      * intoByteBuffer()} as follows:
3203      * <pre>{@code
3204      * var bb = ByteBuffer.wrap(a);
3205      * var bo = ByteOrder.LITTLE_ENDIAN;
3206      * var m = maskAll(true);
3207      * intoByteBuffer(bb, offset, bo, m);
3208      * }</pre>
3209      *
3210      * @param a the byte array
3211      * @param offset the offset into the array
3212      * @throws IndexOutOfBoundsException
3213      *         if {@code offset+N*ESIZE < 0}
3214      *         or {@code offset+(N+1)*ESIZE > a.length}
3215      *         for any lane {@code N} in the vector
3216      */
3217     public abstract void intoByteArray(byte[] a, int offset);
3218 
3219     /**
3220      * Stores this vector into a byte array starting at an offset
3221      * using a mask.
3222      * <p>
3223      * Bytes are extracted from primitive lane elements according
3224      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
3225      * The lanes are stored according to their
3226      * <a href="Vector.html#lane-order">memory ordering</a>.
3227      * <p>
3228      * This method behaves as if it calls
3229      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3230      * intoByteBuffer()} as follows:
3231      * <pre>{@code
3232      * var bb = ByteBuffer.wrap(a);
3233      * var bo = ByteOrder.LITTLE_ENDIAN;
3234      * intoByteBuffer(bb, offset, bo, m);
3235      * }</pre>
3236      *
3237      * @param a the byte array
3238      * @param offset the offset into the array
3239      * @param m the mask controlling lane selection
3240      * @throws IndexOutOfBoundsException
3241      *         if {@code offset+N*ESIZE < 0}
3242      *         or {@code offset+(N+1)*ESIZE > a.length}
3243      *         for any lane {@code N} in the vector
3244      *         where the mask is set
3245      */
3246     public abstract void intoByteArray(byte[] a, int offset,
3247                                        VectorMask<E> m);
3248 
3249     /**
3250      * Stores this vector into a byte array starting at an offset
3251      * using explicit byte order and a mask.
3252      * <p>
3253      * Bytes are extracted from primitive lane elements according
3254      * to the specified byte ordering.
3255      * The lanes are stored according to their
3256      * <a href="Vector.html#lane-order">memory ordering</a>.
3257      * <p>
3258      * This method behaves as if it calls
3259      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3260      * intoByteBuffer()} as follows:
3261      * <pre>{@code
3262      * var bb = ByteBuffer.wrap(a);
3263      * intoByteBuffer(bb, offset, bo, m);
3264      * }</pre>
3265      *
3266      * @param a the byte array
3267      * @param offset the offset into the array
3268      * @param bo the intended byte order
3269      * @param m the mask controlling lane selection
3270      * @throws IndexOutOfBoundsException
3271      *         if {@code offset+N*ESIZE < 0}
3272      *         or {@code offset+(N+1)*ESIZE > a.length}
3273      *         for any lane {@code N} in the vector
3274      *         where the mask is set
3275      */
3276     public abstract void intoByteArray(byte[] a, int offset,
3277                                        ByteOrder bo,
3278                                        VectorMask<E> m);
3279 
3280     /**
3281      * Stores this vector into a byte buffer starting at an offset
3282      * using explicit byte order.
3283      * <p>
3284      * Bytes are extracted from primitive lane elements according
3285      * to the specified byte ordering.
3286      * The lanes are stored according to their
3287      * <a href="Vector.html#lane-order">memory ordering</a>.
3288      * <p>
3289      * This method behaves as if it calls
3290      * {@link #intoByteBuffer(ByteBuffer,int,ByteOrder,VectorMask)
3291      * intoByteBuffer()} as follows:
3292      * <pre>{@code
3293      * var m = maskAll(true);
3294      * intoByteBuffer(bb, offset, bo, m);
3295      * }</pre>
3296      *
3297      * @param bb the byte buffer
3298      * @param offset the offset into the buffer
3299      * @param bo the intended byte order
3301      * @throws IndexOutOfBoundsException
3302      *         if {@code offset+N*ESIZE < 0}
3303      *         or {@code offset+(N+1)*ESIZE > bb.limit()}
3304      *         for any lane {@code N} in the vector
3305      */
3306     public abstract void intoByteBuffer(ByteBuffer bb, int offset, ByteOrder bo);
3307 
3308     /**
3309      * Stores this vector into a byte buffer starting at an offset
3310      * using explicit byte order and a mask.
3311      * <p>
3312      * Bytes are extracted from primitive lane elements according
3313      * to the specified byte ordering.
3314      * The lanes are stored according to their
3315      * <a href="Vector.html#lane-order">memory ordering</a>.
3316      * <p>
3317      * The following pseudocode illustrates the behavior, where
3318      * {@code EBuffer} is the primitive buffer type, {@code ETYPE} is the
3319      * primitive element type, and {@code EVector} is the primitive
3320      * vector type for this vector:
3321      * <pre>{@code
3322      * EBuffer eb = bb.duplicate()
3323      *     .position(offset)
3324      *     .order(bo).asEBuffer();
3325      * ETYPE[] a = this.toArray();
3326      * for (int n = 0; n < a.length; n++) {
3327      *     if (m.laneIsSet(n)) {
3328      *         eb.put(n, a[n]);
3329      *     }
3330      * }
3331      * }</pre>
3332      * @implNote
3333      * This operation is likely to be more efficient if
3334      * the specified byte order is the same as
3335      * {@linkplain ByteOrder#nativeOrder()
3336      * the platform native order},
3337      * since this method will not need to reorder
3338      * the bytes of lane values.
3339      * In the special case where {@code ETYPE} is
3340      * {@code byte}, the byte order argument is
3341      * ignored.
3342      *
3343      * @param bb the byte buffer
3344      * @param offset the offset into the buffer
3345      * @param bo the intended byte order
3346      * @param m the mask controlling lane selection
3347      * @throws IndexOutOfBoundsException
3348      *         if {@code offset+N*ESIZE < 0}
3349      *         or {@code offset+(N+1)*ESIZE > bb.limit()}
3350      *         for any lane {@code N} in the vector
3351      *         where the mask is set
3352      */
3353     public abstract void intoByteBuffer(ByteBuffer bb, int offset,
3354                                         ByteOrder bo, VectorMask<E> m);
3355 
3356     /**
3357      * Returns a packed array containing all the lane values.
3358      * The array length is the same as the vector length.
3359      * The element type of the array is the same as the element
3360      * type of the vector.
3361      * The array elements are stored in lane order.
3362      * Overrides of this method on subtypes of {@code Vector}
3363      * which specify the element type have an accurately typed
3364      * array result.
3365      * The static return type declared here is {@code Object}
3366      * because this declaration does not specify the element type;
3367      * the value returned is still an {@code ETYPE[]} array.
3368      *
3369      * @apiNote
3370      * Usually {@linkplain IntVector#toArray() strongly typed access}
3371      * is preferable, if you are working with a vector
3372      * subtype that has a known element type.
3373      *
3374      * @return an accurately typed array containing
3375      *         the lane values of this vector
3373      * @see ByteVector#toArray()
3374      * @see IntVector#toArray()
3375      * @see DoubleVector#toArray()
3376      */
3377     public abstract Object toArray();
3378 
3379     /**
3380      * Returns a {@code long[]} array containing all
3381      * the lane values, converted to the type {@code long}.
3382      * The array length is the same as the vector length.
3383      * The array elements are converted as if by casting
3384      * and stored in lane order.
3385      * This operation can lose precision and/or range
3386      * if the vector element type is {@code float}
3387      * or {@code double}.
3388      *
3389      * @apiNote
3390      * Usually {@linkplain IntVector#toArray() strongly typed access}
3391      * is preferable, if you are working with a vector
3392      * subtype that has a known element type.
3393      *
3394      * @return a {@code long[]} array containing
3395      *         the lane values of this vector
3396      * @see #toArray()
3397      * @see LongVector#toArray()
3398      */
3399     public abstract long[] toLongArray();
3400 
3401     /**
3402      * Returns a {@code double[]} array containing all
3403      * the lane values, converted to the type {@code double}.
3404      * The array length is the same as the vector length.
3405      * The array elements are converted as if by casting
3406      * and stored in lane order.
3407      * This operation can lose precision
3408      * if the vector element type is {@code long}.
3409      *
3410      * @apiNote
3411      * Usually {@linkplain IntVector#toArray() strongly typed access}
3412      * is preferable, if you are working with a vector
3413      * subtype that has a known element type.
3414      *
3415      * @return a {@code double[]} array containing
3416      *         the lane values of this vector
3417      * @see #toArray()
3418      * @see DoubleVector#toArray()
3419      */
3420     public abstract double[] toDoubleArray();
3421 
3422     /**
3423      * Returns a string representation of this vector, of the form
3424      * {@code "[0,1,2...]"}, reporting the lane values of this
3425      * vector, in lane order.
3426      *
3427      * <p>The string is produced as if by a call to
3428      * {@link Arrays#toString(int[]) Arrays.toString()},
3429      * as appropriate to the array returned by
3430      * {@link #toArray() this.toArray()}.
3431      *
3432      * @return a string of the form {@code "[0,1,2...]"}
3433      * reporting the lane values of this vector
3434      */
3435     @Override
3436     public abstract String toString();
3437 
3438     /**
3439      * Indicates whether this vector is identical to some other object.
3440      * Two vectors are identical only if they have the same species
3441      * and same lane values, in the same order.
3442      *
3443      * <p>The comparison of lane values is produced as if by a call to
3444      * {@link Arrays#equals(int[],int[]) Arrays.equals()},
3445      * as appropriate to the arrays returned by
3446      * {@link #toArray toArray()} on both vectors.
3447      *
          * @param obj the object to compare this vector with
3448      * @return whether this vector is identical to some other object
3449      */
3450     @Override
3451     public abstract boolean equals(Object obj);
3452 
3453     /**
3454      * Returns a hash code value for the vector,
3455      * based on the lane values and the vector species.
3456      *
3457      * @return  a hash code value for this vector
3458      */
3459     @Override
3460     public abstract int hashCode();
3461 
3462     /**
3463      * Returns all the lane values of this vector, boxed in a list.
3464      * The list elements are boxed and presented in lane order.
3465      * The list is immutable, as if returned from
3466      * {@link List#of(Object[]) List.&lt;E&gt;of}.
3467      *
3468      * @apiNote
3469      * Because this operation jumps out of the domain of vectors into
3470      * the domain of Java collections, it is likely to have large
3471      * overheads, as compared with other vector operations.
3472      * Often {@link #toArray Vector.toArray} is preferable,
3473      * since it produces a packed array of unboxed lane values.
3474      *
3475      * @return a list containing the lane values of this vector
3476      */
3477     // FIXME:  Does this pull its weight?  Probably not.
3478     // Perhaps it's fine to rely on the toArray() methods instead.
3479     public abstract List<E> toList();
3480 
3481     // ==== JROSE NAME CHANGES ====
3482 
3483     // RAISED FROM SUBCLASSES (with generalized type)
3484     // * toArray() -> ETYPE[] <: Object (erased return type for interop)
3485     // * toString(), equals(Object), hashCode() (documented)
3486     // ADDED
3487     // * compare(OP,v) to replace most of the comparison methods
3488     // * maskAll(boolean) to replace maskAllTrue/False
3489     // * toList() -> List<E> (interop with collections)
3490     // * toLongArray(), toDoubleArray() (generic unboxed access)
3491     // * check(Class), check(VectorSpecies) (static type-safety checks)
3492     // * enum Comparison (enum of EQ, NE, GT, LT, GE, LE)
3493     // * zero(VS), broadcast(long) (basic factories)
3494     // * reinterpretAsEs(), viewAsXLanes (bytewise reinterpreting views)
3495     // * addIndex(int) (iota function)
3496 
3497     /**
          * Multiplies the value reported by
          * {@link VectorSpecies#elementSize(Class)} for {@code c}
          * by {@code numElem}.
          *
          * @param c the class handed to {@code VectorSpecies.elementSize}
          * @param numElem the multiplier applied to that element size
          * @return {@code VectorSpecies.elementSize(c) * numElem}
          * @deprecated Use {@code numElem*}{@link VectorSpecies#elementSize(Class)}.
          */
3498     @Deprecated
3499     public static int bitSizeForVectorLength(Class<?> c, int numElem) {
3500         final int perElement = VectorSpecies.elementSize(c);
             return perElement * numElem;
3501     }
3502 
3503     /**
          * Forwards to {@code lt(v)} and returns its result unchanged.
          *
          * @param v the vector handed to {@code lt}
          * @return the mask produced by {@code lt(v)}
          * @deprecated Use {@code lt(v)} instead.
          */
3504     @Deprecated
3505     public VectorMask<E> lessThan(Vector<E> v) {
             VectorMask<E> mask = lt(v);
             return mask;
         }
3506 
3507     /**
          * Forwards to {@code eq(v)} and returns its result unchanged.
          *
          * @param v the vector handed to {@code eq}
          * @return the mask produced by {@code eq(v)}
          * @deprecated Use {@code eq(v)} instead.
          */
3508     @Deprecated
3509     public VectorMask<E> equal(Vector<E> v) {
             VectorMask<E> mask = eq(v);
             return mask;
         }
3510 
3511     /**
          * Forwards to {@code compare(VectorOperators.NE, v)}.
          *
          * @param v the vector handed to {@code compare}
          * @return the mask produced by {@code compare(VectorOperators.NE, v)}
          * @deprecated Use {@code compare(VectorOperators.NE, v)} instead.
          */
3512     @Deprecated
3513     public VectorMask<E> notEqual(Vector<E> v) {
             VectorMask<E> mask = compare(VectorOperators.NE, v);
             return mask;
         }
3514 
3515     /**
          * Forwards to {@code compare(VectorOperators.LE, v)}.
          *
          * @param v the vector handed to {@code compare}
          * @return the mask produced by {@code compare(VectorOperators.LE, v)}
          * @deprecated Use {@code compare(VectorOperators.LE, v)} instead.
          */
3516     @Deprecated
3517     public VectorMask<E> lessThanEq(Vector<E> v) {
             VectorMask<E> mask = compare(VectorOperators.LE, v);
             return mask;
         }
3518 
3519     /**
          * Forwards to {@code compare(VectorOperators.GT, v)}.
          *
          * @param v the vector handed to {@code compare}
          * @return the mask produced by {@code compare(VectorOperators.GT, v)}
          * @deprecated Use {@code compare(VectorOperators.GT, v)} instead.
          */
3520     @Deprecated
3521     public VectorMask<E> greaterThan(Vector<E> v) {
             VectorMask<E> mask = compare(VectorOperators.GT, v);
             return mask;
         }
3522 
3523     /**
          * Forwards to {@code compare(VectorOperators.GE, v)}.
          *
          * @param v the vector handed to {@code compare}
          * @return the mask produced by {@code compare(VectorOperators.GE, v)}
          * @deprecated Use {@code compare(VectorOperators.GE, v)} instead.
          */
3524     @Deprecated
3525     public VectorMask<E> greaterThanEq(Vector<E> v) {
             VectorMask<E> mask = compare(VectorOperators.GE, v);
             return mask;
         }
3526 
3527     /**
          * Forwards to {@code maskAll(true)}.
          *
          * @return the mask produced by {@code maskAll(true)}
          * @deprecated Use {@code maskAll(true)} instead.
          */
3528     @Deprecated
3529     public VectorMask<E> maskAllTrue() {
             VectorMask<E> allSet = maskAll(true);
             return allSet;
         }
3530 
3531     /**
          * Forwards to {@code maskAll(false)}.
          *
          * @return the mask produced by {@code maskAll(false)}
          * @deprecated Use {@code maskAll(false)} instead.
          */
3532     @Deprecated
3533     public VectorMask<E> maskAllFalse() {
             VectorMask<E> allUnset = maskAll(false);
             return allUnset;
         }
3534 
3535     /**
          * Forwards to {@code VectorMask.fromArray(species(), a, offset)}.
          *
          * @param a the boolean array handed to {@code VectorMask.fromArray}
          * @param offset the offset handed to {@code VectorMask.fromArray}
          * @return the mask produced by {@code VectorMask.fromArray}
          * @deprecated Use {@code VectorMask.fromArray(VectorSpecies, boolean[], int)}
          *             instead.
          */
3536     @Deprecated
3537     public VectorMask<E> maskFromArray(boolean[] a, int offset) {
             VectorMask<E> loaded = VectorMask.fromArray(species(), a, offset);
             return loaded;
         }
3538 
3539     /**
          * Forwards to {@code VectorMask.fromValues(this.species(), bits)}.
          *
          * @param bits the boolean values handed to {@code VectorMask.fromValues}
          * @return the mask produced by {@code VectorMask.fromValues}
          * @deprecated Use {@code VectorMask.fromValues(...)} instead.
          */
3540     @Deprecated
3541     public final VectorMask<E> maskFromValues(boolean... bits) {
3542         VectorMask<E> built = VectorMask.fromValues(this.species(), bits);
             return built;
3543     }
3544 
3545     /** Use explicit argument of ByteOrder.LITTLE_ENDIAN */
3546     @Deprecated
3547     public final
3548     void intoByteBuffer(ByteBuffer bb, int offset) {
3549         ByteOrder bo = ByteOrder.LITTLE_ENDIAN;
3550         if (bb.order() != bo)  throw new IllegalArgumentException();
3551         intoByteBuffer(bb, offset, bo);
3552     }
3553 
3554     /** Use explicit argument of ByteOrder.LITTLE_ENDIAN */
3555     @Deprecated
3556     public final
3557     void intoByteBuffer(ByteBuffer bb, int offset,
3558                         VectorMask<E> m) {
3559         ByteOrder bo = ByteOrder.LITTLE_ENDIAN;
3560         if (bb.order() != bo)  throw new IllegalArgumentException();
3561         intoByteBuffer(bb, offset, bo, m);
3562     }
3563 
3564     /**
          * Forwards to {@code reinterpretShape(s, 0)}.
          *
          * @param s the species handed to {@code reinterpretShape}
          * @param <F> the boxed element type of the given species
          * @return the vector produced by {@code reinterpretShape(s, 0)}
          * @deprecated Use reinterpretShape(s,0).
          */
3565     @Deprecated
3566     public <F> Vector<F> reinterpret(VectorSpecies<F> s) {
3567         Vector<F> reinterpreted = reinterpretShape(s, 0);
             return reinterpreted;
3568     }
3569 
3570     /**
          * Calls {@code s.check(elementType())} and then forwards to
          * {@code reinterpretShape(s, 0)}.
          *
          * @param s the species that is checked and then handed to
          *        {@code reinterpretShape}
          * @return the vector produced by {@code reinterpretShape(s, 0)}
          * @deprecated Use reinterpretShape(s,0).
          */
3571     @Deprecated
3572     public Vector<E> reshape(VectorSpecies<E> s) {
             // The check runs first, to verify the same E.
3573         s.check(elementType());
3574         Vector<E> reshaped = reinterpretShape(s, 0);
             return reshaped;
3575     }
3576 
3577     /**
          * Forwards to {@code castShape(s, 0)}.
          *
          * @param s the species handed to {@code castShape}
          * @param <F> the boxed element type of the given species
          * @return the vector produced by {@code castShape(s, 0)}
          * @deprecated Use castShape(s, 0).
          */
3578     @Deprecated
3579     public <F> Vector<F> cast(VectorSpecies<F> s) {
3580         Vector<F> converted = castShape(s, 0);
             return converted;
3581     }
3582 
3583     /**
          * Forwards to {@code lanewise(VectorOperators.NEG, m)}.
          *
          * @param m the mask handed to {@code lanewise}
          * @return the vector produced by {@code lanewise(VectorOperators.NEG, m)}
          * @deprecated Use lanewise(NEG, m).
          */
3584     @Deprecated
3585     public Vector<E> neg(VectorMask<E> m) {
3586         Vector<E> negated = lanewise(VectorOperators.NEG, m);
             return negated;
3587     }
3588 
3589     /**
          * Forwards to {@code lanewise(VectorOperators.ABS, m)}.
          *
          * @param m the mask handed to {@code lanewise}
          * @return the vector produced by {@code lanewise(VectorOperators.ABS, m)}
          * @deprecated Use lanewise(ABS, m).
          */
3590     @Deprecated
3591     public Vector<E> abs(VectorMask<E> m) {
3592         Vector<E> magnitudes = lanewise(VectorOperators.ABS, m);
             return magnitudes;
3593     }
3594 
3595     /**
          * Forwards to {@code slice(-i & (length()-1), this)}.
          *
          * @param i the count that is negated and masked with
          *        {@code length()-1} to form the slice origin
          * @return the vector produced by that {@code slice} call
          * @deprecated use {@code v.slice(-i & (VLENGTH-1), v)}
          */
3596     @Deprecated
3597     public Vector<E> rotateLanesLeft(int i) {
3598         final int origin = -i & (length() - 1);
             return slice(origin, this);
3599     }
3600     /**
          * Forwards to {@code slice(i & (length()-1), this)}.
          *
          * @param i the count that is masked with {@code length()-1}
          *        to form the slice origin
          * @return the vector produced by that {@code slice} call
          * @deprecated use {@code v.slice(i & (VLENGTH-1), v)}
          */
3601     @Deprecated
3602     public Vector<E> rotateLanesRight(int i) {
3603         final int origin = i & (length() - 1);
             return slice(origin, this);
3604     }
3605     /**
          * Forwards to {@code broadcast(0).slice(-i & (length()-1), this)}.
          *
          * @param i the count that is negated and masked with
          *        {@code length()-1} to form the slice origin
          * @return the vector produced by that {@code slice} call
          * @deprecated use {@code v.broadcast(0).slice(-i & (VLENGTH-1), v)}
          */
3606     @Deprecated
3607     public Vector<E> shiftLanesLeft(int i) {
             // broadcast(0) is evaluated before the origin, as in the
             // single-expression form.
3608         Vector<E> zeros = broadcast(0);
             final int origin = -i & (length() - 1);
             return zeros.slice(origin, this);
3609     }
3610     /**
          * Forwards to {@code slice(i & (length()-1), broadcast(0))}.
          *
          * @param i the count that is masked with {@code length()-1}
          *        to form the slice origin
          * @return the vector produced by that {@code slice} call
          * @deprecated use {@code v.slice(i & (VLENGTH-1), v.broadcast(0))}
          */
3611     @Deprecated
3612     public Vector<E> shiftLanesRight(int i) {
             // The origin is evaluated before broadcast(0), as in the
             // single-expression form.
3613         final int origin = i & (length() - 1);
             Vector<E> zeros = broadcast(0);
             return slice(origin, zeros);
3614     }
3615 }