1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import jdk.internal.misc.Unsafe;
  28 import jdk.internal.vm.annotation.ForceInline;
  29 
  30 import java.nio.BufferUnderflowException;
  31 import java.nio.ByteBuffer;
  32 
  33 /**
  34  * A {@code Vector} is designed for use in computations that can be transformed
  35  * by a runtime compiler, on supported hardware, to Single Instruction Multiple
  36  * Data (SIMD) computations leveraging vector hardware registers and vector
  37  * hardware instructions.  Such SIMD computations exploit data parallelism to
  38  * perform the same operation on multiple data points simultaneously, in
  39  * less time than it would ordinarily take to perform the same operation
  40  * sequentially on each data point.
  41  * <p>
  42  * A Vector represents an ordered immutable sequence of values of the same
  43  * element type {@code e} that is one of the following primitive types:
  44  * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, or
  45  * {@code double}.  The type variable {@code E} corresponds to the boxed
  46  * element type, specifically the class that wraps a value of {@code e} in an
  47  * object (such as the {@code Integer} class that wraps a value of {@code int}).
  48  * A Vector has a {@link #shape() shape} {@code S}, extending type
  49  * {@link Shape}, that governs the total {@link #bitSize() size} in bits
  50  * of the sequence of values.
  51  * <p>
  52  * The number of values in the sequence is referred to as the Vector
  53  * {@link #length() length}.  The length also corresponds to the number of
  54  * Vector lanes.  The lane element at lane index {@code N} (from {@code 0},
  55  * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th value in
  56  * the sequence.
  57  * Note: this arrangement
  58  * of Vector bit size, Vector length, element bit size, and lane element index
  59  * has no bearing on how a Vector instance and its sequence of elements may be
  60  * arranged in memory or represented as a value in a vector hardware register.
  61  * <p>
  62  * Vector declares a set of vector operations (methods) that are common to all
  63  * element types (such as addition).  Sub-classes of Vector with a concrete
  64  * boxed element type declare further operations that are specific to that
  65  * element type (such as access to element values in lanes, logical operations
  66  * on values of integral elements types, or transcendental operations on values
  67  * of floating point element types).
  68  * There are six sub-classes of Vector corresponding to the supported set
  69  * of element types, {@link ByteVector}, {@link ShortVector},
  70  * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
  71  * {@link DoubleVector}.
  72  * <p>
  73  * Vector values, instances of Vector, are created from a special kind of
  74  * factory called a {@link Species}.  A Species has an
  75  * element type and shape and creates Vector values of the same element type
  76  * and shape.
  77  * A species can be {@link #speciesInstance obtained} given an element type and
  78  * shape, or a preferred species can be {@link #speciesInstance obtained} given
  79  * just an element type where the most optimal shape is selected for the current
  80  * platform.  It is recommended that Species instances be held in
  81  * {@code static final} fields for optimal creation and usage of Vector values
  82  * by the runtime compiler.
  83  * <p>
  84  * Vector operations can be grouped into various categories and their behaviour
  85  * generally specified as follows:
  86  * <ul>
  87  * <li>
  88  * A vector unary operation (1-ary) operates on one input vector to produce a
  89  * result vector.
  90  * For each lane of the input vector the
  91  * lane element is operated on using the specified scalar unary operation and
  92  * the element result is placed into the vector result at the same lane.
  93  * The following pseudocode expresses the behaviour of this operation category,
  94  * where {@code e} is the element type and {@code EVector} corresponds to the
  95  * primitive Vector type:
  96  *
  97  * <pre>{@code
  98  * EVector<S> a = ...;
  99  * e[] ar = new e[a.length()];
 100  * for (int i = 0; i < a.length(); i++) {
 101  *     ar[i] = scalar_unary_op(a.get(i));
 102  * }
 103  * EVector<S> r = a.species().fromArray(ar, 0);
 104  * }</pre>
 105  *
 106  * Unless otherwise specified the input and result vectors will have the same
 107  * element type and shape.
 108  *
 109  * <li>
 110  * A vector binary operation (2-ary) operates on two input
 111  * vectors to produce a result vector.
 112  * For each lane of the two input vectors,
 113  * a and b say, the corresponding lane elements from a and b are operated on
 114  * using the specified scalar binary operation and the element result is placed
 115  * into the vector result at the same lane.
 116  * The following pseudocode expresses the behaviour of this operation category:
 117  *
 118  * <pre>{@code
 119  * EVector<S> a = ...;
 120  * EVector<S> b = ...;
 121  * e[] ar = new e[a.length()];
 122  * for (int i = 0; i < a.length(); i++) {
 123  *     ar[i] = scalar_binary_op(a.get(i), b.get(i));
 124  * }
 125  * EVector<S> r = a.species().fromArray(ar, 0);
 126  * }</pre>
 127  *
 128  * Unless otherwise specified the two input and result vectors will have the
 129  * same element type and shape.
 130  *
 131  * <li>
 132  * Generalizing from unary (1-ary) and binary (2-ary) operations, a vector n-ary
 133  * operation operates on n input vectors to produce a
 134  * result vector.
 135  * For each lane, the n lane elements, one from each input vector, are operated on
 136  * using the specified n-ary scalar operation and the element result is placed
 137  * into the vector result at the same lane.
 138  * Unless otherwise specified the n input and result vectors will have the same
 139  * element type and shape.
 140  *
 141  * <li>
 142  * A vector reduction operation operates on all the lane
 143  * elements of an input vector.
 144  * An accumulation function is applied to all the
 145  * lane elements to produce a scalar result.
 146  * If the reduction operation is associative then the result may be accumulated
 147  * by operating on the lane elements in any order using a specified associative
 148  * scalar binary operation and identity value.  Otherwise, the reduction
 149  * operation specifies the behaviour of the accumulation function.
 150  * The following pseudocode expresses the behaviour of this operation category
 151  * if it is associative:
 152  * <pre>{@code
 153  * EVector<S> a = ...;
 154  * e r = <identity value>;
 155  * for (int i = 0; i < a.length(); i++) {
 156  *     r = assoc_scalar_binary_op(r, a.get(i));
 157  * }
 158  * }</pre>
 159  *
 160  * Unless otherwise specified the scalar result type and element type will be
 161  * the same.
 162  *
 163  * <li>
 164  * A vector binary test operation operates on two input vectors to produce a
 165  * result mask.  For each lane of the two input vectors, a and b say, the
 166  * corresponding lane elements from a and b are operated on using the
 167  * specified scalar binary test operation and the boolean result is placed
 168  * into the mask at the same lane.
 169  * The following pseudocode expresses the behaviour of this operation category:
 170  * <pre>{@code
 171  * EVector<S> a = ...;
 172  * EVector<S> b = ...;
 173  * boolean[] ar = new boolean[a.length()];
 174  * for (int i = 0; i < a.length(); i++) {
 175  *     ar[i] = scalar_binary_test_op(a.get(i), b.get(i));
 176  * }
 177  * Mask<E, S> r = a.species().maskFromArray(ar, 0);
 178  * }</pre>
 179  *
 180  * Unless otherwise specified the two input vectors and result mask will have
 181  * the same element type and shape.
 182  *
 183  * <li>
 184  * The prior categories of operation can be said to operate within the vector
 185  * lanes, where lane access is uniformly applied to all vectors, specifically
 186  * the scalar operation is applied to elements taken from input vectors at the
 187  * same lane, and if appropriate applied to the result vector at the same lane.
 188  * A further category of operation is a cross-lane vector operation where lane
 189  * access is defined by the arguments to the operation.  Cross-lane operations
 190  * generally rearrange lane elements, by permutation (commonly controlled by a
 191  * {@link Shuffle}) or by blending (commonly controlled by a {@link Mask}).
 192  * Such an operation explicitly specifies how it rearranges lane elements.
 193  * </ul>
 194  *
 195  * If a vector operation is represented as an instance method then the first
 196  * input vector corresponds to {@code this} vector and subsequent input vectors
 197  * are arguments of the method.  Otherwise, if an operation is represented as a
 198  * static method then all input vectors are arguments of the method.
 199  * <p>
 200  * If a vector operation does not belong to one of the above categories then
 201  * the operation explicitly specifies how it processes the lane elements of
 202  * input vectors, and where appropriate expresses the behaviour using
 203  * pseudocode.
 204  *
 205  * <p>
 206  * Many vector operations provide an additional {@link Mask mask} accepting
 207  * variant.
 208  * The mask controls which lanes are selected for application of the scalar
 209  * operation.  Masks are a key component for the support of control flow in
 210  * vector computations.
 211  * <p>
 212  * For certain operation categories the mask accepting variants can be specified
 213  * in generic terms.  If a lane of the mask is set then the scalar operation is
 214  * applied to corresponding lane elements, otherwise if a lane of a mask is not
 215  * set then a default scalar operation is applied and its result is placed into
 216  * the vector result at the same lane. The default operation is specified for
 217  * the following operation categories:
 218  * <ul>
 219  * <li>
 220  * For a vector n-ary operation the default operation is a function that returns
 221  * its first argument, specifically a lane element of the first input vector.
 222  * <li>
 223  * For an associative vector reduction operation the default operation is a
 224  * function that returns the identity value.
 225  * <li>
 226  * For a vector binary test operation the default operation is a function that
 227  * returns false.
 228  *</ul>
 229  * Otherwise, the mask accepting variant of the operation explicitly specifies
 230  * how it processes the lane elements of input vectors, and where appropriate
 231  * expresses the behaviour using pseudocode.
 232  *
 233  * <p>
 234  * For convenience many vector operations, of arity greater than one, provide
 235  * an additional scalar accepting variant.  This variant accepts compatible
 236  * scalar values instead of vectors for the second and subsequent input vectors,
 237  * if any.
 238  * Unless otherwise specified the scalar variant behaves as if each scalar value
 239  * is transformed to a vector using the vector Species
 240  * {@code broadcast} operation, and
 241  * then the vector accepting variant of the operation is applied using the
 242  * transformed values.
 243  *
 244  * <p>
 245  * This is a value-based
 246  * class; use of identity-sensitive operations (including reference equality
 247  * ({@code ==}), identity hash code, or synchronization) on instances of
 248  * {@code Vector} may have unpredictable results and should be avoided.
 249  *
 250  * @param <E> the boxed element type of elements in this vector
 251  * @param <S> the type of shape of this vector
 252  */
 253 public abstract class Vector<E, S extends Vector.Shape> {
 254 
 255     Vector() {} // no access modifier: the constructor is package-private
 256 
 257     /**
 258      * Returns the species of this vector.
          * <p>
          * The element type, element size, shape, length and bit size accessors
          * of this class all delegate to the species returned by this method.
 259      *
 260      * @return the species of this vector
 261      */
 262     public abstract Species<E, S> species();
 263 
 264     // @@@
 265 
 266     /**
 267      * Returns the primitive element type reported by this vector's species.
 268      *
 269      * @return the primitive element type of this vector
 270      */
 271     public Class<E> elementType() {
             final Species<E, S> sp = species();
             return sp.elementType();
         }
 272 
 273     /**
 274      * Returns the size, in bits, of a single element of this vector.
 275      *
 276      * @return the element size, in bits, as reported by this vector's species
 277      */
 278     public int elementSize() {
             final Species<E, S> sp = species();
             return sp.elementSize();
         }
 279 
 280     /**
 281      * Returns the shape of this vector, as reported by its species.
 282      *
 283      * @return the shape of this vector
 284      */
 285     public S shape() {
             final Species<E, S> sp = species();
             return sp.shape();
         }
 286 
 287     /**
 288      * Returns the length of this vector, that is its number of lanes.
 289      *
 290      * @return the number of vector lanes, as reported by this vector's species
 291      */
 292     public int length() {
             final Species<E, S> sp = species();
             return sp.length();
         }
 293 
 294     /**
 295      * Returns the total size of this vector, in bits.
 296      *
 297      * @return the total vector size, in bits, as reported by this vector's species
 298      */
 299     public int bitSize() {
             final Species<E, S> sp = species();
             return sp.bitSize();
         }
 300 
 301     //Arithmetic
 302 
 303     /**
 304      * Adds this vector to an input vector.
 305      * <p>
 306      * This is a vector binary operation where the primitive addition operation
 307      * ({@code +}) is applied to lane elements.
 308      *
 309      * @param b the input vector
 310      * @return the result of adding this vector to the input vector
 311      */
 312     public abstract Vector<E, S> add(Vector<E, S> b);
 313 
 314     /**
 315      * Adds this vector to an input vector, selecting lane elements
 316      * controlled by a mask.
 317      * <p>
 318      * This is a vector binary operation where the primitive addition operation
 319      * ({@code +}) is applied to lane elements.
          * As described in the class documentation for mask accepting variants of
          * n-ary operations, a lane whose mask lane is not set receives the lane
          * element of the first input vector, that is of this vector.
 320      *
 321      * @param b the input vector
 322      * @param m the mask controlling lane selection
 323      * @return the result of adding this vector to the given vector
 324      */
 325     public abstract Vector<E, S> add(Vector<E, S> b, Mask<E, S> m);
 326 
 327     public abstract Vector<E, S> addSaturate(Vector<E, S> o); // NOTE(review): undocumented; presumably lane-wise saturating addition -- confirm
 328 
 329     public abstract Vector<E, S> addSaturate(Vector<E, S> o, Mask<E, S> m); // NOTE(review): presumably the mask accepting variant of addSaturate(o) -- confirm
 330 
 331     public abstract Vector<E, S> sub(Vector<E, S> o); // NOTE(review): undocumented; presumably lane-wise subtraction of o from this -- confirm
 332 
 333     public abstract Vector<E, S> sub(Vector<E, S> o, Mask<E, S> m); // NOTE(review): presumably the mask accepting variant of sub(o) -- confirm
 334 
 335     public abstract Vector<E, S> subSaturate(Vector<E, S> o); // NOTE(review): undocumented; presumably lane-wise saturating subtraction -- confirm
 336 
 337     public abstract Vector<E, S> subSaturate(Vector<E, S> o, Mask<E, S> m); // NOTE(review): presumably the mask accepting variant of subSaturate(o) -- confirm
 338 
 339     public abstract Vector<E, S> mul(Vector<E, S> o); // NOTE(review): undocumented; presumably lane-wise multiplication -- confirm
 340 
 341     public abstract Vector<E, S> mul(Vector<E, S> o, Mask<E, S> m); // NOTE(review): presumably the mask accepting variant of mul(o) -- confirm
 342 
 343     /**
 344      * Negates this vector.
 345      * <p>
 346      * This is a vector unary operation where the primitive negation operation
 347      * ({@code -}) is applied to lane elements.
 348      *
 349      * @return the result of negating this vector
 350      */
 351     public abstract Vector<E, S> neg();
 352 
 353     public abstract Vector<E, S> neg(Mask<E, S> m); // NOTE(review): undocumented; presumably the mask accepting variant of neg() -- confirm
 354 
 355     //Maths from java.lang.Math
 356     public abstract Vector<E, S> abs(); // NOTE(review): undocumented; presumably lane-wise absolute value -- confirm
 357 
 358     public abstract Vector<E, S> abs(Mask<E, S> m); // NOTE(review): presumably the mask accepting variant of abs() -- confirm
 359 
 360     public abstract Vector<E, S> min(Vector<E, S> o); // NOTE(review): undocumented; presumably lane-wise minimum of this and o -- confirm
 361 
 362     public abstract Vector<E, S> max(Vector<E, S> o); // NOTE(review): undocumented; presumably lane-wise maximum of this and o -- confirm
 363 
 364     //TODO: Parity
 365 
 366     //Comparisons
 367 
 368     //TODO: N.B. Floating point NaN behaviors?
 369     //TODO: Check the JLS
 370 
 371     /**
 372      * Tests if this vector is equal to the given vector.
 373      * <p>
 374      * This is a vector binary test operation where the primitive equals
 375      * operation ({@code ==}) is applied to lane elements.
 376      *
 377      * @param b the given vector
 378      * @return the result mask of testing if this vector is equal to the given
 379      * vector, lane by lane
 380      */
 381     public abstract Mask<E, S> equal(Vector<E, S> b);
 382 
 383     public abstract Mask<E, S> notEqual(Vector<E, S> o); // NOTE(review): undocumented; presumably the lane-wise != counterpart of equal -- confirm
 384 
 385     /**
 386      * Tests if this vector is less than the given vector.
 387      * <p>
 388      * This is a vector binary test operation where the primitive less than
 389      * operation ({@code <}) is applied to lane elements.
 390      *
 391      * @param b the given vector
 392      * @return the result mask of testing if this vector is less than the given
 393      * vector, lane by lane
 394      */
 395     public abstract Mask<E, S> lessThan(Vector<E, S> b);
 396 
 397     public abstract Mask<E, S> lessThanEq(Vector<E, S> o); // NOTE(review): undocumented; presumably the lane-wise <= test -- confirm
 398 
 399     public abstract Mask<E, S> greaterThan(Vector<E, S> o); // NOTE(review): undocumented; presumably the lane-wise > test -- confirm
 400 
 401     public abstract Mask<E, S> greaterThanEq(Vector<E, S> o); // NOTE(review): undocumented; presumably the lane-wise >= test -- confirm
 402 
 403     //Elemental shifting
 404 
 405     /**
 406      * Rotates left the lane elements of this vector by the given number of
 407      * lanes, {@code i}, modulo the vector length.
 408      * <p>
 409      * This is a cross-lane operation that permutes the lane elements of this
 410      * vector.
 411      * For each lane of the input vector, at lane index {@code N}, the lane
 412      * element is assigned to the result vector at lane index
 413      * {@code (i + N) % this.length()}.
 414      *
 415      * @param i the number of lanes to rotate left
 416      * @return the result of rotating left lane elements of this vector by the
 417      * given number of lanes
 418      */
 419     public abstract Vector<E, S> rotateEL(int i); //Rotate elements left
 420 
 421     /**
 422      * Rotates right the lane elements of this vector by the given number of
 423      * lanes, {@code i}, modulo the vector length.
 424      * <p>
 425      * This is a cross-lane operation that permutes the lane elements of this
 426      * vector and behaves as if rotating left the lane elements by
 427      * {@code this.length() - (i % this.length())} lanes.
 428      *
 429      * @param i the number of lanes to rotate right
 430      * @return the result of rotating right lane elements of this vector by the
 431      * given number of lanes
 432      */
 433     public abstract Vector<E, S> rotateER(int i); //Rotate elements right
 434 
 435     public abstract Vector<E, S> shiftEL(int i); //shift elements left; NOTE(review): undocumented, value of vacated lanes not specified here -- confirm
 436 
 437     public abstract Vector<E, S> shiftER(int i); //shift elements right; NOTE(review): undocumented, value of vacated lanes not specified here -- confirm
 438 
 439     /**
 440      * Blends the lane elements of this vector with those of an input vector,
 441      * selecting lanes controlled by a mask.
 442      * <p>
 443      * For each lane of the mask, at lane index {@code N}, if the mask lane
 444      * is set then the lane element at {@code N} from the input vector is
 445      * selected and placed into the resulting vector at {@code N},
 446      * otherwise the lane element at {@code N} from this vector is
 447      * selected and placed into the resulting vector at {@code N}.
 448      *
 449      * @param b the input vector
 450      * @param m the mask controlling lane selection
 451      * @return the result of blending the lane elements of this vector with
 452      * those of an input vector
 453      */
 454     public abstract Vector<E, S> blend(Vector<E, S> b, Mask<E, S> m);
 455 
 456     /**
 457      * Shuffles the lane elements of this vector and those of an input vector,
 458      * selecting lane indexes controlled by a shuffle.
 459      * <p>
 460      * This is a cross-lane operation that permutes the lane elements of this
 461      * vector and the input vector.
 462      * For each lane of the shuffle, at lane index {@code N}, if the shuffle
 463      * lane element, {@code I}, is less than the length of this vector then the
 464      * lane element at {@code I} from this vector is selected and placed into
 465      * the resulting vector at {@code N}, otherwise the lane element at
 466      * {@code I - this.length()} from the input vector is selected and placed
 467      * into the resulting vector at {@code N}.
 468      *
 469      * @param b the input vector
 470      * @param s the shuffle controlling lane index selection
 471      * @return the result of shuffling the lane elements of this vector and
 472      * those of an input vector
 473      * @throws IndexOutOfBoundsException if any shuffle lane element is
 474      * {@code < 0} or {@code >= 2 * this.length()}
 475      */
 476     public abstract Vector<E, S> shuffle(Vector<E, S> b, Shuffle<E, S> s);
 477 
 478     /**
 479      * Shuffles the lane elements of this vector selecting lane indexes
 480      * controlled by a shuffle.
 481      * <p>
 482      * This is a cross-lane operation that permutes the lane elements of this
 483      * vector.
 484      * For each lane of the shuffle, at lane index {@code N} with lane
 485      * element {@code I}, the lane element at {@code I} from this vector is
 486      * selected and placed into the resulting vector at {@code N}.
 487      *
 488      * @param s the shuffle controlling lane index selection
 489      * @return the result of shuffling the lane elements of this vector
 490      * @throws IndexOutOfBoundsException if any shuffle lane element is
 491      * {@code < 0} or {@code >= this.length()}
 492      */
 493     public abstract Vector<E, S> swizzle(Shuffle<E, S> s);
 494 
 495 
 496     // Conversions
 497 
 498     // Bitwise preserving
 499 
 500     /**
 501      * Transforms this vector into a vector whose shape {@code T} and element
 502      * type {@code F} are those of the given species.
 503      * <p>
 504      * The transformation is delegated to the species; this method behaves as
 505      * if it returns the result of calling
 506      * {@link Species#reshape(Vector) reshape} on the given species:
 507      * <pre>{@code
 508      * return species.reshape(this);
 509      * }</pre>
 510      *
 511      * @param species the species
 512      * @param <F> the boxed element type of the species
 513      * @param <T> the type of shape of the species
 514      * @return a vector transformed by shape and element type
 515      * @see Species#reshape(Vector)
 516      */
 517     @ForceInline
 518     public <F, T extends Shape> Vector<F, T> reshape(Species<F, T> species) {
 519         final Vector<F, T> reshaped = species.reshape(this);
             return reshaped;
 520     }
 521 
 522     /**
 523      * Transforms this vector into a vector of the given species' element type
 524      * {@code F}, keeping this vector's shape {@code S}.
 525      * <p>
 526      * This method is documented to behave as if it returns the result of
 527      * calling {@link Species#rebracket(Vector) rebracket} on the given species
 528      * with this vector:
 529      * <pre>{@code
 530      * return species.rebracket(this);
 531      * }</pre>
 532      *
 533      * @param species the species
 534      * @param <F> the boxed element type of the species
 535      * @return a vector transformed by element type
 536      * @see Species#rebracket(Vector)
 537      */
 538     @ForceInline
 539     public <F> Vector<F, S> rebracket(Species<F, S> species) {
             // NOTE(review): the call below is Species.reshape, not Species.rebracket as
             // the Javadoc states; confirm the two are equivalent when the shape is unchanged.
 540         final Vector<F, S> rebracketed = species.reshape(this);
             return rebracketed;
 541     }
 542 
 543     /**
 544      * Transforms this vector to a vector of the given species shape {@code T},
 545      * where this vector's element type {@code E} is preserved.
 546      * <p>
 547      * This method behaves as if it returns the result of calling
 548      * {@link Species#resize(Vector) resize} on the given species with this vector:
 549      * <pre>{@code
 550      * return species.resize(this);
 551      * }</pre>
 552      *
 553      * @param species the species
 554      * @param <T> the type of shape of the species
 555      * @return a vector transformed by shape, with element type {@code E}
 556      * @see Species#resize(Vector)
 557      */
 558     public abstract <T extends Shape> Vector<E, T> resize(Species<E, T> species);
 559 
 560     // Cast
 561 
 562     /**
 563      * Converts this vector into a vector whose shape {@code T} and element
 564      * type {@code F} are those of the given species.
 565      * <p>
 566      * The conversion is delegated to the species; this method behaves as if
 567      * it returns the result of calling {@link Species#cast(Vector) cast}:
 568      * <pre>{@code
 569      * return species.cast(this);
 570      * }</pre>
 571      *
 572      * @param species the species
 573      * @param <F> the boxed element type of the species
 574      * @param <T> the type of shape of the species
 575      * @return a vector converted by shape and element type
 576      * @see Species#cast(Vector)
 577      */
 578     @ForceInline
 579     public <F, T extends Shape> Vector<F, T> cast(Species<F, T> species) {
 580         final Vector<F, T> converted = species.cast(this);
             return converted;
 581     }
 582 
 583     //Array stores
 584 
 585     /**
 586      * Stores this vector into a byte array starting at an offset.
 587      * <p>
 588      * Bytes are extracted from primitive lane elements according to the
 589      * native byte order of the underlying platform.
 590      * <p>
 591      * This method behaves as if it calls the
 592      * byte buffer, offset, and mask accepting
 593      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 594      * <pre>{@code
 595      * this.intoByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 596      * }</pre>
 597      *
 598      * @param a the byte array
 599      * @param i the offset into the array
 601      * @throws IndexOutOfBoundsException if {@code i < 0} or
 602      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 603      */
 604     public abstract void intoByteArray(byte[] a, int i);
 605 
 606     /**
 607      * Stores this vector into a byte array starting at an offset and using a mask.
 608      * <p>
 609      * Bytes are extracted from primitive lane elements according to the
 610      * native byte order of the underlying platform.
 611      * <p>
 612      * This method behaves as if it calls the
 613      * byte buffer, offset, and mask accepting
 614      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 615      * <pre>{@code
 616      * this.intoByteBuffer(ByteBuffer.wrap(a), i, m);
 617      * }</pre>
 618      *
 619      * @param a the byte array
 620      * @param i the offset into the array
          * @param m the mask
 621      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 622      * or {@code > a.length},
 623      * or if, for any vector lane index {@code N} where the mask at lane
 624      * {@code N} is set,
 625      * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 626      */
 627     public abstract void intoByteArray(byte[] a, int i, Mask<E, S> m);
 628 
 629     /**
 630      * Stores this vector into a {@link ByteBuffer byte buffer} starting at the
 631      * buffer's position.
 632      * <p>
 633      * Bytes are extracted from primitive lane elements according to the
 634      * native byte order of the underlying platform.
 635      * <p>
 636      * This method behaves as if it calls the byte buffer, offset, and mask
 637      * accepting
 638      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 639      * <pre>{@code
 640      *   this.intoByteBuffer(b, b.position(), this.maskAllTrue())
 641      * }</pre>
 642      *
 643      * @param b the byte buffer
 644      * @throws IndexOutOfBoundsException if there are fewer than
 645      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 646      * remaining in the byte buffer
 647      */
 648     public abstract void intoByteBuffer(ByteBuffer b);
 649 
 650     /**
 651      * Stores this vector into a {@link ByteBuffer byte buffer} starting at the
 652      * buffer's position and using a mask.
 653      * <p>
 654      * Bytes are extracted from primitive lane elements according to the
 655      * native byte order of the underlying platform.
 656      * <p>
 657      * This method behaves as if it calls the byte buffer, offset, and mask
 658      * accepting
 659      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 660      * <pre>{@code
 661      *   this.intoByteBuffer(b, b.position(), m)
 662      * }</pre>
 663      *
 664      * @param b the byte buffer
 665      * @param m the mask
 666      * @throws IndexOutOfBoundsException if, for any vector lane index
 667      * {@code N} where the mask at lane {@code N} is set,
 668      * {@code b.position() >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 669      */
 670     public abstract void intoByteBuffer(ByteBuffer b, Mask<E, S> m);
 671 
 672     /**
 673      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 674      * offset into the byte buffer.
 675      * <p>
 676      * Bytes are extracted from primitive lane elements according to the
 677      * native byte order of the underlying platform.
 678      * <p>
 679      * This method behaves as if it calls the byte buffer, offset, and mask
 680      * accepting
 681      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 682      * <pre>{@code
 683      *   this.intoByteBuffer(b, i, this.maskAllTrue())
 684      * }</pre>
 685      *
 686      * @param b the byte buffer
 687      * @param i the offset into the byte buffer
 688      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 689      * or {@code > b.limit()},
 690      * or if there are fewer than
 691      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 692      * remaining in the byte buffer from the given offset
 693      */
 694     public abstract void intoByteBuffer(ByteBuffer b, int i);
 695 
 696     /**
 697      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 698      * offset into the byte buffer and using a mask.
 699      * <p>
 700      * This method behaves as if the byte buffer is viewed as a primitive
 701      * {@link java.nio.Buffer buffer} for the primitive element type,
 702      * according to the native byte order of the underlying platform, and
 703      * the lane elements of this vector are put into the buffer if the
 704      * corresponding mask lane is set.
 705      * The following pseudocode expresses the behaviour, where
 706      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 707      * primitive element type, and {@code EVector<S>} is the primitive
 708      * vector type for this vector:
 709      * <pre>{@code
 710      * EBuffer eb = b.duplicate().
 711      *     order(ByteOrder.nativeOrder()).position(i).
 712      *     asEBuffer();
 713      * e[] es = ((EVector<S>)this).toArray();
 714      * for (int n = 0; n < es.length; n++) {
          *     if (m.isSet(n)) {
 715      *         eb.put(n, es[n]);
          *     }
 716      * }
 717      * }</pre>
 718      *
 719      * @param b the byte buffer
 720      * @param i the offset into the byte buffer
 721      * @param m the mask
 722      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 723      * or {@code > b.limit()},
 724      * or if, for any vector lane index {@code N} where the mask at lane
 725      * {@code N} is set,
 726      * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 727      */
 728     public abstract void intoByteBuffer(ByteBuffer b, int i, Mask<E, S> m);
 729 
 730 
 731     /**
 732      * A factory for creating {@link Vector}, {@link Mask} and {@link Shuffle}
 733      * values of the same element type and shape.
 734      *
 735      * @param <E> the boxed element type of this species
 736      * @param <S> the type of shape of this species
 737      */
 738     public static abstract class Species<E, S extends Shape> {
 739         Species() {}
 740 
 741         /**
 742          * Returns the primitive element type of vectors produced by this
 743          * species.
 744          *
 745          * @return the primitive element type
 746          */
 747         public abstract Class<E> elementType();
 748 
 749         /**
 750          * Returns the element size, in bits, of vectors produced by this
 751          * species.
 752          *
 753          * @return the element size, in bits
 754          */
 755         public abstract int elementSize();
 756 
 757         /**
 758          * Returns the shape of masks, shuffles, and vectors produced by this
 759          * species.
 760          *
 761          * @return the primitive element type
 762          */
 763         public abstract S shape();
 764 
 765         /**
 766          * Returns the mask, shuffe, or vector lanes produced by this species.
 767          *
 768          * @return the the number of lanes
 769          */
 770         public int length() { return shape().length(this); }
 771 
 772         /**
 773          * Returns the total vector size, in bits, of vectors produced by this
 774          * species.
 775          *
 776          * @return the total vector size, in bits
 777          */
 778         public int bitSize() { return shape().bitSize(); }
 779 
 780         // Factory
 781 
 782         /**
 783          * Returns a vector where all lane elements are set to the default
 784          * primitive value.
 785          *
 786          * @return a zero vector
 787          */
 788         public abstract Vector<E, S> zero();
 789 
 790         /**
 791          * Loads a vector from a byte array starting at an offset.
 792          * <p>
 793          * Bytes are composed into primitive lane elements according to the
 794          * native byte order of the underlying platform
 795          * <p>
 796          * This method behaves as if it returns the result of calling the
 797          * byte buffer, offset, and mask accepting
 798          * {@link #fromByteBuffer(ByteBuffer, int, Mask) method} as follows:
 799          * <pre>{@code
 800          * return this.fromByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 801          * }</pre>
 802          *
 803          * @param a the byte array
 804          * @param i the offset into the array
 805          * @return a vector loaded from a byte array
 806          * @throws IndexOutOfBoundsException if {@code i < 0} or
 807          * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 808          */
 809         public abstract Vector<E, S> fromByteArray(byte[] a, int i);
 810 
 811         /**
 812          * Loads a vector from a byte array starting at an offset and using a
 813          * mask.
 814          * <p>
 815          * Bytes are composed into primitive lane elements according to the
 816          * native byte order of the underlying platform.
 817          * <p>
 818          * This method behaves as if it returns the result of calling the
 819          * byte buffer, offset, and mask accepting
 820          * {@link #fromByteBuffer(ByteBuffer, int, Mask) method} as follows:
 821          * <pre>{@code
 822          * return this.fromByteBuffer(ByteBuffer.wrap(a), i, m);
 823          * }</pre>
 824          *
 825          * @param a the byte array
 826          * @param i the offset into the array
 827          * @param m the mask
 828          * @return a vector loaded from a byte array
 829          * @throws IndexOutOfBoundsException if {@code i < 0} or
 830          * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 831          * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 832          * or {@code > a.length},
 833          * for any vector lane index {@code N} where the mask at lane {@code N}
 834          * is set
 835          * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 836          */
 837         public abstract Vector<E, S> fromByteArray(byte[] a, int i, Mask<E, S> m);
 838 
 839         /**
 840          * Loads a vector from a {@link ByteBuffer byte buffer} starting at the
 841          * buffer's position.
 842          * <p>
 843          * Bytes are composed into primitive lane elements according to the
 844          * native byte order of the underlying platform.
 845          * <p>
 846          * This method behaves as if it returns the result of calling the
 847          * byte buffer, offset, and mask accepting
 848          * {@link #fromByteBuffer(ByteBuffer, int, Mask) method} as follows:
 849          * <pre>{@code
 850          *   return this.fromByteBuffer(b, b.position(), this.maskAllTrue())
 851          * }</pre>
 852          *
 853          * @param b the byte buffer
 854          * @return a vector loaded from a byte buffer
 855          * @throws IndexOutOfBoundsException if there are fewer than
 856          * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 857          * remaining in the byte buffer
 858          */
 859         public abstract Vector<E, S> fromByteBuffer(ByteBuffer b);
 860 
 861         /**
 862          * Loads a vector from a {@link ByteBuffer byte buffer} starting at the
 863          * buffer's position and using a mask.
 864          * <p>
 865          * Bytes are composed into primitive lane elements according to the
 866          * native byte order of the underlying platform.
 867          * <p>
 868          * This method behaves as if it returns the result of calling the
 869          * byte buffer, offset, and mask accepting
 870          * {@link #fromByteBuffer(ByteBuffer, int, Mask)} method} as follows:
 871          * <pre>{@code
 872          *   return this.fromByteBuffer(b, b.position(), m)
 873          * }</pre>
 874          *
 875          * @param b the byte buffer
 876          * @param m the mask
 877          * @return a vector loaded from a byte buffer
 878          * @throws IndexOutOfBoundsException if for any vector lane index
 879          * {@code N} where the mask at lane {@code N} is set
 880          * {@code b.position() >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 881          */
 882         public abstract Vector<E, S> fromByteBuffer(ByteBuffer b, Mask<E, S> m);
 883 
 884         /**
 885          * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 886          * offset into the byte buffer.
 887          * <p>
 888          * Bytes are composed into primitive lane elements according to the
 889          * native byte order of the underlying platform.
 890          * <p>
 891          * This method behaves as if it returns the result of calling the
 892          * byte buffer, offset, and mask accepting
 893          * {@link #fromByteBuffer(ByteBuffer, int, Mask)} method} as follows:
 894          * <pre>{@code
 895          *   return this.fromByteBuffer(b, i, this.maskAllTrue())
 896          * }</pre>
 897          *
 898          * @param b the byte buffer
 899          * @param i the offset into the byte buffer
 900          * @return a vector loaded from a byte buffer
 901          * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 902          * or {@code > b.limit()},
 903          * or if there are fewer than
 904          * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 905          * remaining in the byte buffer from the given offset
 906          */
 907         public abstract Vector<E, S> fromByteBuffer(ByteBuffer b, int i);
 908 
 909         /**
 910          * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 911          * offset into the byte buffer and using a mask.
 912          * <p>
 913          * This method behaves as if the byte buffer is viewed as a primitive
 914          * {@link java.nio.Buffer buffer} for the primitive element type,
 915          * according to the native byte order of the underlying platform, and
 916          * the returned vector is loaded with a mask from a primitive array
 917          * obtained from the primitive buffer.
 918          * The following pseudocode expresses the behaviour, where
 919          * {@coce EBuffer} is the primitive buffer type, {@code e} is the
 920          * primitive element type, and {@code ESpecies<S>} is the primitive
 921          * species for {@code e}:
 922          * <pre>{@code
 923          * EBuffer eb = b.duplicate().
 924          *     order(ByteOrder.nativeOrder()).position(i).
 925          *     asEBuffer();
 926          * e[] es = new e[this.length()];
 927          * for (int n = 0; n < t.length && m.isSet(n); n++) {
 928          *     es[n] = eb.get(n);
 929          * }
 930          * Vector<E, S> r = ((ESpecies<S>)this).fromArray(es, 0, m);
 931          * }</pre>
 932          *
 933          * @param b the byte buffer
 934          * @param i the offset into the byte buffer
 935          * @return a vector loaded from a byte buffer
 936          * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 937          * or {@code > b.limit()},
 938          * for any vector lane index {@code N} where the mask at lane {@code N}
 939          * is set
 940          * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 941          */
 942         public abstract Vector<E, S> fromByteBuffer(ByteBuffer b, int i, Mask<E, S> m);
 943 
 944         //Mask and shuffle constructions
 945 
 946         /**
 947          * Returns a mask where each lane is set or unset according to a given
 948          * {@code boolean} value.
 949          * <p>
 950          * For each mask lane, where {@code N} is the mask lane index,
 951          * if the given {@code boolean} value at index {@code N} is {@code true}
 952          * then the mask lane at index {@code N} is set, otherwise it is unset.
 953          *
 954          * @@@ What should happen if bits.length < this.length() ? use the
 955          * default value or throw IndexOutOfBoundsException
 956          *
 957          * @param bits the given {@code boolean} values
 958          * @return a mask where each lane is set or unset according to a given
 959          * {@code boolean} value
 960          */
 961         public abstract Mask<E, S> maskFromValues(boolean... bits);
 962 
 963         /**
 964          * Loads a mask from a {@code boolean} array starting at an offset.
 965          * <p>
 966          * For each mask lane, where {@code N} is the mask lane index,
 967          * if the array element at index {@code i + N} is {@code true} then the
 968          * mask lane at index {@code N} is set, otherwise it is unset.
 969          *
 970          * @param a the {@code boolean} array
 971          * @param i the offset into the array
 972          * @return the mask loaded from a {@code boolean} array
 973          * @throws IndexOutOfBoundsException if {@code i < 0}, or
 974          * {@code i > a.length - this.length()}
 975          */
 976         public abstract Mask<E, S> maskFromArray(boolean[] a, int i);
 977 
 978         /**
 979          * Returns a mask where all lanes are a set.
 980          *
 981          * @return a mask where all lanes are a set
 982          */
 983         public abstract Mask<E, S> maskAllTrue();
 984 
 985         /**
 986          * Returns a mask where all lanes are unset.
 987          *
 988          * @return a mask where all lanes are unset
 989          */
 990         public abstract Mask<E, S> maskAllFalse();
 991 
 992         /**
 993          * Returns a shuffle where each lane element to a given {@code int}
 994          * value.
 995          * <p>
 996          * For each shuffle lane, where {@code N} is the shuffle lane index, the
 997          * the {@code int} value at index {@code N} is placed into the resulting
 998          * shuffle at lane index {@code N}.
 999          *
1000          * @@@ What should happen if indexes.length < this.length() ? use the
1001          * default value or throw IndexOutOfBoundsException
1002          *
1003          * @param indexes the given {@code int} values
1004          * @return a shufle where each lane element is set to a given
1005          * {@code int} value
1006          */
1007         public abstract Shuffle<E, S> shuffleFromValues(int... indexes);
1008 
1009         /**
1010          * Loads a shuffle from an {@code int} array starting at offset.
1011          * <p>
1012          * For each shuffle lane, where {@code N} is the shuffle lane index, the
1013          * array element at index {@code i + N} is placed into the
1014          * resulting shuffle at lane index {@code N}.
1015          *
1016          * @param a the {@code int} array
1017          * @param i the offset into the array
1018          * @return the shuffle loaded from an {@code int} array
1019          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1020          * {@code i > a.length - this.length()}
1021          */
1022         public abstract Shuffle<E, S> shuffleFromArray(int[] a, int i);
1023 
1024         /**
1025          * Returns a shuffle containing lane elements of an {@code int}
1026          * vector.
1027          * <p>
1028          * For each vector lane, where {@code N} is the vector lane index, the
1029          * lane element at index {@code N} is placed into the resulting shuffle
1030          * at lane index {@code N}.
1031          *
1032          * @param v the {@code int} vector
1033          * @return a shuffle containing lane elements of an {@code int} vector
1034          */
1035         public abstract Shuffle<E, S> shuffleFromVector(Vector<Integer, S> v);
1036 
1037         // Shuffle iota, 0...N
1038 
1039         // Vector type/shape transformations
1040 
1041         /**
1042          * Transforms an input vector of shape {@code T} and element type
1043          * {@code F} to a vector of this species shape {@code S} and element
1044          * type {@code E}.
1045          * <p>
1046          * The underlying bits of the input vector are copied to the resulting
1047          * vector without modification, but those bits, before copying, may be
1048          * truncated if the vector bit size is greater than this species bit
1049          * size, or appended to with zero bits if the vector bit size is less
1050          * than this species bit size.
1051          * <p>
1052          * The method behaves as if the input vector is stored into a byte array
1053          * and then the returned vector is loaded from the byte array.
1054          * The following pseudocode expresses the behaviour:
1055          * <pre>{@code
1056          * int alen = Math.max(v.bitSize(), this.bitSize()) / Byte.SIZE;
1057          * byte[] a = new byte[alen];
1058          * v.intoByteArray(a, 0);
1059          * return this.fromByteArray(a, 0);
1060          * }</pre>
1061          *
1062          * @param v the input vector
1063          * @param <F> the boxed element type of the vector
1064          * @param <T> the type of shape of the vector
1065          * @return a vector transformed, by shape and element type, from an
1066          * input vector
1067          */
1068         public abstract <F, T extends Shape> Vector<E, S> reshape(Vector<F, T> v);
1069 
1070         /**
1071          * Transforms an input vector of element type {@code F} to a vector of
1072          * this species element type {@code E}, where the this species shape
1073          * {@code S} is preserved.
1074          * <p>
1075          * The underlying bits of the input vector are copied without
1076          * modification to the resulting vector.
1077          * <p>
1078          * The method behaves as if the input vector is stored into a byte array
1079          * and then the returned vector is loaded from the byte array.
1080          * The following pseudocode expresses the behaviour:
1081          * <pre>{@code
1082          * byte[] a = new byte[v.bitSize() / Byte.SIZE];
1083          * v.intoByteArray(a, 0);
1084          * return this.fromByteArray(a, 0);
1085          * }</pre>
1086          *
1087          * @param v the input vector
1088          * @param <F> the boxed element type of the vector
1089          * @return a vector transformed, by element type, from an input vector
1090          */
1091         public abstract <F> Vector<E, S> rebracket(Vector<F, S> v);
1092 
1093         /**
1094          * Transforms an input vector of shape {@code T} to a vector of this
1095          * species shape {@code S}, where the this species element type
1096          * {@code E} is preserved.
1097          * <p>
1098          * The lane elements of the input vector are copied without
1099          * modification to the resulting vector, but those lane elements, before
1100          * copying, may be truncated if the vector length is greater than this
1101          * species length, or appended to with default element values if the
1102          * vector length is less than this species length.
1103          * <p>
1104          * The method behaves as if the input vector is stored into a byte array
1105          * and then the returned vector is loaded from the byte array.
1106          * The following pseudocode expresses the behaviour:
1107          * <pre>{@code
1108          * int alen = Math.max(v.bitSize(), this.bitSize()) / Byte.SIZE;
1109          * byte[] a = new byte[alen];
1110          * v.intoByteArray(a, 0);
1111          * return fromByteArray(a, 0);
1112          * }</pre>
1113          *
1114          * @param v the input vector
1115          * @param <T> the type of shape of the vector
1116          * @return a vector transformed, by shape, from an input vector
1117          */
1118         public abstract <T extends Shape> Vector<E, S> resize(Vector<E, T> v);
1119 
1120         /**
1121          * Converts an input vector of shape {@code T} and element type
1122          * {@code F} to a vector of this species shape {@code S} and element
1123          * type {@code E}.
1124          * <p>
1125          * For each input vector lane up to the length of the input vector or
1126          * this species, which ever is the minimum, and where {@code N} is the
1127          * vector lane index, the element at index {@code N} of primitive type
1128          * {@code F} is converted, according to primitive conversion rules
1129          * specified by the Java Language Specification, to a value of primitive
1130          * type {@code E} and placed into the resulting vector at lane index
1131          * {@code N}.  If this species length is greater than the input
1132          * vector length then the default primitive value is placed into
1133          * subsequent lanes of the resulting vector.
1134          *
1135          * @param v the input vector
1136          * @param <F> the boxed element type of the vector
1137          * @param <T> the type of shape of the vector
1138          * @return a vector, converted by shape and element type, from an input
1139          * vector.
1140          */
1141         public abstract <F, T extends Shape> Vector<E, S> cast(Vector<F, T> v);
1142 
1143 
1144         // Mask type/shape transformations
1145 
1146         /**
1147          * Transforms an input mask of shape {@code T} and element type
1148          * {@code F} to a vector of this species shape {@code S} and element
1149          * type {@code E}.
1150          * <p>
1151          * The lane elements of the input mask are copied to the resulting
1152          * mask without modification, but those lane elements, before copying,
1153          * may be truncated if the mask length is greater than this species
1154          * length, or appended to with false values if the mask length less than
1155          * this species length.
1156          * <p>
1157          * The method behaves as if the input mask is stored into a boolean
1158          * array and then the returned mask is loaded from the boolean array.
1159          * The following pseudocode expresses the behaviour:
1160          * <pre>{@code
1161          * int alen = Math.max(m.length(), this.length());
1162          * boolean[] a = new boolean[alen];
1163          * m.intoArray(a, 0);
1164          * return this.maskFromArray(a, 0);
1165          * }</pre>
1166          *
1167          * @param m the input mask
1168          * @param <F> the boxed element type of the mask
1169          * @param <T> the type of shape of the mask
1170          * @return a mask transformed, by shape and element type, from an
1171          * input mask
1172          */
1173         public <F, T extends Shape> Mask<E, S> reshape(Mask<F, T> m) {
1174             return maskFromValues(m.toArray());
1175         }
1176 
1177         /**
1178          * Transforms an input mask of element type {@code F} to a mask of
1179          * this species element type {@code E}, where the this species shape
1180          * {@code S} is preserved.
1181          * <p>
1182          * The lane elements of the input mask are copied without
1183          * modification to the resulting mask.
1184          * <p>
1185          * The method behaves as if the input mask is stored into a boolean
1186          * array and then the returned mask is loaded from the boolean array.
1187          * The following pseudocode expresses the behaviour:
1188          * <pre>{@code
1189          * boolean[] a = new byte[m.length()];
1190          * m.intoArray(a, 0);
1191          * return this.maskFromArray(a, 0);
1192          * }</pre>
1193          *
1194          * @param m the input mask
1195          * @param <F> the boxed element type of the mask
1196          * @return a mask transformed, by element type, from an input mask
1197          */
1198         @ForceInline
1199         public <F> Mask<E, S> rebracket(Mask<F, S> m) {
1200             return reshape(m);
1201         }
1202 
1203         /**
1204          * Transforms an input mask of shape {@code T} to a mask of this
1205          * species shape {@code S}, where the this species element type
1206          * {@code E} is preserved.
1207          * <p>
1208          * The lane elements of the input mask are copied to the resulting
1209          * mask without modification, but those lane elements, before copying,
1210          * may be truncated if the mask length is greater than this species
1211          * length, or appended to with false values if the mask length less than
1212          * this species length.
1213          * <p>
1214          * The method behaves as if the input mask is stored into a boolean
1215          * array and then the returned mask is loaded from the boolean array.
1216          * The following pseudocode expresses the behaviour:
1217          * <pre>{@code
1218          * int alen = Math.max(m.length(), this.length());
1219          * boolean[] a = new boolean[alen];
1220          * m.intoArray(a, 0);
1221          * return this.maskFromArray(a, 0);
1222          * }</pre>
1223          *
1224          * @param m the input mask
1225          * @param <T> the type of shape of the mask
1226          * @return a mask transformed, by shape, from an input mask
1227          */
1228         @ForceInline
1229         public <T extends Shape> Mask<E, S> resize(Mask<E, T> m) {
1230             return reshape(m);
1231         }
1232 
1233 
1234         // Shuffle type/shape transformations
1235 
1236         /**
1237          * Transforms an input shuffle of shape {@code T} and element type
1238          * {@code F} to a shuffle of this species shape {@code S} and element
1239          * type {@code E}.
1240          * <p>
1241          * The lane elements of the input shuffle are copied to the resulting
1242          * shuffle without modification, but those lane elements, before copying,
1243          * may be truncated if the shuffle length is greater than this species
1244          * length, or appended to with zero values if the shuffle length less than
1245          * this species length.
1246          * <p>
1247          * The method behaves as if the input shuffle is stored into a int
1248          * array and then the returned shuffle is loaded from the int array.
1249          * The following pseudocode expresses the behaviour:
1250          * <pre>{@code
1251          * int alen = Math.max(s.length(), this.length());
1252          * int[] a = new int[blen];
1253          * s.intoArray(a, 0);
1254          * return this.shuffleFromArray(a, 0);
1255          * }</pre>
1256          *
1257          * @param s the input shuffle
1258          * @param <F> the boxed element type of the shuffle
1259          * @param <T> the type of shape of the shuffle
1260          * @return a shuffle transformed, by shape and element type, from an
1261          * input shuffle
1262          */
1263         public <F, T extends Shape> Shuffle<E, S> reshape(Shuffle<F, T> s) {
1264             return shuffleFromValues(s.toArray());
1265         }
1266 
1267         /**
1268          * Transforms an input shuffle of element type {@code F} to a shuffle of
1269          * this species element type {@code E}, where the this species shape
1270          * {@code S} is preserved.
1271          * <p>
1272          * The lane elements of the input shuffle are copied without
1273          * modification to the resulting shuffle.
1274          * <p>
1275          * The method behaves as if the input shuffle is stored into a int
1276          * array and then the returned shuffle is loaded from the int array.
1277          * The following pseudocode expresses the behaviour:
1278          * <pre>{@code
1279          * int[] a = new byte[s.length()];
1280          * s.intoArray(a, 0);
1281          * return this.shuffleFromArray(a, 0);
1282          * }</pre>
1283          *
1284          * @param s the input shuffle
1285          * @param <F> the boxed element type of the shuffle
1286          * @return a shuffle transformed, by element type, from an input shuffle
1287          */
1288         @ForceInline
1289         public <F> Shuffle<E, S> rebracket(Shuffle<F, S> s) {
1290             return reshape(s);
1291         }
1292 
1293         /**
1294          * Transforms an input shuffle of shape {@code T} to a shuffle of this
1295          * species shape {@code S}, where the this species element type
1296          * {@code E} is preserved.
1297          * <p>
1298          * The lane elements of the input shuffle are copied to the resulting
1299          * shuffle without modification, but those lane elements, before copying,
1300          * may be truncated if the shuffle length is greater than this species
1301          * length, or appended to with zero values if the shuffle length less than
1302          * this species length.
1303          * <p>
1304          * The method behaves as if the input shuffle is stored into a int
1305          * array and then the returned shuffle is loaded from the int array.
1306          * The following pseudocode expresses the behaviour:
1307          * <pre>{@code
1308          * int alen = Math.max(m.length(), this.length());
1309          * boolean[] a = new boolean[alen];
1310          * m.intoArray(a, 0);
1311          * return this.maskFromArray(a, 0);
1312          * }</pre>
1313          *
1314          * @param s the input shuffle
1315          * @param <T> the type of shape of the shuffle
1316          * @return a shuffle transformed, by shape, from an input shuffle
1317          */
1318         @ForceInline
1319         public <T extends Shape> Shuffle<E, S> resize(Shuffle<E, T> s) {
1320             return reshape(s);
1321         }
1322 
1323 
1324         // Species/species transformations
1325 
1326         // Returns a species for a given element type and the length of this
1327         // species.
1328         // The length of the returned species will be equal to the length of
1329         // this species.
1330         //
1331         // Throws IAE if no shape exists for the element type and this species length,
1332 //        public <F> Species<F, ?> toSpeciesWithSameNumberOfLanes(Class<F> c) {
1333 //            // @@@ TODO implement and find better name
1334 //            throw new UnsupportedOperationException();
1335 //        }
1336 
1337     }
1338 
1339     /**
1340      * A {@code Shape} governs the total size, in bits, of a
1341      * {@link Vector}, {@link Mask}, or {@code Shuffle}.  The shape in
1342      * combination with the element type together govern the number of lanes.
1343      */
1344     public static abstract class Shape {
1345         Shape() {}
1346 
1347         /**
1348          * Returns the size, in bits, of this shape.
1349          *
1350          * @return the size, in bits, of this shape.
1351          */
1352         public abstract int bitSize();
1353 
1354         // @@@ remove this method
1355         public int length(Species<?, ?> s) { return bitSize() / s.elementSize(); }
1356     }
1357 
1358     /**
1359      * A {@code Mask} represents an ordered immutable sequence of {@code boolean}
1360      * values.  A Mask can be used with a mask accepting vector operation to
1361      * control the selection and operation of lane elements of input vectors.
1362      * <p>
1363      * The number of values in the sequence is referred to as the Mask
1364      * {@link #length() length}.  The length also corresponds to the number of
1365      * Mask lanes.  The lane element at lane index {@code N} (from {@code 0},
1366      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1367      * value in the sequence.
1368      * A Mask and Vector of the same element type and shape have the same number
1369      * of lanes.
1370      * <p>
1371      * A lane is said to be <em>set</em> if the lane element is {@code true},
1372      * otherwise a lane is said to be <em>unset</em> if the lane element is
1373      * {@code false}.
1374      * <p>
1375      * Mask declares a limited set of unary, binary and reductive mask
1376      * operations.
1377      * <ul>
1378      * <li>
1379      * A mask unary operation (1-ary) operates on one input mask to produce a
1380      * result mask.
1381      * For each lane of the input mask the
1382      * lane element is operated on using the specified scalar unary operation and
1383      * the boolean result is placed into the mask result at the same lane.
1384      * The following pseudocode expresses the behaviour of this operation category:
1385      *
1386      * <pre>{@code
1387      * Mask<E, S> a = ...;
1388      * boolean[] ar = new boolean[a.length()];
1389      * for (int i = 0; i < a.length(); i++) {
1390      *     ar[i] = boolean_unary_op(a.isSet(i));
1391      * }
1392      * Mask<E, S> r = a.species().maskFromArray(ar, 0);
1393      * }</pre>
1394      *
1395      * <li>
1396      * A mask binary operation (2-ary) operates on two input
1397      * masks to produce a result mask.
1398      * For each lane of the two input masks,
1399      * a and b say, the corresponding lane elements from a and b are operated on
1400      * using the specified scalar binary operation and the boolean result is placed
1401      * into the mask result at the same lane.
1402      * The following pseudocode expresses the behaviour of this operation category:
1403      *
1404      * <pre>{@code
1405      * Mask<E, S> a = ...;
1406      * Mask<E, S> b = ...;
1407      * boolean[] ar = new boolean[a.length()];
1408      * for (int i = 0; i < a.length(); i++) {
1409      *     ar[i] = scalar_binary_op(a.isSet(i), b.isSet(i));
1410      * }
1411      * Mask<E, S> r = a.species().maskFromArray(ar, 0);
1412      * }</pre>
1413      *
1414      * @param <E> the boxed element type of this mask
1415      * @param <S> the type of shape of this mask
1416      */
1417     public static abstract class Mask<E, S extends Shape> {
1418         Mask() {}
1419 
1420         /**
1421          * Returns the species of this mask.
1422          *
1423          * @return the species of this mask
1424          */
1425         public abstract Species<E, S> species();
1426 
1427         /**
1428          * Returns the number of mask lanes (the length).
1429          *
1430          * @return the number of mask lanes
1431          */
1432         public int length() { return species().length(); }
1433 
1434         /**
1435          * Returns the lane elements of this mask packed into a {@code long}
1436          * value for at most the first 64 lane elements.
1437          * <p>
1438          * The lane elements are packed in the order of least significant bit
1439          * to most significant bit.
1440          * For each mask lane where {@code N} is the mask lane index, if the
1441          * mask lane is set then the {@code N}'th bit is set to one in the
1442          * resulting {@code long} value, otherwise the {@code N}'th bit is set
1443          * to zero.
1444          *
1445          * @return the lane elements of this mask packed into a {@code long}
1446          * value.
1447          */
1448         public abstract long toLong();
1449 
1450         /**
1451          * Returns an {@code boolean} array containing the lane elements of this
1452          * mask.
1453          * <p>
1454          * This method behaves as if it {@link #intoArray(boolean[], int)} stores}
1455          * this mask into an allocated array and returns that array as
1456          * follows:
1457          * <pre>{@code
1458          * boolean[] a = new boolean[this.length()];
1459          * this.intoArray(a, 0);
1460          * return a;
1461          * }</pre>
1462          *
1463          * @return an array containing the the lane elements of this vector
1464          */
1465         public abstract boolean[] toArray();
1466 
1467         /**
1468          * Stores this mask into a {@code boolean} array starting at offset.
1469          * <p>
1470          * For each mask lane, where {@code N} is the mask lane index,
1471          * the lane element at index {@code N} is stored into the array at index
1472          * {@code i + N}.
1473          *
1474          * @param a the array
1475          * @param i the offset into the array
1476          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1477          * {@code i > a.length - this.length()}
1478          */
1479         public abstract void intoArray(boolean[] a, int i);
1480 
1481         /**
1482          * Returns {@code true} if any of the mask lanes are set.
1483          *
1484          * @return {@code true} if any of the mask lanes are set, otherwise
1485          * {@code false}.
1486          */
1487         public abstract boolean anyTrue();
1488 
1489         /**
1490          * Returns {@code true} if all of the mask lanes are set.
1491          *
1492          * @return {@code true} if all of the mask lanes are set, otherwise
1493          * {@code false}.
1494          */
1495         public abstract boolean allTrue();
1496 
1497         /**
1498          * Returns the number of mask lanes that are set.
1499          *
1500          * @return the number of mask lanes that are set.
1501          */
1502         public abstract int trueCount();
1503 
1504         // TODO: LZ count/numberOfLeadingZeros
1505         // TODO: xor, shiftl, shiftr
1506 
1507         /**
1508          * Logically ands this mask with an input mask.
1509          * <p>
1510          * This is a mask binary operation where the logical and operation
1511          * ({@code &&} is applied to lane elements.
1512          *
1513          * @param o the input mask
1514          * @return the result of logically and'ing this mask with an input mask
1515          */
1516         public abstract Mask<E, S> and(Mask<E, S> o);
1517 
1518         /**
1519          * Logically ors this mask with an input mask.
1520          * <p>
1521          * This is a mask binary operation where the logical or operation
1522          * ({@code ||} is applied to lane elements.
1523          *
1524          * @param o the input mask
1525          * @return the result of logically or'ing this mask with an input mask
1526          */
1527         public abstract Mask<E, S> or(Mask<E, S> o);
1528 
1529         /**
1530          * Logically negates this mask.
1531          * <p>
1532          * This is a mask unary operation where the logical not operation
1533          * ({@code !} is applied to lane elements.
1534          *
1535          * @return the result of logically negating this mask.
1536          */
1537         public abstract Mask<E, S> not();
1538 
1539         /**
1540          * Returns a vector representation of this mask.
1541          * <p>
1542          * For each mask lane, where {@code N} is the mask lane index,
1543          * if the mask lane is set then an element value whose most significant
1544          * bit is set is placed into the resulting vector at lane index
1545          * {@code N}, otherwise the default element value is placed into the
1546          * resulting vector at lane index {@code N}.
1547          *
1548          * @return a vector representation of this mask.
1549          */
1550         public abstract Vector<E, S> toVector();
1551 
1552         /**
1553          * Tests if the lane at index {@code i} is set
1554          * @param i the lane index
1555          *
1556          * @return true if the lane at index {@code i} is set, otherwise false
1557          */
1558         // @@@ Rename to isSet
1559         public abstract boolean getElement(int i);
1560 
1561         /**
1562          * Transforms this mask to a mask of the given species shape {@code T}
1563          * and element type {@code F}.
1564          * <p>
1565          * This method behaves as if it returns the result of calling
1566          * {@link Species#reshape(Mask) reshape} on the given species with this
1567          * mask:
1568          * <pre>{@code
1569          * return species.reshape(this);
1570          * }</pre>
1571          *
1572          * @param species the species
1573          * @param <F> the boxed element type of the species
1574          * @param <T> the type of shape of the species
1575          * @return a mask transformed by shape and element type
1576          * @see Species#reshape(Mask)
1577          */
1578         @ForceInline
1579         public <F, T extends Shape> Mask<F, T> reshape(Species<F, T> species) {
1580             return species.reshape(this);
1581         }
1582 
1583         /**
1584          * Transforms this mask to a mask of the given species element type
1585          * {@code F}, where this mask's shape {@code S} is preserved.
1586          * <p>
1587          * This method behaves as if it returns the result of calling
1588          * {@link Species#rebracket(Mask) rebracket} on the given species with
1589          * this mask:
1590          * <pre>{@code
1591          * return species.rebracket(this);
1592          * }</pre>
1593          *
1594          * @param species the species
1595          * @param <F> the boxed element type of the species
1596          * @return a mask transformed element type
1597          * @see Species#rebracket(Mask)
1598          */
1599         @ForceInline
1600         public <F> Mask<F, S> rebracket(Species<F, S> species) {
1601             return species.reshape(this);
1602         }
1603 
1604         /**
1605          * Transforms this mask to a mask of the given species shape {@code T},
1606          * where this mask's element type {@code E} is preserved.
1607          * <p>
1608          * This method behaves as if it returns the result of calling
1609          * {@link Species#resize(Mask) resize} on the given species with this
1610          * mask:
1611          * <pre>{@code
1612          * return species.resize(this);
1613          * }</pre>
1614          *
1615          * @param species the species
1616          * @param <T> the type of shape of the species
1617          * @return a mask transformed by shape
1618          * @see Species#resize(Mask)
1619          */
1620         @ForceInline
1621         public <T extends Shape> Mask<E, T> resize(Species<E, T> species) {
1622             return species.reshape(this);
1623         }
1624     }
1625 
1626     /**
1627      * A {@code Shuffle} represents an ordered immutable sequence of
1628      * {@code int} values.  A Shuffle can be used with a shuffle accepting
1629      * vector operation to control the rearrangement of lane elements of input
1630      * vectors
1631      * <p>
1632      * The number of values in the sequence is referred to as the Shuffle
1633      * {@link #length() length}.  The length also corresponds to the number of
1634      * Shuffle lanes.  The lane element at lane index {@code N} (from {@code 0},
1635      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1636      * value in the sequence.
1637      * A Shuffle and Vector of the same element type and shape have the same
1638      * number of lanes.
1639      * <p>
1640      * A {@code Shuffle<E, S>} is a specialized and limited form of an
1641      * {@code IntVector<S>} where the Shuffle's lane elements correspond to
1642      * lane index values.
1643      * A Shuffle describes how a lane element of a vector may cross lanes from
1644      * its lane index, {@code i} say, to another lane index whose value is the
1645      * Shuffle's lane element at lane index {@code i}.
1646      *
1647      * @param <E> the boxed element type of this mask
1648      * @param <S> the type of shape of this mask
1649      */
1650     public static abstract class Shuffle<E, S extends Shape> {
1651         Shuffle() {}
1652 
1653         /**
1654          * Returns the species of this shuffle.
1655          *
1656          * @return the species of this shuffle
1657          */
1658         public abstract Species<E, S> species();
1659 
1660         /**
1661          * Returns the specialized {@code int} species of this shuffle.
1662          *
1663          * @return the specialized {@code int} species of this shuffle
1664          * @see #toVector
1665          */
1666         public abstract IntVector.IntSpecies<S> intSpecies();
1667 
1668         /**
1669          * Returns the number of shuffle lanes (the length).
1670          *
1671          * @return the number of shuffle lanes
1672          */
1673         public int length() { return species().length(); }
1674 
1675         /**
1676          * Returns an {@code int} array containing the lane elements of this
1677          * shuffle.
1678          * <p>
1679          * This method behaves as if it {@link #intoArray(int[], int)} stores}
1680          * this shuffle into an allocated array and returns that array as
1681          * follows:
1682          * <pre>{@code
1683          *   int[] a = new int[this.length()];
1684          *   this.intoArray(a, 0);
1685          *   return a;
1686          * }</pre>
1687          *
1688          * @return an array containing the the lane elements of this vector
1689          */
1690         public abstract int[] toArray();
1691 
1692         /**
1693          * Stores this shuffle into an {@code int} array starting at offset.
1694          * <p>
1695          * For each shuffle lane, where {@code N} is the shuffle lane index,
1696          * the lane element at index {@code N} is stored into the array at index
1697          * {@code i + N}.
1698          *
1699          * @param a the array
1700          * @param i the offset into the array
1701          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1702          * {@code i > a.length - this.length()}
1703          */
1704         public abstract void intoArray(int[] a, int i);
1705 
1706         // @@@ rotate/shift/EL/ER
1707 
1708         /**
1709          * Returns an {@link IntVector}, of the same shape as this shuffle,
1710          * containing the lane elements of this shuffle.
1711          * <p>
1712          * This method behaves as if it returns the result of creating an
1713          * {@link IntVector} given this shuffle's {@code int} species and an
1714          * {@code int} array obtained from this shuffle's lane elements, as
1715          * follows:
1716          * <pre>{@code
1717          *   int[] a = this.toArray();
1718          *   return this.intSpecies().fromArray(a, 0);
1719          * }</pre>
1720          *
1721          * @return an {@link IntVector}, of the same shape as this shuffle,
1722          * containing the lane elements of this shuffle
1723          */
1724         public abstract IntVector<S> toVector();
1725 
1726         /**
1727          * Gets the {@code int} lane element at lane index {@code i}
1728          *
1729          * @param i the lane index
1730          * @return the {@code int} lane element at lane index {@code i}
1731          */
1732         public int getElement(int i) { return toArray()[i]; }
1733 
1734         /**
1735          * Transforms this shuffle to a shuffle of the given species shape {@code T}
1736          * and element type {@code F}.
1737          * <p>
1738          * This method behaves as if it returns the result of calling
1739          * {@link Species#reshape(Shuffle) reshape} on the given species with this
1740          * shuffle:
1741          * <pre>{@code
1742          * return species.reshape(this);
1743          * }</pre>
1744          *
1745          * @param species the species
1746          * @param <F> the boxed element type of the species
1747          * @param <T> the type of shape of the species
1748          * @return a shuffle transformed by shape and element type
1749          * @see Species#reshape(Shuffle)
1750          */
1751         @ForceInline
1752         public <F, T extends Shape> Shuffle<F, T> reshape(Species<F, T> species) {
1753             return species.reshape(this);
1754         }
1755 
1756         /**
1757          * Transforms this shuffle to a shuffle of the given species element type
1758          * {@code F}, where this shuffle's shape {@code S} is preserved.
1759          * <p>
1760          * This method behaves as if it returns the result of calling
1761          * {@link Species#rebracket(Shuffle) rebracket} on the given species with this
1762          * shuffle:
1763          * <pre>{@code
1764          * return species.rebracket(this);
1765          * }</pre>
1766          *
1767          * @param species the species
1768          * @param <F> the boxed element type of the species
1769          * @return a shuffle transformed element type
1770          * @see Species#rebracket(Shuffle)
1771          */
1772         @ForceInline
1773         public <F> Shuffle<F, S> rebracket(Species<F, S> species) {
1774             return species.reshape(this);
1775         }
1776 
1777         /**
1778          * Transforms this shuffle to a shuffle of the given species shape {@code T},
1779          * where this shuffle's element type {@code E} is preserved.
1780          * <p>
1781          * This method behaves as if it returns the result of calling
1782          * {@link Species#resize(Shuffle) resize} on the given species with this shuffle:
1783          * <pre>{@code
1784          * return species.resize(this);
1785          * }</pre>
1786          *
1787          * @param species the species
1788          * @param <T> the type of shape of the species
1789          * @return a shuffle transformed by shape
1790          * @see Species#resize(Shuffle)
1791          */
1792         @ForceInline
1793         public <T extends Shape> Shuffle<E, T> resize(Species<E, T> species) {
1794             return species.reshape(this);
1795         }
1796     }
1797 
1798     /**
1799      * Finds a preferred species for an element type.
1800      * <p>
1801      * A preferred species is a species chosen by the platform that has a
1802      * shape of maximal bit size.  A preferred species for different element
1803      * types will have the same shape, and therefore vectors created from
1804      * such species will be shape compatible.
1805      *
1806      * @param c the element type
1807      * @param <E> the boxed element type
1808      * @return a preferred species for an element type
1809      * @throws IllegalArgumentException if no such species exists for the
1810      * element type
1811      */
1812     @SuppressWarnings("unchecked")
1813     public static <E> Vector.Species<E, ?> preferredSpeciesInstance(Class<E> c) {
1814         Unsafe u = Unsafe.getUnsafe();
1815 
1816         int vectorLength = u.getMaxVectorSize(c);
1817         int vectorBitSize = bitSizeForVectorLength(c, vectorLength);
1818         Shape s = shapeForVectorBitSize(vectorBitSize);
1819         return speciesInstance(c, s);
1820     }
1821 
1822     // @@@ public static method on Species?
1823     private static int bitSizeForVectorLength(Class<?> c, int elementSize) {
1824         if (c == float.class) {
1825             return Float.SIZE * elementSize;
1826         }
1827         else if (c == double.class) {
1828             return Double.SIZE * elementSize;
1829         }
1830         else if (c == byte.class) {
1831             return Byte.SIZE * elementSize;
1832         }
1833         else if (c == short.class) {
1834             return Short.SIZE * elementSize;
1835         }
1836         else if (c == int.class) {
1837             return Integer.SIZE * elementSize;
1838         }
1839         else if (c == long.class) {
1840             return Long.SIZE * elementSize;
1841         }
1842         else {
1843             throw new IllegalArgumentException("Bad vector type: " + c.getName());
1844         }
1845     }
1846 
1847     // @@@ public static method on Shape?
1848     private static Shape shapeForVectorBitSize(int bitSize) {
1849         switch (bitSize) {
1850             case 64:
1851                 return Shapes.S_64_BIT;
1852             case 128:
1853                 return Shapes.S_128_BIT;
1854             case 256:
1855                 return Shapes.S_256_BIT;
1856             case 512:
1857                 return Shapes.S_512_BIT;
1858             default:
1859                 throw new IllegalArgumentException("Bad vector bit size: " + bitSize);
1860         }
1861     }
1862 
1863     /**
1864      * Finds a species for an element type and shape.
1865      *
1866      * @param c the element type
1867      * @param s the shape
1868      * @param <E> the boxed element type
1869      * @param <S> the type of shape
1870      * @return a species for an element type and shape
1871      * @throws IllegalArgumentException if no such species exists for the
1872      * element type and/or shape
1873      */
1874     @SuppressWarnings("unchecked")
1875     public static <E, S extends Shape> Vector.Species<E, S> speciesInstance(Class<E> c, S s) {
1876         if (c == float.class) {
1877             return (Vector.Species<E, S>) FloatVector.speciesInstance(s);
1878         }
1879         else if (c == double.class) {
1880             return (Vector.Species<E, S>) DoubleVector.speciesInstance(s);
1881         }
1882         else if (c == byte.class) {
1883             return (Vector.Species<E, S>) ByteVector.speciesInstance(s);
1884         }
1885         else if (c == short.class) {
1886             return (Vector.Species<E, S>) ShortVector.speciesInstance(s);
1887         }
1888         else if (c == int.class) {
1889             return (Vector.Species<E, S>) IntVector.speciesInstance(s);
1890         }
1891         else if (c == long.class) {
1892             return (Vector.Species<E, S>) LongVector.speciesInstance(s);
1893         }
1894         else {
1895             throw new IllegalArgumentException("Bad vector element type: " + c.getName());
1896         }
1897     }
1898 }