1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import jdk.internal.misc.Unsafe;
  28 import jdk.internal.vm.annotation.ForceInline;
  29 import jdk.internal.vm.annotation.Stable;
  30 
  31 import java.nio.ByteBuffer;
  32 import java.nio.ByteOrder;
  33 import java.util.function.IntUnaryOperator;
  34 import jdk.incubator.vector.*;
  35 
  36 /**
  37  * A {@code Vector} is designed for use in computations that can be transformed
  38  * by a runtime compiler, on supported hardware, to Single Instruction Multiple
  39  * Data (SIMD) computations leveraging vector hardware registers and vector
  40  * hardware instructions.  Such SIMD computations exploit data parallelism to
  41  * perform the same operation on multiple data points simultaneously in
  42  * less time than it would ordinarily take to perform the same operation
  43  * sequentially on each data point.
  44  * <p>
  45  * A Vector represents an ordered immutable sequence of values of the same
  46  * element type {@code e} that is one of the following primitive types:
  47  * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, or
  48  * {@code double}.  The type variable {@code E} corresponds to the boxed
  49  * element type, specifically the class that wraps a value of {@code e} in an
  50  * object (such as the {@code Integer} class that wraps a value of {@code int}).
  51  * A Vector has a {@link #shape() shape} {@code S}, extending type
  52  * {@link Shape}, that governs the total {@link #bitSize() size} in bits
  53  * of the sequence of values.
  54  * <p>
  55  * The number of values in the sequence is referred to as the Vector
  56  * {@link #length() length}.  The length also corresponds to the number of
  57  * Vector lanes.  The lane element at lane index {@code N} (from {@code 0},
  58  * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th value in
  59  * the sequence.
  60  * Note: this arrangement
  61  * of Vector bit size, Vector length, element bit size, and lane element index
  62  * has no bearing on how a Vector instance and its sequence of elements may be
  63  * arranged in memory or represented as a value in a vector hardware register.
  64  * <p>
  65  * Vector declares a set of vector operations (methods) that are common to all
  66  * element types (such as addition).  Sub-classes of Vector with a concrete
  67  * boxed element type declare further operations that are specific to that
  68  * element type (such as access to element values in lanes, logical operations
  69  * on values of integral elements types, or transcendental operations on values
  70  * of floating point element types).
  71  * There are six sub-classes of Vector corresponding to the supported set
  72  * of element types, {@link ByteVector}, {@link ShortVector},
  73  * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
  74  * {@link DoubleVector}.
  75  * <p>
  76  * Vector values, instances of Vector, are created from a special kind of
  77  * factory called a {@link Species}.  A Species has an
  78  * element type and shape and creates Vector values of the same element type
  79  * and shape.
  80  * A species can be {@link Species#of(Class, Shape) obtained} given an element
  81  * type and shape, or a preferred species can be
  82  * {@link Species#ofPreferred(Class) obtained} given just an element type where the most
  83  * optimal shape is selected for the current platform.  It is recommended that
  84  * Species instances be held in {@code static final} fields for optimal creation
  85  * and usage of Vector values by the runtime compiler.
  86  * <p>
  87  * Vector operations can be grouped into various categories and their behaviour
  88  * generally specified as follows:
  89  * <ul>
  90  * <li>
  91  * A vector unary operation (1-ary) operates on one input vector to produce a
  92  * result vector.
  93  * For each lane of the input vector the
  94  * lane element is operated on using the specified scalar unary operation and
  95  * the element result is placed into the vector result at the same lane.
  96  * The following pseudocode expresses the behaviour of this operation category,
  97  * where {@code e} is the element type and {@code EVector} corresponds to the
  98  * primitive Vector type:
  99  *
 100  * <pre>{@code
 101  * EVector<S> a = ...;
 102  * e[] ar = new e[a.length()];
 103  * for (int i = 0; i < a.length(); i++) {
 104  *     ar[i] = scalar_unary_op(a.get(i));
 105  * }
 106  * EVector<S> r = a.species().fromArray(ar, 0);
 107  * }</pre>
 108  *
 109  * Unless otherwise specified the input and result vectors will have the same
 110  * element type and shape.
 111  *
 112  * <li>
 113  * A vector binary operation (2-ary) operates on two input
 114  * vectors to produce a result vector.
 115  * For each lane of the two input vectors,
 116  * a and b say, the corresponding lane elements from a and b are operated on
 117  * using the specified scalar binary operation and the element result is placed
 118  * into the vector result at the same lane.
 119  * The following pseudocode expresses the behaviour of this operation category:
 120  *
 121  * <pre>{@code
 122  * EVector<S> a = ...;
 123  * EVector<S> b = ...;
 124  * e[] ar = new e[a.length()];
 125  * for (int i = 0; i < a.length(); i++) {
 126  *     ar[i] = scalar_binary_op(a.get(i), b.get(i));
 127  * }
 128  * EVector<S> r = a.species().fromArray(ar, 0);
 129  * }</pre>
 130  *
 131  * Unless otherwise specified the two input and result vectors will have the
 132  * same element type and shape.
 133  *
 134  * <li>
 135  * Generalizing from unary (1-ary) and binary (2-ary) operations, a vector n-ary
 136  * operation operates on n input vectors to produce a
 137  * result vector.
 138  * For each lane, the n lane elements (one from each input vector) are operated on
 139  * using the specified n-ary scalar operation and the element result is placed
 140  * into the vector result at the same lane.
 141  * Unless otherwise specified the n input and result vectors will have the same
 142  * element type and shape.
 143  *
 144  * <li>
 145  * A vector reduction operation operates on all the lane
 146  * elements of an input vector.
 147  * An accumulation function is applied to all the
 148  * lane elements to produce a scalar result.
 149  * If the reduction operation is associative then the result may be accumulated
 150  * by operating on the lane elements in any order using a specified associative
 151  * scalar binary operation and identity value.  Otherwise, the reduction
 152  * operation specifies the behaviour of the accumulation function.
 153  * The following pseudocode expresses the behaviour of this operation category
 154  * if it is associative:
 155  * <pre>{@code
 156  * EVector<S> a = ...;
 157  * e r = <identity value>;
 158  * for (int i = 0; i < a.length(); i++) {
 159  *     r = assoc_scalar_binary_op(r, a.get(i));
 160  * }
 161  * }</pre>
 162  *
 163  * Unless otherwise specified the scalar result type and element type will be
 164  * the same.
 165  *
 166  * <li>
 167  * A vector binary test operation operates on two input vectors to produce a
 168  * result mask.  For each lane of the two input vectors, a and b say, the
 169  * corresponding lane elements from a and b are operated on using the
 170  * specified scalar binary test operation and the boolean result is placed
 171  * into the mask at the same lane.
 172  * The following pseudocode expresses the behaviour of this operation category:
 173  * <pre>{@code
 174  * EVector<S> a = ...;
 175  * EVector<S> b = ...;
 176  * boolean[] ar = new boolean[a.length()];
 177  * for (int i = 0; i < a.length(); i++) {
 178  *     ar[i] = scalar_binary_test_op(a.get(i), b.get(i));
 179  * }
 180  * Mask<E> r = a.species().maskFromArray(ar, 0);
 181  * }</pre>
 182  *
 183  * Unless otherwise specified the two input vectors and result mask will have
 184  * the same element type and shape.
 185  *
 186  * <li>
 187  * The prior categories of operation can be said to operate within the vector
 188  * lanes, where lane access is uniformly applied to all vectors, specifically
 189  * the scalar operation is applied to elements taken from input vectors at the
 190  * same lane, and if appropriate applied to the result vector at the same lane.
 191  * A further category of operation is a cross-lane vector operation where lane
 192  * access is defined by the arguments to the operation.  Cross-lane operations
 193  * generally rearrange lane elements, for example by permutation (commonly
 194  * controlled by a {@link Shuffle}) or by blending (commonly controlled by a
 195  * {@link Mask}).  Such an operation explicitly specifies how it rearranges lane
 196  * elements.
 197  * </ul>
 198  *
 199  * If a vector operation is represented as an instance method then the first input
 200  * vector corresponds to {@code this} vector and subsequent input vectors are
 201  * arguments of the method.  Otherwise, if an operation is represented as a
 202  * static method then all input vectors are arguments of the method.
 203  * <p>
 204  * If a vector operation does not belong to one of the above categories then
 205  * the operation explicitly specifies how it processes the lane elements of
 206  * input vectors, and where appropriate expresses the behaviour using
 207  * pseudocode.
 208  *
 209  * <p>
 210  * Many vector operations provide an additional {@link Mask mask} accepting
 211  * variant.
 212  * The mask controls which lanes are selected for application of the scalar
 213  * operation.  Masks are a key component for the support of control flow in
 214  * vector computations.
 215  * <p>
 216  * For certain operation categories the mask accepting variants can be specified
 217  * in generic terms.  If a lane of the mask is set then the scalar operation is
 218  * applied to corresponding lane elements, otherwise if a lane of a mask is not
 219  * set then a default scalar operation is applied and its result is placed into
 220  * the vector result at the same lane. The default operation is specified for
 221  * the following operation categories:
 222  * <ul>
 223  * <li>
 224  * For a vector n-ary operation the default operation is a function that returns
 225  * its first argument, specifically a lane element of the first input vector.
 226  * <li>
 227  * For an associative vector reduction operation the default operation is a
 228  * function that returns the identity value.
 229  * <li>
 230  * For a vector binary test operation the default operation is a function that
 231  * returns false.
 232  *</ul>
 233  * Otherwise, the mask accepting variant of the operation explicitly specifies
 234  * how it processes the lane elements of input vectors, and where appropriate
 235  * expresses the behaviour using pseudocode.
 236  *
 237  * <p>
 238  * For convenience many vector operations, of arity greater than one, provide
 239  * an additional scalar accepting variant.  This variant accepts compatible
 240  * scalar values instead of vectors for the second and subsequent input vectors,
 241  * if any.
 242  * Unless otherwise specified the scalar variant behaves as if each scalar value
 243  * is transformed to a vector using the vector Species
 244  * {@code broadcast} operation, and
 245  * then the vector accepting variant of the operation is applied using the transformed
 246  * values.
 247  *
 248  * <p>
 249  * This is a value-based
 250  * class; use of identity-sensitive operations (including reference equality
 251  * ({@code ==}), identity hash code, or synchronization) on instances of
 252  * {@code Vector} may have unpredictable results and should be avoided.
 253  *
 254  * @param <E> the boxed element type of elements in this vector
 255  */
 256 public abstract class Vector<E> {
 257 
 258     Vector() {} // package-private: not callable from outside jdk.incubator.vector
 259 
 260     /**
 261      * Returns the species of this vector.
 262      *
 263      * @return the species of this vector (the source of its element type and shape)
 264      */
 265     public abstract Species<E> species();
 266 
 267     /**
 268      * Reports the primitive element type of this vector, as held by its species.
 269      *
 270      * @return the primitive element type of this vector
 271      */
 272     public Class<E> elementType() { return this.species().elementType(); }
 273 
 274     /**
 275      * Reports the element size, in bits, of this vector, as held by its species.
 276      *
 277      * @return the element size, in bits
 278      */
 279     public int elementSize() { return this.species().elementSize(); }
 280 
 281     /**
 282      * Reports the shape of this vector, as held by its species.
 283      *
 284      * @return the shape of this vector
 285      */
 286     public Shape shape() { return this.species().shape(); }
 287 
 288     /**
 289      * Reports the length of this vector, that is, its number of lanes.
 290      *
 291      * @return the number of vector lanes
 292      */
 293     public int length() { return this.species().length(); }
 294 
 295     /**
 296      * Reports the total size of this vector, in bits, as held by its species.
 297      *
 298      * @return the total vector size, in bits
 299      */
 300     public int bitSize() { return this.species().bitSize(); }
 301 
 302     //Arithmetic
 303 
 304     /**
 305      * Adds this vector to an input vector, lane by lane.
 306      * <p>
 307      * This is a vector binary operation where the primitive addition operation
 308      * ({@code +}) is applied to lane elements.
 309      *
 310      * @param v the input vector
 311      * @return the result of adding this vector to the input vector
 312      */
 313     public abstract Vector<E> add(Vector<E> v);
 314 
 315     /**
 316      * Adds this vector to an input vector, selecting lane elements
 317      * controlled by a mask.
 318      * <p>
 319      * This is a vector binary operation where the primitive addition operation
 320      * ({@code +}) is applied to lane elements in lanes where the mask is set.
 321      *
 322      * @param v the input vector
 323      * @param m the mask; where unset, the result lane holds this vector's element
 324      * @return the result of adding this vector to the input vector
 325      */
 326     public abstract Vector<E> add(Vector<E> v, Mask<E> m);
 327 
 328     /**
 329      * Subtracts an input vector from this vector, lane by lane.
 330      * <p>
 331      * This is a vector binary operation where the primitive subtraction
 332      * operation ({@code -}) is applied to lane elements.
 333      *
 334      * @param v the input vector
 335      * @return the result of subtracting the input vector from this vector
 336      */
 337     public abstract Vector<E> sub(Vector<E> v);
 338 
 339     /**
 340      * Subtracts an input vector from this vector, selecting lane elements
 341      * controlled by a mask.
 342      * <p>
 343      * This is a vector binary operation where the primitive subtraction
 344      * operation ({@code -}) is applied to lane elements where the mask is set.
 345      *
 346      * @param v the input vector
 347      * @param m the mask; where unset, the result lane holds this vector's element
 348      * @return the result of subtracting the input vector from this vector
 349      */
 350     public abstract Vector<E> sub(Vector<E> v, Mask<E> m);
 351 
 352     /**
 353      * Multiplies this vector with an input vector, lane by lane.
 354      * <p>
 355      * This is a vector binary operation where the primitive multiplication
 356      * operation ({@code *}) is applied to lane elements.
 357      *
 358      * @param v the input vector
 359      * @return the result of multiplying this vector with the input vector
 360      */
 361     public abstract Vector<E> mul(Vector<E> v);
 362 
 363     /**
 364      * Multiplies this vector with an input vector, selecting lane elements
 365      * controlled by a mask.
 366      * <p>
 367      * This is a vector binary operation where the primitive multiplication
 368      * operation ({@code *}) is applied to lane elements where the mask is set.
 369      *
 370      * @param v the input vector
 371      * @param m the mask; where unset, the result lane holds this vector's element
 372      * @return the result of multiplying this vector with the input vector
 373      */
 374     public abstract Vector<E> mul(Vector<E> v, Mask<E> m);
 375 
 376     /**
 377      * Negates this vector.
 378      * <p>
 379      * This is a vector unary operation where the primitive negation operation
 380      * ({@code -}) is applied to lane elements.
 381      *
 382      * @return the negation of this vector
 383      */
 384     public abstract Vector<E> neg();
 385 
 386     /**
 387      * Negates this vector, selecting lane elements controlled by a mask.
 388      * <p>
 389      * This is a vector unary operation where the primitive negation operation
 390      * ({@code -}) is applied to lane elements in lanes where the mask is set.
 391      *
 392      * @param m the mask; where unset, the result lane holds this vector's element
 393      * @return the negation of this vector
 394      */
 395     public abstract Vector<E> neg(Mask<E> m);
 396 
 397     // Maths from java.lang.Math
 398 
 399     /**
 400      * Returns the modulus of this vector.
 401      * <p>
 402      * This is a vector unary operation where the operation
 403      * {@code (a) -> (a < 0) ? -a : a} is applied to lane elements.
 404      *
 405      * @return the modulus of this vector
 406      */
 407     public abstract Vector<E> abs();
 408 
 409     /**
 410      * Returns the modulus of this vector, selecting lane elements controlled by
 411      * a mask.
 412      * <p>
 413      * This is a vector unary operation where the operation
 414      * {@code (a) -> (a < 0) ? -a : a} is applied where the mask is set.
 415      *
 416      * @param m the mask; where unset, the result lane holds this vector's element
 417      * @return the modulus of this vector
 418      */
 419     public abstract Vector<E> abs(Mask<E> m);
 420 
 421     /**
 422      * Returns the minimum of this vector and an input vector, lane by lane.
 423      * <p>
 424      * This is a vector binary operation where the operation
 425      * {@code (a, b) -> a < b ? a : b} is applied to lane elements.
 426      *
 427      * @param v the input vector
 428      * @return the minimum of this vector and the input vector
 429      */
 430     public abstract Vector<E> min(Vector<E> v);
 431 
 432     /**
 433      * Returns the minimum of this vector and an input vector,
 434      * selecting lane elements controlled by a mask.
 435      * <p>
 436      * This is a vector binary operation where the operation
 437      * {@code (a, b) -> a < b ? a : b} is applied where the mask is set.
 438      *
 439      * @param v the input vector
 440      * @param m the mask; where unset, the result lane holds this vector's element
 441      * @return the minimum of this vector and the input vector
 442      */
 443     public abstract Vector<E> min(Vector<E> v, Mask<E> m);
 444 
 445     /**
 446      * Returns the maximum of this vector and an input vector, lane by lane.
 447      * <p>
 448      * This is a vector binary operation where the operation
 449      * {@code (a, b) -> a > b ? a : b} is applied to lane elements.
 450      *
 451      * @param v the input vector
 452      * @return the maximum of this vector and the input vector
 453      */
 454     public abstract Vector<E> max(Vector<E> v);
 455 
 456     /**
 457      * Returns the maximum of this vector and an input vector,
 458      * selecting lane elements controlled by a mask.
 459      * <p>
 460      * This is a vector binary operation where the operation
 461      * {@code (a, b) -> a > b ? a : b} is applied where the mask is set.
 462      *
 463      * @param v the input vector
 464      * @param m the mask; where unset, the result lane holds this vector's element
 465      * @return the maximum of this vector and the input vector
 466      */
 467     public abstract Vector<E> max(Vector<E> v, Mask<E> m);
 468 
 469     // Comparisons
 470 
 471     /**
 472      * Tests if this vector is equal to an input vector, lane by lane.
 473      * <p>
 474      * This is a vector binary test operation where the primitive equals
 475      * operation ({@code ==}) is applied to lane elements.
 476      *
 477      * @param v the input vector
 478      * @return the result mask of testing if this vector is equal to the input
 479      * vector
 480      */
 481     public abstract Mask<E> equal(Vector<E> v);
 482 
 483     /**
 484      * Tests if this vector is not equal to an input vector, lane by lane.
 485      * <p>
 486      * This is a vector binary test operation where the primitive not equals
 487      * operation ({@code !=}) is applied to lane elements.
 488      *
 489      * @param v the input vector
 490      * @return the result mask of testing if this vector is not equal to the
 491      * input vector
 492      */
 493     public abstract Mask<E> notEqual(Vector<E> v);
 494 
 495     /**
 496      * Tests if this vector is less than an input vector.
 497      * <p>
 498      * This is a vector binary test operation where the primitive less than
 499      * operation ({@code <}) is applied to lane elements.
 500      *
 501      * @param v the input vector
 502      * @return the result mask of testing if this vector is less than the input
 503      * vector
 504      */
 505     public abstract Mask<E> lessThan(Vector<E> v);
 506 
 507     /**
 508      * Tests if this vector is less than or equal to an input vector.
 509      * <p>
 510      * This is a vector binary test operation where the primitive less than
 511      * or equal to operation ({@code <=}) is applied to lane elements.
 512      *
 513      * @param v the input vector
 514      * @return the result mask of testing if this vector is less than or equal
 515      * to the input vector
 516      */
 517     public abstract Mask<E> lessThanEq(Vector<E> v);
 518 
 519     /**
 520      * Tests if this vector is greater than an input vector.
 521      * <p>
 522      * This is a vector binary test operation where the primitive greater than
 523      * operation ({@code >}) is applied to lane elements.
 524      *
 525      * @param v the input vector
 526      * @return the result mask of testing if this vector is greater than the
 527      * input vector
 528      */
 529     public abstract Mask<E> greaterThan(Vector<E> v);
 530 
 531     /**
 532      * Tests if this vector is greater than or equal to an input vector.
 533      * <p>
 534      * This is a vector binary test operation where the primitive greater than
 535      * or equal to operation ({@code >=}) is applied to lane elements.
 536      *
 537      * @param v the input vector
 538      * @return the result mask of testing if this vector is greater than or
 539      * equal to the input vector
 540      */
 541     public abstract Mask<E> greaterThanEq(Vector<E> v);
 542 
 543     // Elemental shifting
 544 
 545     /**
 546      * Rotates left the lane elements of this vector by the given number of
 547      * lanes, {@code i}, modulo the vector length.
 548      * <p>
 549      * This is a cross-lane operation that permutes the lane elements of this
 550      * vector.
 551      * For each lane of this vector, at lane index {@code N}, the lane
 552      * element is placed into the result vector at lane index
 553      * {@code (i + N) % this.length()}.
 554      *
 555      * @param i the number of lanes to rotate left
 556      * @return the result of rotating left lane elements of this vector by the
 557      * given number of lanes
 558      */
 559     public abstract Vector<E> rotateEL(int i);
 560 
 561     /**
 562      * Rotates right the lane elements of this vector by the given number of
 563      * lanes, {@code i}, modulo the vector length.
 564      * <p>
 565      * This is a cross-lane operation that permutes the lane elements of this
 566      * vector and behaves as if rotating left the lane elements by
 567      * {@code this.length() - (i % this.length())} lanes.
 568      *
 569      * @param i the number of lanes to rotate right
 570      * @return the result of rotating right lane elements of this vector by the
 571      * given number of lanes
 572      */
 573     public abstract Vector<E> rotateER(int i);
 574 
 575     /**
 576      * Shifts left the lane elements of this vector by the given number of
 577      * lanes, {@code i}, modulo the vector length.
 578      * <p>
 579      * This is a cross-lane operation that permutes the lane elements of this
 580      * vector and behaves as if rotating left the lane elements by {@code i},
 581      * and then the zero value is placed into the result vector at lane indexes
 582      * less than {@code i % this.length()}.
 583      *
 584      * @param i the number of lanes to shift left
 585      * @return the result of shifting left lane elements of this vector by the
 586      * given number of lanes
 587      * @throws IllegalArgumentException if {@code i} is {@code < 0}.
 588      */
 589     public abstract Vector<E> shiftEL(int i);
 590 
 591     /**
 592      * Shifts right the lane elements of this vector by the given number of
 593      * lanes, {@code i}, modulo the vector length.
 594      * <p>
 595      * This is a cross-lane operation that permutes the lane elements of this
 596      * vector and behaves as if rotating right the lane elements by {@code i},
 597      * and then the zero value is placed into the result vector at lane indexes
 598      * greater or equal to {@code this.length() - (i % this.length())}.
 599      *
 600      * @param i the number of lanes to shift right
 601      * @return the result of shifting right lane elements of this vector by the
 602      * given number of lanes
 603      * @throws IllegalArgumentException if {@code i} is {@code < 0}.
 604      */
 605     public abstract Vector<E> shiftER(int i);
 606 
 607     /**
 608      * Blends the lane elements of this vector with those of an input vector,
 609      * selecting lanes controlled by a mask.
 610      * <p>
 611      * For each lane of the mask, at lane index {@code N}, if the mask lane
 612      * is set then the lane element at {@code N} from the input vector is
 613      * selected and placed into the resulting vector at {@code N},
 614      * otherwise the lane element at {@code N} from this vector is
 615      * selected and placed into the resulting vector at {@code N}.
 616      *
 617      * @param v the input vector
 618      * @param m the mask controlling lane selection
 619      * @return the result of blending the lane elements of this vector with
 620      * those of an input vector
 621      */
 622     public abstract Vector<E> blend(Vector<E> v, Mask<E> m);
 623 
 624     /**
 625      * Rearranges the lane elements of this vector and those of an input vector,
 626      * selecting lane indexes controlled by a shuffle and a mask.
 627      * <p>
 628      * This is a cross-lane operation that rearranges the lane elements of this
 629      * vector and the input vector.  This method behaves as if it rearranges
 630      * each vector with the given shuffle and then blends the two
 631      * results with the mask:
 632      * <pre>{@code
 633      * return this.rearrange(s).blend(v.rearrange(s), m);
 634      * }</pre>
 635      *
 636      * @param v the input vector
 637      * @param s the shuffle controlling lane index selection of the input vector
 638      * if corresponding mask lanes are set, otherwise controlling lane
 639      * index selection of this vector
 640      * @param m the mask controlling shuffled lane selection
 641      * @return the rearrangement of lane elements of this vector and
 642      * those of an input vector
 643      */
 644     @ForceInline
 645     // rearrange
 646     public abstract Vector<E> rearrange(Vector<E> v,
 647                                            Shuffle<E> s, Mask<E> m);
 648 
 649     /**
 650      * Rearranges the lane elements of this vector, selecting lane indexes
 651      * controlled by a shuffle.
 652      * <p>
 653      * This is a cross-lane operation that rearranges the lane elements of this
 654      * vector.
 655      * For each lane of the shuffle, at lane index {@code N} with lane
 656      * element {@code I}, the lane element at {@code I} from this vector is
 657      * selected and placed into the resulting vector at {@code N}.
 658      *
 659      * @param s the shuffle controlling lane index selection
 660      * @return the rearrangement of the lane elements of this vector
 661      */
 662     // rearrange
 663     public abstract Vector<E> rearrange(Shuffle<E> s);
 664 
 665 
 666     // Conversions
 667 
 668     /**
 669      * Converts this vector into a shuffle, creating a shuffle from vector
 670      * lane elements cast to {@code int} then logically AND'ed with the
 671      * shuffle length minus one.
 672      * <p>
 673      * This method behaves as if it returns the result of creating a shuffle
 674      * given an array of the vector lane elements, as follows:
 675      * <pre>{@code
 676      * e[] a = this.toArray();
 677      * int[] sa = new int[a.length];
 678      * for (int i = 0; i < a.length; i++) {
 679      *     sa[i] = (int) a[i];
 680      * }
 681      * return this.species().shuffleFromValues(sa);
 682      * }</pre>
 683      *
 684      * @return a shuffle representation of this vector
 685      */
 686     public abstract Shuffle<E> toShuffle();
 687 
 688     // Bitwise preserving
 689 
 690     /**
 691      * Transforms this vector to a vector of the given species of element type {@code F}.
 692      * <p>
 693      * The underlying bits of this vector are copied to the resulting
 694      * vector without modification, but those bits, before copying, may be
 695      * truncated if this vector's bit size is greater than the desired vector's bit
 696      * size, or appended to with zero bits if this vector's bit size is less
 697      * than the desired vector's bit size.
 698      * <p>
 699      * The method behaves as if this vector is stored into a byte buffer
 700      * and then the desired vector is loaded from the byte buffer using
 701      * native byte ordering. The implication is that ByteBuffer reads bytes
 702      * and then composes them based on the byte ordering so the result
 703      * depends on this composition.
 704      * <p>
 705      * For example, on a system with ByteOrder.LITTLE_ENDIAN, loading from a
 706      * byte array with values {0,1,2,3} and reshaping to int, leads to bytes
 707      * being composed in order 0x3 0x2 0x1 0x0 which is decimal value 50462976.
 708      * On a system with ByteOrder.BIG_ENDIAN, the value is instead 66051 because
 709      * bytes are composed in order 0x0 0x1 0x2 0x3.
 710      * <p>
 711      * The following pseudocode expresses the behaviour:
 712      * <pre>{@code
 713      * int blen = Math.max(this.bitSize(), s.bitSize()) / Byte.SIZE;
 714      * ByteBuffer bb = ByteBuffer.allocate(blen).order(ByteOrder.nativeOrder());
 715      * this.intoByteBuffer(bb, 0);
 716      * return s.fromByteBuffer(bb, 0);
 717      * }</pre>
 718      *
 719      * @param s species of desired vector
 720      * @param <F> the boxed element type of the species
 721      * @return a vector transformed, by shape and element type, from this vector
 722      */
 723     @ForceInline
 724     public abstract <F> Vector<F> reinterpret(Species<F> s);
 725 
 726     /** Reinterprets this vector's bits as species {@code s} through a native-order byte buffer. */
 727     @ForceInline
 728     @SuppressWarnings("unchecked") // every Vector<F> cast below follows a matching elementType() test
 729     <F> Vector<F> defaultReinterpret(Species<F> s) {
 730         int blen = Math.max(s.bitSize(), this.species().bitSize()) / Byte.SIZE; // bytes of the wider shape
 731         ByteBuffer bb = ByteBuffer.allocate(blen).order(ByteOrder.nativeOrder()); // allocate() zero-fills
 732         this.intoByteBuffer(bb, 0);
 733         Class<?> stype = s.elementType();
 734         if (stype == byte.class) {
 735             return (Vector<F>) ByteVector.fromByteBuffer((ByteVector.ByteSpecies) s, bb, 0);
 736         } else if (stype == short.class) {
 737             return (Vector<F>) ShortVector.fromByteBuffer((ShortVector.ShortSpecies) s, bb, 0);
 738         } else if (stype == int.class) {
 739             return (Vector<F>) IntVector.fromByteBuffer((IntVector.IntSpecies) s, bb, 0);
 740         } else if (stype == long.class) {
 741             return (Vector<F>) LongVector.fromByteBuffer((LongVector.LongSpecies) s, bb, 0);
 742         } else if (stype == float.class) {
 743             return (Vector<F>) FloatVector.fromByteBuffer((FloatVector.FloatSpecies) s, bb, 0);
 744         } else if (stype == double.class) {
 745             return (Vector<F>) DoubleVector.fromByteBuffer((DoubleVector.DoubleSpecies) s, bb, 0);
 746         } else {
 747             throw new UnsupportedOperationException("Bad lane type for reinterpret.");
 748         }
 749     }
 750 
 751     /**
 752      * Transforms this vector to a vector of the same element type but of the shape identified by the given species.
 753      * <p>
 754      * The lane elements of this vector are copied without
 755      * modification to the resulting vector, but those lane elements, before
 756      * copying, may be truncated if this vector's length is greater than the desired
 757      * vector's length, or appended to with default element values if this
 758      * vector's length is less than the desired vector's length.
 759      * <p>
 760      * The method behaves as if this vector is stored into a byte array
 761      * and then the returned vector is loaded from the byte array.
 762      * The following pseudocode expresses the behaviour:
 763      * <pre>{@code
 764      * int alen = Math.max(this.bitSize(), s.bitSize()) / Byte.SIZE;
 765      * byte[] a = new byte[alen];
 766      * this.intoByteArray(a, 0);
 767      * return s.fromByteArray(a, 0);
 768      * }</pre>
 769      *
 770      * @param s species of the desired vector
 771      * @return a vector transformed, by shape, from this vector
 772      */
 773     public abstract Vector<E> reshape(Species<E> s);
 774 
 775     // Cast
 776 
 777     /**
 778      * Converts this vector to a vector of the given species element type {@code F}.
 779      * <p>
 780      * For each vector lane up to the length of this vector or of the
 781      * desired vector, whichever is the minimum, and where {@code N} is the
 782      * vector lane index, the element at index {@code N} of primitive type
 783      * {@code E} is converted, according to primitive conversion rules
 784      * specified by the Java Language Specification, to a value of primitive
 785      * type {@code F} and placed into the resulting vector at lane index
 786      * {@code N}. If the desired vector's length is greater than this
 787      * vector's length then the default primitive value is placed into
 788      * subsequent lanes of the resulting vector.
 789      *
 790      * @param s species of the desired vector
 791      * @param <F> the boxed element type of the species
 792      * @return a vector converted by shape and element type from this vector
 793      */
 794     public abstract <F> Vector<F> cast(Species<F> s);
 795 
 796     //Array stores
 797 
 798     /**
 799      * Stores this vector into a byte array starting at an offset.
 800      * <p>
 801      * Bytes are extracted from primitive lane elements according to the
 802      * native byte order of the underlying platform.
 803      * <p>
 804      * This method behaves as if it calls the
 805      * byte buffer, offset, and mask accepting
 806      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 807      * <pre>{@code
 808      * this.intoByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 809      * }</pre>
 810      *
 811      * @param a the byte array
 812      * @param i the offset into the array at which storing
 813      * starts
 814      * @throws IndexOutOfBoundsException if {@code i < 0} or
 815      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 816      */
 817     public abstract void intoByteArray(byte[] a, int i);
 818 
 819     /**
 820      * Stores this vector into a byte array starting at an offset and using a mask.
 821      * <p>
 822      * Bytes are extracted from primitive lane elements according to the
 823      * native byte order of the underlying platform.
 824      * <p>
 825      * This method behaves as if it calls the
 826      * byte buffer, offset, and mask accepting
 827      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 828      * <pre>{@code
 829      * this.intoByteBuffer(ByteBuffer.wrap(a), i, m);
 830      * }</pre>
 831      *
 832      * @param a the byte array
 833      * @param i the offset into the array
 834      * @param m the mask controlling lane selection
 835      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 836      * or {@code > a.length},
 837      * or if, for any vector lane index {@code N} where the mask at lane
 838      * {@code N} is set,
 839      * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 840      */
 841     public abstract void intoByteArray(byte[] a, int i, Mask<E> m);
 842 
 843     /**
 844      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 845      * offset into the byte buffer.
 846      * <p>
 847      * Bytes are extracted from primitive lane elements according to the
 848      * native byte order of the underlying platform.
 849      * <p>
 850      * This method behaves as if it calls the byte buffer, offset, and mask
 851      * accepting
 852      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 853      * <pre>{@code
 854      *   this.intoByteBuffer(b, i, this.maskAllTrue())
 855      * }</pre>
 856      *
 857      * @param b the byte buffer
 858      * @param i the offset into the byte buffer
 859      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 860      * or {@code > b.limit()},
 861      * or if there are fewer than
 862      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 863      * remaining in the byte buffer from the given offset
 864      */
 865     public abstract void intoByteBuffer(ByteBuffer b, int i);
 866 
 867     /**
 868      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 869      * offset into the byte buffer and using a mask.
 870      * <p>
 871      * This method behaves as if the byte buffer is viewed as a primitive
 872      * {@link java.nio.Buffer buffer} for the primitive element type,
 873      * according to the native byte order of the underlying platform, and
 874      * the lane elements of this vector are put into the buffer if the
 875      * corresponding mask lane is set.
 876      * The following pseudocode expresses the behaviour, where
 877      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 878      * primitive element type, and {@code EVector<S>} is the primitive
 879      * vector type for this vector:
 880      * <pre>{@code
 881      * EBuffer eb = b.duplicate().
 882      *     order(ByteOrder.nativeOrder()).position(i).
 883      *     asEBuffer();
 884      * e[] es = ((EVector<S>)this).toArray();
 885      * for (int n = 0; n < es.length; n++) {
 886      *     if (m.isSet(n)) {
 887      *         eb.put(n, es[n]);
 888      *     }
 889      * }
 890      * }</pre>
 891      *
 892      * @param b the byte buffer
 893      * @param i the offset into the byte buffer
 894      * @param m the mask
 895      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 896      * or {@code > b.limit()},
 897      * or if, for any vector lane index {@code N} where the mask at lane
 898      * {@code N} is set,
 899      * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 900      */
 901     public abstract void intoByteBuffer(ByteBuffer b, int i, Mask<E> m);
 902 
 903 
 904     /**
 905      * The total size, in bits, of a {@link Vector}, {@link Mask}, or
 906      * {@link Shuffle}.  A shape and an element type together determine the
 907      * number of lanes.
 908      */
 909     public enum Shape {
 910         /** A shape that is 64 bits wide */
 911         S_64_BIT(64),
 912         /** A shape that is 128 bits wide */
 913         S_128_BIT(128),
 914         /** A shape that is 256 bits wide */
 915         S_256_BIT(256),
 916         /** A shape that is 512 bits wide */
 917         S_512_BIT(512),
 918         /** A shape as wide as the largest vector the platform supports */
 919         S_Max_BIT(Unsafe.getUnsafe().getMaxVectorSize(byte.class) * 8);
 920 
 921         @Stable
 922         final int bitSize;
 923 
 924         Shape(int bits) {
 925             bitSize = bits;
 926         }
 927 
 928         /**
 929          * Reports how many bits wide this shape is.
 930          *
 931          * @return the size, in bits, of this shape.
 932          */
 933         public int bitSize() {
 934             return this.bitSize;
 935         }
 936 
 937         /**
 938          * Computes the lane count of a vector that has this shape and the
 939          * element size of the given species.
 940          * @param s the species describing the element type
 941          * @return the number of lanes
 942          */
 943         int length(Species<?> s) {
 944             int laneBits = s.elementSize();
 945             return bitSize() / laneBits;
 946         }
 947 
 948         /**
 949          * Maps a size in bits to a shape: 64, 128, 256 and 512 select their own
 950          * shapes; any other positive multiple of 128 up to 2048 selects S_Max_BIT.
 951          * @param bitSize the size in bits
 952          * @return the shape corresponding to bitsize
 953          * @throws IllegalArgumentException if the size maps to no shape
 954          */
 955         public static Shape forBitSize(int bitSize) {
 956             if (bitSize == 64) {
 957                 return Shape.S_64_BIT;
 958             }
 959             if (bitSize == 128) {
 960                 return Shape.S_128_BIT;
 961             }
 962             if (bitSize == 256) {
 963                 return Shape.S_256_BIT;
 964             }
 965             if (bitSize == 512) {
 966                 return Shape.S_512_BIT;
 967             }
 968             if (bitSize <= 0 || bitSize > 2048 || bitSize % 128 != 0) {
 969                 throw new IllegalArgumentException("Bad vector bit size: " + bitSize);
 970             }
 971             return Shape.S_Max_BIT;
 972         }
 973     }
 974 
 975 
 976     /**
 977      * A species pairs an element type {@code E} with a {@link Vector.Shape Shape}
 978      * and describes the vectors, masks and shuffles that share that pair.
 979      * @param <E> the boxed element type of this species
 980      */
 981     public static abstract class Species<E> {
 982         Species() {}
 983 
 984         /**
 985          * Returns the primitive element type of the vectors this species
 986          * produces.
 987          *
 988          * @return the primitive element type
 989          */
 990         public abstract Class<E> elementType();
 991 
 992         /**
 993          * Returns the class used as the vector box type for this species.
 994          *
 995          * @return the vector box type
 996          */
 997         abstract Class<?> boxType();
 998 
 999         /**
1000          * Returns the class used as the vector mask type for this species.
1001          *
1002          * @return the mask type
1003          */
1004         abstract Class<?> maskType();
1005 
1006         /**
1007          * Returns the size, in bits, of a single element of the vectors this
1008          * species produces.
1009          *
1010          * @return the element size, in bits
1011          */
1012         public abstract int elementSize();
1013 
1014         /**
1015          * Returns the shape shared by the masks, shuffles, and vectors this
1016          * species produces.
1017          *
1018          * @return the shape
1019          */
1020         public abstract Shape shape();
1021 
1022         /**
1023          * Returns the shape of the index species that corresponds to this species.
1024          * @return the index shape
1025          */
1026         @ForceInline
1027         public abstract Shape indexShape();
1028 
1029         /**
1030          * Returns the number of lanes in a mask, shuffle, or vector produced by
1031          * this species.
1032          * @return the number of lanes
1033          */
1034         public int length() {
1035             return shape().length(this);
1036         }
1037 
1038         /**
1039          * Returns the total size, in bits, of a vector produced by this
1040          * species.
1041          *
1042          * @return the total vector size, in bits
1043          */
1044         public int bitSize() {
1045             return shape().bitSize();
1046         }
1047 
1048         // Factory
1049 
1050         /**
1051          * Looks up the species for a given element type and shape.
1052          *
1053          * @param c the element type
1054          * @param s the shape
1055          * @param <E> the boxed element type
1056          * @return a species for an element type and shape
1057          * @throws IllegalArgumentException if no such species exists for the
1058          * element type and/or shape
1059          */
1060         @SuppressWarnings("unchecked")
1061         public static <E> Vector.Species<E> of(Class<E> c, Shape s) {
1062             // Dispatch on the primitive class token; anything else is rejected below.
1063             if (c == byte.class) {
1064                 return (Vector.Species<E>) ByteVector.species(s);
1065             }
1066             if (c == short.class) {
1067                 return (Vector.Species<E>) ShortVector.species(s);
1068             }
1069             if (c == int.class) {
1070                 return (Vector.Species<E>) IntVector.species(s);
1071             }
1072             if (c == long.class) {
1073                 return (Vector.Species<E>) LongVector.species(s);
1074             }
1075             if (c == float.class) {
1076                 return (Vector.Species<E>) FloatVector.species(s);
1077             }
1078             if (c == double.class) {
1079                 return (Vector.Species<E>) DoubleVector.species(s);
1080             }
1081             throw new IllegalArgumentException("Bad vector element type: " + c.getName());
1082         }
1083 
1084         /**
1085          * Looks up the preferred species for an element type.
1086          * <p>
1087          * A preferred species is a species chosen by the platform that has a
1088          * shape of maximal bit size.  A preferred species for different element
1089          * types will have the same shape, and therefore vectors created from
1090          * such species will be shape compatible.
1091          *
1092          * @param c the element type
1093          * @param <E> the boxed element type
1094          * @return a preferred species for an element type
1095          * @throws IllegalArgumentException if no such species exists for the
1096          * element type
1097          */
1098         public static <E> Vector.Species<E> ofPreferred(Class<E> c) {
1099             int lanes = Unsafe.getUnsafe().getMaxVectorSize(c);
1100             Shape preferred = Shape.forBitSize(bitSizeForVectorLength(c, lanes));
1101             return Species.of(c, preferred);
1102         }
1103     }
1104 
1105     /**
1106      * Base implementation of {@link Vector.Species} that captures every species
1107      * property in a {@code @Stable} field when constructed and serves the accessor
1108      * methods straight from those fields.
1109      *
1110      * @param <E> the boxed element type of this species
1111      */
1112     abstract static class AbstractSpecies<E> extends Vector.Species<E> {
1113         @Stable protected final Vector.Shape shape;
1114         @Stable protected final Class<E> elementType;
1115         @Stable protected final int elementSize;
1116         @Stable protected final Class<?> boxType;
1117         @Stable protected final Class<?> maskType;
1118         @Stable protected final Shape indexShape;
1119 
1120         /**
1121          * Records the given properties and derives {@link #indexShape} from them.
1122          *
1123          * @param shape the shape of this species
1124          * @param elementType the primitive element type
1125          * @param elementSize the element size, in bits
1126          * @param boxType the vector box type
1127          * @param maskType the vector mask type
1128          */
1129         AbstractSpecies(Vector.Shape shape, Class<E> elementType, int elementSize, Class<?> boxType, Class<?> maskType) {
1130             this.shape = shape;
1131             this.elementType = elementType;
1132             this.elementSize = elementSize;
1133             this.boxType = boxType;
1134             this.maskType = maskType;
1135 
1136             // Long64Vector and Double64Vector get a fixed 64-bit index shape; every other
1137             // species asks bitSizeForVectorLength for an int vector with its lane count.
1138             boolean fixed64 = boxType == Long64Vector.class || boxType == Double64Vector.class;
1139             if (fixed64) {
1140                 this.indexShape = Vector.Shape.S_64_BIT;
1141             } else {
1142                 int lanes = shape.bitSize() / elementSize;
1143                 this.indexShape = Vector.Shape.forBitSize(Vector.bitSizeForVectorLength(int.class, lanes));
1144             }
1145         }
1146 
1147         @Override
1148         @ForceInline
1149         public int bitSize() { return shape.bitSize(); }
1150 
1151         @Override
1152         @ForceInline
1153         public int length() { return shape.bitSize() / elementSize; }
1154 
1155         @Override
1156         @ForceInline
1157         public Class<E> elementType() { return elementType; }
1158 
1159         @Override
1160         @ForceInline
1161         public Class<?> boxType() { return boxType; }
1162 
1163         @Override
1164         @ForceInline
1165         public Class<?> maskType() { return maskType; }
1166 
1167         @Override
1168         @ForceInline
1169         public int elementSize() { return elementSize; }
1170 
1171         @Override
1172         @ForceInline
1173         public Vector.Shape shape() { return shape; }
1174 
1175         @Override
1176         @ForceInline
1177         public Vector.Shape indexShape() { return indexShape; }
1178 
1179         /**
1180          * Describes this species in the form
1181          * {@code Shape[<bitSize> bits, <length> <elementType>s x <elementSize> bits]}.
1182          * @return the textual description
1183          */
1184         @Override
1185         public String toString() {
1186             return "Shape[" + bitSize() + " bits, "
1187                     + length() + " " + elementType.getSimpleName() + "s x "
1188                     + elementSize() + " bits" + "]";
1189         }
1190     }
1191 
1192     /**
1193      * A {@code Mask} represents an ordered immutable sequence of {@code boolean}
1194      * values.  A Mask can be used with a mask accepting vector operation to
1195      * control the selection and operation of lane elements of input vectors.
1196      * <p>
1197      * The number of values in the sequence is referred to as the Mask
1198      * {@link #length() length}.  The length also corresponds to the number of
1199      * Mask lanes.  The lane element at lane index {@code N} (from {@code 0},
1200      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1201      * value in the sequence.
1202      * A Mask and Vector of the same element type and shape have the same number
1203      * of lanes.
1204      * <p>
1205      * A lane is said to be <em>set</em> if the lane element is {@code true},
1206      * otherwise a lane is said to be <em>unset</em> if the lane element is
1207      * {@code false}.
1208      * <p>
1209      * Mask declares a limited set of unary, binary and reductive mask
1210      * operations.
1211      * <ul>
1212      * <li>
1213      * A mask unary operation (1-ary) operates on one input mask to produce a
1214      * result mask.
1215      * For each lane of the input mask the
1216      * lane element is operated on using the specified scalar unary operation and
1217      * the boolean result is placed into the mask result at the same lane.
1218      * The following pseudocode expresses the behaviour of this operation category:
1219      *
1220      * <pre>{@code
1221      * Mask<E> a = ...;
1222      * boolean[] ar = new boolean[a.length()];
1223      * for (int i = 0; i < a.length(); i++) {
1224      *     ar[i] = scalar_unary_op(a.isSet(i));
1225      * }
1226      * Mask<E> r = a.species().maskFromArray(ar, 0);
1227      * }</pre>
1228      *
1229      * <li>
1230      * A mask binary operation (2-ary) operates on two input
1231      * masks to produce a result mask.
1232      * For each lane of the two input masks,
1233      * a and b say, the corresponding lane elements from a and b are operated on
1234      * using the specified scalar binary operation and the boolean result is placed
1235      * into the mask result at the same lane.
1236      * The following pseudocode expresses the behaviour of this operation category:
1237      *
1238      * <pre>{@code
1239      * Mask<E> a = ...;
1240      * Mask<E> b = ...;
1241      * boolean[] ar = new boolean[a.length()];
1242      * for (int i = 0; i < a.length(); i++) {
1243      *     ar[i] = scalar_binary_op(a.isSet(i), b.isSet(i));
1244      * }
1245      * Mask<E> r = a.species().maskFromArray(ar, 0);
1246      * }</pre>
1247      *
1248      * </ul>
1249      * @param <E> the boxed element type of this mask
1250      */
1251     public static abstract class Mask<E> {
1252         Mask() {}
1253 
1254         /**
1255          * Returns the species of this mask.
1256          *
1257          * @return the species of this mask
1258          */
1259         public abstract Species<E> species();
1260 
1261         /**
1262          * Returns the number of mask lanes (the length).
1263          *
1264          * @return the number of mask lanes
1265          */
1266         public int length() { return species().length(); }
1267 
1268         /**
1269          * Converts this mask to a mask of the given species shape of element type {@code F}.
1270          * <p>
1271          * For each mask lane, where {@code N} is the lane index, if the
1272          * mask lane at index {@code N} is set, then the mask lane at index
1273          * {@code N} of the resulting mask is set, otherwise that mask lane is
1274          * not set.
1275          *
1276          * @param s the species of the desired mask
1277          * @param <F> the boxed element type of the species
1278          * @return a mask converted by shape and element type
1279          * @throws IllegalArgumentException if this mask length and the species
1280          * length differ
1281          */
1282         public abstract <F> Mask<F> cast(Species<F> s);
1283 
1284         /**
1285          * Returns the lane elements of this mask packed into a {@code long}
1286          * value for at most the first 64 lane elements.
1287          * <p>
1288          * The lane elements are packed in the order of least significant bit
1289          * to most significant bit.
1290          * For each mask lane where {@code N} is the mask lane index, if the
1291          * mask lane is set then the {@code N}'th bit is set to one in the
1292          * resulting {@code long} value, otherwise the {@code N}'th bit is set
1293          * to zero.
1294          *
1295          * @return the lane elements of this mask packed into a {@code long}
1296          * value.
1297          */
1298         public abstract long toLong();
1299 
1300         /**
1301          * Returns a {@code boolean} array containing the lane elements of this
1302          * mask.
1303          * <p>
1304          * This method behaves as if it {@link #intoArray(boolean[], int) stores}
1305          * this mask into an allocated array and returns that array as
1306          * follows:
1307          * <pre>{@code
1308          * boolean[] a = new boolean[this.length()];
1309          * this.intoArray(a, 0);
1310          * return a;
1311          * }</pre>
1312          *
1313          * @return an array containing the lane elements of this mask
1314          */
1315         public abstract boolean[] toArray();
1316 
1317         /**
1318          * Stores this mask into a {@code boolean} array starting at offset.
1319          * <p>
1320          * For each mask lane, where {@code N} is the mask lane index,
1321          * the lane element at index {@code N} is stored into the array at index
1322          * {@code i + N}.
1323          *
1324          * @param a the array
1325          * @param i the offset into the array
1326          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1327          * {@code i > a.length - this.length()}
1328          */
1329         public abstract void intoArray(boolean[] a, int i);
1330 
1331         /**
1332          * Returns {@code true} if any of the mask lanes are set.
1333          *
1334          * @return {@code true} if any of the mask lanes are set, otherwise
1335          * {@code false}.
1336          */
1337         public abstract boolean anyTrue();
1338 
1339         /**
1340          * Returns {@code true} if all of the mask lanes are set.
1341          *
1342          * @return {@code true} if all of the mask lanes are set, otherwise
1343          * {@code false}.
1344          */
1345         public abstract boolean allTrue();
1346 
1347         /**
1348          * Returns the number of mask lanes that are set.
1349          *
1350          * @return the number of mask lanes that are set.
1351          */
1352         public abstract int trueCount();
1353 
1354         /**
1355          * Logically ands this mask with an input mask.
1356          * <p>
1357          * This is a mask binary operation where the logical and operation
1358          * ({@code &&}) is applied to lane elements.
1359          *
1360          * @param o the input mask
1361          * @return the result of logically and'ing this mask with an input mask
1362          */
1363         public abstract Mask<E> and(Mask<E> o);
1364 
1365         /**
1366          * Logically ors this mask with an input mask.
1367          * <p>
1368          * This is a mask binary operation where the logical or operation
1369          * ({@code ||}) is applied to lane elements.
1370          *
1371          * @param o the input mask
1372          * @return the result of logically or'ing this mask with an input mask
1373          */
1374         public abstract Mask<E> or(Mask<E> o);
1375 
1376         /**
1377          * Logically negates this mask.
1378          * <p>
1379          * This is a mask unary operation where the logical not operation
1380          * ({@code !}) is applied to lane elements.
1381          *
1382          * @return the result of logically negating this mask.
1383          */
1384         public abstract Mask<E> not();
1385 
1386         /**
1387          * Returns a vector representation of this mask.
1388          * <p>
1389          * For each mask lane, where {@code N} is the mask lane index,
1390          * if the mask lane is set then an element value whose most significant
1391          * bit is set is placed into the resulting vector at lane index
1392          * {@code N}, otherwise the default element value is placed into the
1393          * resulting vector at lane index {@code N}.
1394          *
1395          * @return a vector representation of this mask.
1396          */
1397         public abstract Vector<E> toVector();
1398 
1399         /**
1400          * Tests if the lane at index {@code i} is set.
1401          *
1402          * @param i the lane index
1403          * @return {@code true} if the lane at index {@code i} is set, otherwise {@code false}
1404          */
1405         public abstract boolean getElement(int i);
1406 
1407         /**
1408          * Tests if the lane at index {@code i} is set, by delegating to {@link #getElement(int)}.
1409          * @param i the lane index
1410          * @return {@code true} if the lane at index {@code i} is set, otherwise {@code false}
1411          * @see #getElement
1412          */
1413         public boolean isSet(int i) {
1414             return getElement(i);
1415         }
1416     }
1417 
1418     /**
1419      * A {@code Shuffle} represents an ordered immutable sequence of
1420      * {@code int} values.  A Shuffle can be used with a shuffle accepting
1421      * vector operation to control the rearrangement of lane elements of input
1422      * vectors
1423      * <p>
1424      * The number of values in the sequence is referred to as the Shuffle
1425      * {@link #length() length}.  The length also corresponds to the number of
1426      * Shuffle lanes.  The lane element at lane index {@code N} (from {@code 0},
1427      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1428      * value in the sequence.
1429      * A Shuffle and Vector of the same element type and shape have the same
1430      * number of lanes.
1431      * <p>
1432      * A Shuffle describes how a lane element of a vector may cross lanes from
1433      * its lane index, {@code i} say, to another lane index whose value is the
1434      * Shuffle's lane element at lane index {@code i}.  Shuffle lane elements
1435      * will be in the range of {@code 0} (inclusive) to the shuffle length
1436      * (exclusive), and therefore cannot induce out of bounds errors when
1437      * used with vectors operations and vectors of the same length.
1438      *
1439      * @param <E> the boxed element type of this shuffle
1440      */
1441     public static abstract class Shuffle<E> {
1442         Shuffle() {}
1443 
1444         /**
1445          * Returns the species of this shuffle.
1446          *
1447          * @return the species of this shuffle
1448          */
1449         public abstract Species<E> species();
1450 
1451         /**
1452          * Returns the number of shuffle lanes (the length).
1453          *
1454          * @return the number of shuffle lanes
1455          */
1456         public int length() { return species().length(); }
1457 
1458         /**
1459          * Converts this shuffle to a shuffle of the given species of element type {@code F}.
1460          * <p>
1461          * For each shuffle lane, where {@code N} is the lane index, the
1462          * shuffle element at index {@code N} is placed, unmodified, into the
1463          * resulting shuffle at index {@code N}.
1464          *
1465          * @param species species of desired shuffle
1466          * @param <F> the boxed element type of the species
1467          * @return a shuffle converted by shape and element type
1468          * @throws IllegalArgumentException if this shuffle length and the
1469          * species length differ
1470          */
1471         public abstract <F> Shuffle<F> cast(Species<F> species);
1472 
1473         /**
1474          * Returns an {@code int} array containing the lane elements of this
1475          * shuffle.
1476          * <p>
1477          * This method behaves as if it {@link #intoArray(int[], int) stores}
1478          * this shuffle into an allocated array and returns that array as
1479          * follows:
1480          * <pre>{@code
1481          *   int[] a = new int[this.length()];
1482          *   this.intoArray(a, 0);
1483          *   return a;
1484          * }</pre>
1485          *
1486          * @return an array containing the the lane elements of this vector
1487          */
1488         public abstract int[] toArray();
1489 
1490         /**
1491          * Stores this shuffle into an {@code int} array starting at offset.
1492          * <p>
1493          * For each shuffle lane, where {@code N} is the shuffle lane index,
1494          * the lane element at index {@code N} is stored into the array at index
1495          * {@code i + N}.
1496          *
1497          * @param a the array
1498          * @param i the offset into the array
1499          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1500          * {@code i > a.length - this.length()}
1501          */
1502         public abstract void intoArray(int[] a, int i);
1503 
1504         /**
1505          * Converts this shuffle into a vector, creating a vector from shuffle
1506          * lane elements (int values) cast to the vector element type.
1507          * <p>
1508          * This method behaves as if it returns the result of creating a
1509          * vector given an {@code int} array obtained from this shuffle's
1510          * lane elements, as follows:
1511          * <pre>{@code
1512          *   int[] sa = this.toArray();
1513          *   $type$[] va = new $type$[a.length];
1514          *   for (int i = 0; i < a.length; i++) {
1515          *       va[i] = ($type$) sa[i];
1516          *   }
1517          *   return this.species().fromArray(va, 0);
1518          * }</pre>
1519          *
1520          * @return a vector representation of this shuffle
1521          */
1522         public abstract Vector<E> toVector();
1523 
1524         /**
1525          * Gets the {@code int} lane element at lane index {@code i}
1526          *
1527          * @param i the lane index
1528          * @return the {@code int} lane element at lane index {@code i}
1529          */
1530         public int getElement(int i) { return toArray()[i]; }
1531 
1532         /**
1533          * Rearranges the lane elements of this shuffle selecting lane indexes
1534          * controlled by another shuffle.
1535          * <p>
1536          * For each lane of the shuffle, at lane index {@code N} with lane
1537          * element {@code I}, the lane element at {@code I} from this shuffle is
1538          * selected and placed into the resulting shuffle at {@code N}.
1539          *
1540          * @param s the shuffle controlling lane index selection
1541          * @return the rearrangement of the lane elements of this shuffle
1542          */
1543         public abstract Shuffle<E> rearrange(Shuffle<E> s);
1544     }
1545 
1546     /**
1547      * Find bit size based on element type and number of elements.
1548      *
1549      * @param c the element type
1550      * @param numElem number of lanes in the vector
1551      * @return size in bits for vector
1552      */
1553     public static int bitSizeForVectorLength(Class<?> c, int numElem) {
1554         if (c == float.class) {
1555             return Float.SIZE * numElem;
1556         }
1557         else if (c == double.class) {
1558             return Double.SIZE * numElem;
1559         }
1560         else if (c == byte.class) {
1561             return Byte.SIZE * numElem;
1562         }
1563         else if (c == short.class) {
1564             return Short.SIZE * numElem;
1565         }
1566         else if (c == int.class) {
1567             return Integer.SIZE * numElem;
1568         }
1569         else if (c == long.class) {
1570             return Long.SIZE * numElem;
1571         }
1572         else {
1573             throw new IllegalArgumentException("Bad vector type: " + c.getName());
1574         }
1575     }
1576 }