1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import jdk.internal.misc.Unsafe;
  28 import jdk.internal.vm.annotation.ForceInline;
  29 
  30 import java.nio.ByteBuffer;
  31 import java.nio.ByteOrder;
  32 import java.util.function.IntUnaryOperator;
  33 import jdk.incubator.vector.*;
  34 
  35 /**
  36  * A {@code Vector} is designed for use in computations that can be transformed
  37  * by a runtime compiler, on supported hardware, to Single Instruction Multiple
  38  * Data (SIMD) computations leveraging vector hardware registers and vector
  39  * hardware instructions.  Such SIMD computations exploit data parallelism to
  40  * perform the same operation on multiple data points simultaneously, in
  41  * less time than it would ordinarily take to perform the same operation
  42  * sequentially on each data point.
  43  * <p>
  44  * A Vector represents an ordered immutable sequence of values of the same
  45  * element type {@code e} that is one of the following primitive types
  46  * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, or
  47  * {@code double}.  The type variable {@code E} corresponds to the boxed
  48  * element type, specifically the class that wraps a value of {@code e} in an
  49  * object (such as the {@code Integer} class that wraps a value of {@code int}).
  50  * A Vector has a {@link #shape() shape} {@code S}, extending type
  51  * {@link Shape}, that governs the total {@link #bitSize() size} in bits
  52  * of the sequence of values.
  53  * <p>
  54  * The number of values in the sequence is referred to as the Vector
  55  * {@link #length() length}.  The length also corresponds to the number of
  56  * Vector lanes.  The lane element at lane index {@code N} (from {@code 0},
  57  * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th value in
  58  * the sequence.
  59  * Note: this arrangement
  60  * of Vector bit size, Vector length, element bit size, and lane element index
  61  * has no bearing on how a Vector instance and its sequence of elements may be
  62  * arranged in memory or represented as a value in a vector hardware register.
  63  * <p>
  64  * Vector declares a set of vector operations (methods) that are common to all
  65  * element types (such as addition).  Sub-classes of Vector with a concrete
  66  * boxed element type declare further operations that are specific to that
  67  * element type (such as access to element values in lanes, logical operations
  68  * on values of integral elements types, or transcendental operations on values
  69  * of floating point element types).
  70  * There are six sub-classes of Vector corresponding to the supported set
  71  * of element types, {@link ByteVector}, {@link ShortVector},
  72  * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
  73  * {@link DoubleVector}.
  74  * <p>
  75  * Vector values, instances of Vector, are created from a special kind of
  76  * factory called a {@link Species}.  A Species has an
  77  * element type and shape and creates Vector values of the same element type
  78  * and shape.
  79  * A species can be {@link Species#of(Class, Shape) obtained} given an element
  80  * type and shape, or a preferred species can be
  81  * {@link Species#ofPreferred(Class) obtained} given just an element type where the most
  82  * optimal shape is selected for the current platform.  It is recommended that
  83  * Species instances be held in {@code static final} fields for optimal creation
  84  * and usage of Vector values by the runtime compiler.
  85  * <p>
  86  * Vector operations can be grouped into various categories and their behaviour
  87  * generally specified as follows:
  88  * <ul>
  89  * <li>
  90  * A vector unary operation (1-ary) operates on one input vector to produce a
  91  * result vector.
  92  * For each lane of the input vector the
  93  * lane element is operated on using the specified scalar unary operation and
  94  * the element result is placed into the vector result at the same lane.
  95  * The following pseudocode expresses the behaviour of this operation category,
  96  * where {@code e} is the element type and {@code EVector} corresponds to the
  97  * primitive Vector type:
  98  *
  99  * <pre>{@code
 100  * EVector<S> a = ...;
 101  * e[] ar = new e[a.length()];
 102  * for (int i = 0; i < a.length(); i++) {
 103  *     ar[i] = scalar_unary_op(a.get(i));
 104  * }
 105  * EVector<S> r = a.species().fromArray(ar, 0);
 106  * }</pre>
 107  *
 108  * Unless otherwise specified the input and result vectors will have the same
 109  * element type and shape.
 110  *
 111  * <li>
 112  * A vector binary operation (2-ary) operates on two input
 113  * vectors to produce a result vector.
 114  * For each lane of the two input vectors,
 115  * a and b say, the corresponding lane elements from a and b are operated on
 116  * using the specified scalar binary operation and the element result is placed
 117  * into the vector result at the same lane.
 118  * The following pseudocode expresses the behaviour of this operation category:
 119  *
 120  * <pre>{@code
 121  * EVector<S> a = ...;
 122  * EVector<S> b = ...;
 123  * e[] ar = new e[a.length()];
 124  * for (int i = 0; i < a.length(); i++) {
 125  *     ar[i] = scalar_binary_op(a.get(i), b.get(i));
 126  * }
 127  * EVector<S> r = a.species().fromArray(ar, 0);
 128  * }</pre>
 129  *
 130  * Unless otherwise specified the two input and result vectors will have the
 131  * same element type and shape.
 132  *
 133  * <li>
 134  * Generalizing from unary (1-ary) and binary (2-ary) operations, a vector n-ary
 135  * operation operates on n input vectors to produce a
 136  * result vector.
 137  * For each lane, the n lane elements, one from each input vector, are operated on
 138  * using the specified n-ary scalar operation and the element result is placed
 139  * into the vector result at the same lane.
 140  * Unless otherwise specified the n input and result vectors will have the same
 141  * element type and shape.
 142  *
 143  * <li>
 144  * A vector reduction operation operates on all the lane
 145  * elements of an input vector.
 146  * An accumulation function is applied to all the
 147  * lane elements to produce a scalar result.
 148  * If the reduction operation is associative then the result may be accumulated
 149  * by operating on the lane elements in any order using a specified associative
 150  * scalar binary operation and identity value.  Otherwise, the reduction
 151  * operation specifies the behaviour of the accumulation function.
 152  * The following pseudocode expresses the behaviour of this operation category
 153  * if it is associative:
 154  * <pre>{@code
 155  * EVector<S> a = ...;
 156  * e r = <identity value>;
 157  * for (int i = 0; i < a.length(); i++) {
 158  *     r = assoc_scalar_binary_op(r, a.get(i));
 159  * }
 160  * }</pre>
 161  *
 162  * Unless otherwise specified the scalar result type and element type will be
 163  * the same.
 164  *
 165  * <li>
 166  * A vector binary test operation operates on two input vectors to produce a
 167  * result mask.  For each lane of the two input vectors, a and b say, the
 168  * corresponding lane elements from a and b are operated on using the
 169  * specified scalar binary test operation and the boolean result is placed
 170  * into the mask at the same lane.
 171  * The following pseudocode expresses the behaviour of this operation category:
 172  * <pre>{@code
 173  * EVector<S> a = ...;
 174  * EVector<S> b = ...;
 175  * boolean[] ar = new boolean[a.length()];
 176  * for (int i = 0; i < a.length(); i++) {
 177  *     ar[i] = scalar_binary_test_op(a.get(i), b.get(i));
 178  * }
 179  * Mask<E> r = a.species().maskFromArray(ar, 0);
 180  * }</pre>
 181  *
 182  * Unless otherwise specified the two input vectors and result mask will have
 183  * the same element type and shape.
 184  *
 185  * <li>
 186  * The prior categories of operation can be said to operate within the vector
 187  * lanes, where lane access is uniformly applied to all vectors, specifically
 188  * the scalar operation is applied to elements taken from input vectors at the
 189  * same lane, and if appropriate applied to the result vector at the same lane.
 190  * A further category of operation is a cross-lane vector operation where lane
 191  * access is defined by the arguments to the operation.  Cross-lane operations
 192  * generally rearrange lane elements, for example by permutation (commonly
 193  * controlled by a {@link Shuffle}) or by blending (commonly controlled by a
 194  * {@link Mask}).  Such an operation explicitly specifies how it rearranges lane
 195  * elements.
 196  * </ul>
 197  *
 198  * If a vector operation is represented as an instance method then the first input
 199  * vector corresponds to {@code this} vector and subsequent input vectors are
 200  * arguments of the method.  Otherwise, if the operation is represented as a
 201  * static method then all input vectors are arguments of the method.
 202  * <p>
 203  * If a vector operation does not belong to one of the above categories then
 204  * the operation explicitly specifies how it processes the lane elements of
 205  * input vectors, and where appropriate expresses the behaviour using
 206  * pseudocode.
 207  *
 208  * <p>
 209  * Many vector operations provide an additional {@link Mask mask} accepting
 210  * variant.
 211  * The mask controls which lanes are selected for application of the scalar
 212  * operation.  Masks are a key component for the support of control flow in
 213  * vector computations.
 214  * <p>
 215  * For certain operation categories the mask accepting variants can be specified
 216  * in generic terms.  If a lane of the mask is set then the scalar operation is
 217  * applied to corresponding lane elements, otherwise if a lane of a mask is not
 218  * set then a default scalar operation is applied and its result is placed into
 219  * the vector result at the same lane. The default operation is specified for
 220  * the following operation categories:
 221  * <ul>
 222  * <li>
 223  * For a vector n-ary operation the default operation is a function that returns
 224  * its first argument, specifically a lane element of the first input vector.
 225  * <li>
 226  * For an associative vector reduction operation the default operation is a
 227  * function that returns the identity value.
 228  * <li>
 229  * For a vector binary test operation the default operation is a function that
 230  * returns false.
 231  *</ul>
 232  * Otherwise, the mask accepting variant of the operation explicitly specifies
 233  * how it processes the lane elements of input vectors, and where appropriate
 234  * expresses the behaviour using pseudocode.
 235  *
 236  * <p>
 237  * For convenience many vector operations, of arity greater than one, provide
 238  * an additional scalar accepting variant.  This variant accepts compatible
 239  * scalar values instead of vectors for the second and subsequent input vectors,
 240  * if any.
 241  * Unless otherwise specified the scalar variant behaves as if each scalar value
 242  * is transformed to a vector using the vector Species
 243  * {@code broadcast} operation, and
 244  * then the vector accepting variant of the operation is applied using the
 245  * transformed values.
 246  *
 247  * <p>
 248  * This is a value-based
 249  * class; use of identity-sensitive operations (including reference equality
 250  * ({@code ==}), identity hash code, or synchronization) on instances of
 251  * {@code Vector} may have unpredictable results and should be avoided.
 252  *
 253  * @param <E> the boxed element type of elements in this vector
 254  */
 255 public abstract class Vector<E> {
 256 
 257     Vector() {} // package-private constructor: cannot be invoked from outside this package
 258 
 259     /**
 260      * Returns the {@link Species} of this vector, which has the same element type and shape.
 261      *
 262      * @return the species of this vector
 263      */
 264     public abstract Species<E> species();
 265 
 266     /**
 267      * Returns the primitive element type of this vector, as reported by its species.
 268      *
 269      * @return the primitive element type of this vector, for example {@code int.class}
 270      */
 271     public Class<E> elementType() { return species().elementType(); }
 272 
 273     /**
 274      * Returns the element size, in bits, of this vector, as reported by its species.
 275      *
 276      * @return the element size, in bits
 277      */
 278     public int elementSize() { return species().elementSize(); }
 279 
 280     /**
 281      * Returns the shape of this vector, as reported by its species.
 282      *
 283      * @return the shape of this vector
 284      */
 285     public Shape shape() { return species().shape(); }
 286 
 287     /**
 288      * Returns the number of vector lanes (the length), as reported by the species of this vector.
 289      *
 290      * @return the number of vector lanes
 291      */
 292     public int length() { return species().length(); }
 293 
 294     /**
 295      * Returns the total vector size, in bits, as reported by the species of this vector.
 296      *
 297      * @return the total vector size, in bits
 298      */
 299     public int bitSize() { return species().bitSize(); }
 300 
 301     //Arithmetic
 302 
 303     /**
 304      * Adds this vector to an input vector.
 305      * <p>
 306      * This is a vector binary operation where the primitive addition operation
 307      * ({@code +}) is applied to corresponding lane elements.
 308      *
 309      * @param v the input vector
 310      * @return the result of adding this vector to the input vector
 311      */
 312     public abstract Vector<E> add(Vector<E> v);
 313 
 314     /**
 315      * Adds this vector to an input vector, selecting lane elements
 316      * controlled by a mask.
 317      * <p>
 318      * This is a vector binary operation where the primitive addition operation
 319      * ({@code +}) is applied to corresponding lane elements whose mask lane is set.
 320      *
 321      * @param v the input vector
 322      * @param m the mask controlling lane selection
 323      * @return the result of adding this vector to the input vector
 324      */
 325     public abstract Vector<E> add(Vector<E> v, Mask<E> m);
 326 
 327     /**
 328      * Subtracts an input vector from this vector.
 329      * <p>
 330      * This is a vector binary operation where the primitive subtraction
 331      * operation ({@code -}) is applied to corresponding lane elements.
 332      *
 333      * @param v the input vector
 334      * @return the result of subtracting the input vector from this vector
 335      */
 336     public abstract Vector<E> sub(Vector<E> v);
 337 
 338     /**
 339      * Subtracts an input vector from this vector, selecting lane elements
 340      * controlled by a mask.
 341      * <p>
 342      * This is a vector binary operation where the primitive subtraction
 343      * operation ({@code -}) is applied to corresponding lane elements whose mask lane is set.
 344      *
 345      * @param v the input vector
 346      * @param m the mask controlling lane selection
 347      * @return the result of subtracting the input vector from this vector
 348      */
 349     public abstract Vector<E> sub(Vector<E> v, Mask<E> m);
 350 
 351     /**
 352      * Multiplies this vector with an input vector.
 353      * <p>
 354      * This is a vector binary operation where the primitive multiplication
 355      * operation ({@code *}) is applied to corresponding lane elements.
 356      *
 357      * @param v the input vector
 358      * @return the result of multiplying this vector with the input vector
 359      */
 360     public abstract Vector<E> mul(Vector<E> v);
 361 
 362     /**
 363      * Multiplies this vector with an input vector, selecting lane elements
 364      * controlled by a mask.
 365      * <p>
 366      * This is a vector binary operation where the primitive multiplication
 367      * operation ({@code *}) is applied to corresponding lane elements whose mask lane is set.
 368      *
 369      * @param v the input vector
 370      * @param m the mask controlling lane selection
 371      * @return the result of multiplying this vector with the input vector
 372      */
 373     public abstract Vector<E> mul(Vector<E> v, Mask<E> m);
 374 
 375     /**
 376      * Negates this vector.
 377      * <p>
 378      * This is a vector unary operation where the primitive negation operation
 379      * ({@code -}) is applied to lane elements.
 380      *
 381      * @return the negation of this vector
 382      */
 383     public abstract Vector<E> neg();
 384 
 385     /**
 386      * Negates this vector, selecting lane elements controlled by a mask.
 387      * <p>
 388      * This is a vector unary operation where the primitive negation operation
 389      * ({@code -}) is applied to lane elements whose mask lane is set.
 390      *
 391      * @param m the mask controlling lane selection
 392      * @return the negation of this vector
 393      */
 394     public abstract Vector<E> neg(Mask<E> m);
 395 
 396     // Maths from java.lang.Math
 397 
 398     /**
 399      * Returns the modulus (absolute value) of this vector.
 400      * <p>
 401      * This is a vector unary operation where the operation
 402      * {@code (a) -> (a < 0) ? -a : a} is applied to lane elements.
 403      *
 404      * @return the modulus of this vector
 405      */
 406     public abstract Vector<E> abs();
 407 
 408     /**
 409      * Returns the modulus (absolute value) of this vector, selecting lane
 410      * elements controlled by a mask.
 411      * <p>
 412      * This is a vector unary operation where the operation
 413      * {@code (a) -> (a < 0) ? -a : a} is applied to lane elements whose mask lane is set.
 414      *
 415      * @param m the mask controlling lane selection
 416      * @return the modulus of this vector
 417      */
 418     public abstract Vector<E> abs(Mask<E> m);
 419 
 420     /**
 421      * Returns the minimum of this vector and an input vector.
 422      * <p>
 423      * This is a vector binary operation where the operation
 424      * {@code (a, b) -> a < b ? a : b} is applied to corresponding lane elements.
 425      *
 426      * @param v the input vector
 427      * @return the minimum of this vector and the input vector
 428      */
 429     public abstract Vector<E> min(Vector<E> v);
 430 
 431     /**
 432      * Returns the minimum of this vector and an input vector,
 433      * selecting lane elements controlled by a mask.
 434      * <p>
 435      * This is a vector binary operation where the operation
 436      * {@code (a, b) -> a < b ? a : b} is applied to lane elements whose mask lane is set.
 437      *
 438      * @param v the input vector
 439      * @param m the mask controlling lane selection
 440      * @return the minimum of this vector and the input vector
 441      */
 442     public abstract Vector<E> min(Vector<E> v, Mask<E> m);
 443 
 444     /**
 445      * Returns the maximum of this vector and an input vector.
 446      * <p>
 447      * This is a vector binary operation where the operation
 448      * {@code (a, b) -> a > b ? a : b} is applied to corresponding lane elements.
 449      *
 450      * @param v the input vector
 451      * @return the maximum of this vector and the input vector
 452      */
 453     public abstract Vector<E> max(Vector<E> v);
 454 
 455     /**
 456      * Returns the maximum of this vector and an input vector,
 457      * selecting lane elements controlled by a mask.
 458      * <p>
 459      * This is a vector binary operation where the operation
 460      * {@code (a, b) -> a > b ? a : b} is applied to lane elements whose mask lane is set.
 461      *
 462      * @param v the input vector
 463      * @param m the mask controlling lane selection
 464      * @return the maximum of this vector and the input vector
 465      */
 466     public abstract Vector<E> max(Vector<E> v, Mask<E> m);
 467 
 468     // Comparisons
 469 
 470     /**
 471      * Tests if this vector is equal to an input vector.
 472      * <p>
 473      * This is a vector binary test operation where the primitive equals
 474      * operation ({@code ==}) is applied to corresponding lane elements.
 475      *
 476      * @param v the input vector
 477      * @return the result mask of testing if this vector is equal to the input
 478      * vector
 479      */
 480     public abstract Mask<E> equal(Vector<E> v);
 481 
 482     /**
 483      * Tests if this vector is not equal to an input vector.
 484      * <p>
 485      * This is a vector binary test operation where the primitive not equals
 486      * operation ({@code !=}) is applied to corresponding lane elements.
 487      *
 488      * @param v the input vector
 489      * @return the result mask of testing if this vector is not equal to the
 490      * input vector
 491      */
 492     public abstract Mask<E> notEqual(Vector<E> v);
 493 
 494     /**
 495      * Tests if this vector is less than an input vector.
 496      * <p>
 497      * This is a vector binary test operation where the primitive less than
 498      * operation ({@code <}) is applied to corresponding lane elements.
 499      *
 500      * @param v the input vector
 501      * @return the result mask of testing if this vector is less than the input
 502      * vector
 503      */
 504     public abstract Mask<E> lessThan(Vector<E> v);
 505 
 506     /**
 507      * Tests if this vector is less than or equal to an input vector.
 508      * <p>
 509      * This is a vector binary test operation where the primitive less than
 510      * or equal to operation ({@code <=}) is applied to corresponding lane elements.
 511      *
 512      * @param v the input vector
 513      * @return the result mask of testing if this vector is less than or equal
 514      * to the input vector
 515      */
 516     public abstract Mask<E> lessThanEq(Vector<E> v);
 517 
 518     /**
 519      * Tests if this vector is greater than an input vector.
 520      * <p>
 521      * This is a vector binary test operation where the primitive greater than
 522      * operation ({@code >}) is applied to corresponding lane elements.
 523      *
 524      * @param v the input vector
 525      * @return the result mask of testing if this vector is greater than the
 526      * input vector
 527      */
 528     public abstract Mask<E> greaterThan(Vector<E> v);
 529 
 530     /**
 531      * Tests if this vector is greater than or equal to an input vector.
 532      * <p>
 533      * This is a vector binary test operation where the primitive greater than
 534      * or equal to operation ({@code >=}) is applied to corresponding lane elements.
 535      *
 536      * @param v the input vector
 537      * @return the result mask of testing if this vector is greater than or
 538      * equal to the input vector
 539      */
 540     public abstract Mask<E> greaterThanEq(Vector<E> v);
 541 
 542     // Elemental shifting
 543 
 544     /**
 545      * Rotates left the lane elements of this vector by the given number of
 546      * lanes, {@code i}, modulo the vector length.
 547      * <p>
 548      * This is a cross-lane operation that permutes the lane elements of this
 549      * vector.
 550      * For each lane of this vector, at lane index {@code N}, the lane
 551      * element is placed into the result vector at lane index
 552      * {@code (i + N) % this.length()}.
 553      *
 554      * @param i the number of lanes to rotate left
 555      * @return the result of rotating left lane elements of this vector by the
 556      * given number of lanes
 557      */
 558     public abstract Vector<E> rotateEL(int i);
 559 
 560     /**
 561      * Rotates right the lane elements of this vector by the given number of
 562      * lanes, {@code i}, modulo the vector length.
 563      * <p>
 564      * This is a cross-lane operation that permutes the lane elements of this
 565      * vector and behaves as if rotating left the lane elements by
 566      * {@code this.length() - (i % this.length())} lanes.
 567      *
 568      * @param i the number of lanes to rotate right
 569      * @return the result of rotating right lane elements of this vector by the
 570      * given number of lanes
 571      */
 572     public abstract Vector<E> rotateER(int i);
 573 
 574     /**
 575      * Shifts left the lane elements of this vector by the given number of
 576      * lanes, {@code i}, modulo the vector length.
 577      * <p>
 578      * This is a cross-lane operation that permutes the lane elements of this
 579      * vector and behaves as if rotating left the lane elements by {@code i},
 580      * and then the zero value is placed into the result vector at lane indexes
 581      * less than {@code i % this.length()}.
 582      *
 583      * @param i the number of lanes to shift left
 584      * @return the result of shifting left lane elements of this vector by the
 585      * given number of lanes
 586      * @throws IllegalArgumentException if {@code i} is {@code < 0}.
 587      */
 588     public abstract Vector<E> shiftEL(int i);
 589 
 590     /**
 591      * Shifts right the lane elements of this vector by the given number of
 592      * lanes, {@code i}, modulo the vector length.
 593      * <p>
 594      * This is a cross-lane operation that permutes the lane elements of this
 595      * vector and behaves as if rotating right the lane elements by {@code i},
 596      * and then the zero value is placed into the result vector at lane indexes
 597      * greater than or equal to {@code this.length() - (i % this.length())}.
 598      *
 599      * @param i the number of lanes to shift right
 600      * @return the result of shifting right lane elements of this vector by the
 601      * given number of lanes
 602      * @throws IllegalArgumentException if {@code i} is {@code < 0}.
 603      */
 604     public abstract Vector<E> shiftER(int i);
 605 
 606     /**
 607      * Blends the lane elements of this vector with those of an input vector,
 608      * selecting lanes controlled by a mask.
 609      * <p>
 610      * For each lane of the mask, at lane index {@code N}, if the mask lane
 611      * is set then the lane element at {@code N} from the input vector is
 612      * selected and placed into the resulting vector at {@code N},
 613      * otherwise the lane element at {@code N} from this vector is
 614      * selected and placed into the resulting vector at {@code N}.
 615      *
 616      * @param v the input vector
 617      * @param m the mask controlling lane selection
 618      * @return the result of blending the lane elements of this vector with
 619      * those of an input vector
 620      */
 621     public abstract Vector<E> blend(Vector<E> v, Mask<E> m);
 622 
 623     /**
 624      * Rearranges the lane elements of this vector and those of an input vector,
 625      * selecting lane indexes controlled by a shuffle and a mask.
 626      * <p>
 627      * This is a cross-lane operation that rearranges the lane elements of this
 628      * vector and the input vector.  This method behaves as if it rearranges
 629      * each vector with the given shuffle and then blends the two
 630      * results with the mask:
 631      * <pre>{@code
 632      * return this.rearrange(s).blend(v.rearrange(s), m);
 633      * }</pre>
 634      *
 635      * @param v the input vector
 636      * @param s the shuffle controlling lane index selection of the input vector
 637      * if corresponding mask lanes are set, otherwise controlling lane
 638      * index selection of this vector
 639      * @param m the mask controlling shuffled lane selection
 640      * @return the rearrangement of lane elements of this vector and
 641      * those of an input vector
 642      */
 643     @ForceInline
 644     // rearrange
 645     public abstract Vector<E> rearrange(Vector<E> v,
 646                                            Shuffle<E> s, Mask<E> m);
 647 
 648     /**
 649      * Rearranges the lane elements of this vector, selecting lane indexes
 650      * controlled by a shuffle.
 651      * <p>
 652      * This is a cross-lane operation that rearranges the lane elements of this
 653      * vector.
 654      * For each lane of the shuffle, at lane index {@code N} with lane
 655      * element {@code I}, the lane element at {@code I} from this vector is
 656      * selected and placed into the resulting vector at {@code N}.
 657      *
 658      * @param s the shuffle controlling lane index selection
 659      * @return the rearrangement of the lane elements of this vector
 660      */
 661     // rearrange
 662     public abstract Vector<E> rearrange(Shuffle<E> s);
 663 
 664 
 665     // Conversions
 666 
 667     /**
 668      * Converts this vector into a shuffle, creating a shuffle from vector
 669      * lane elements cast to {@code int} then logically AND'ed with the
 670      * shuffle length minus one.
 671      * <p>
 672      * This method behaves as if it returns the result of creating a shuffle
 673      * given an array of the vector lane elements ({@code e} is the element type):
 674      * <pre>{@code
 675      * e[] a = this.toArray();
 676      * int[] sa = new int[a.length];
 677      * for (int i = 0; i < a.length; i++) {
 678      *     sa[i] = (int) a[i];
 679      * }
 680      * return this.species().shuffleFromValues(sa);
 681      * }</pre>
 682      *
 683      * @return a shuffle representation of this vector
 684      */
 685     public abstract Shuffle<E> toShuffle();
 686 
 687     // Bitwise preserving
 688 
 689     /**
 690      * Transforms this vector to a vector of the given species of element type {@code F}.
 691      * <p>
 692      * The underlying bits of this vector are copied to the resulting
 693      * vector without modification, but those bits, before copying, may be
 694      * truncated if this vector's bit size is greater than the desired vector's bit
 695      * size, or appended to with zero bits if this vector's bit size is less
 696      * than the desired vector's bit size.
 697      * <p>
 698      * The method behaves as if this vector is stored into a byte buffer
 699      * and then the desired vector is loaded from the byte buffer using
 700      * native byte ordering. The implication is that ByteBuffer reads bytes
 701      * and then composes them based on the byte ordering so the result
 702      * depends on this composition.
 703      * <p>
 704      * For example, on a system with ByteOrder.LITTLE_ENDIAN, loading from a
 705      * byte array with values {0,1,2,3} and reshaping to int leads to bytes
 706      * being composed in order 0x3 0x2 0x1 0x0 which is decimal value 50462976.
 707      * On a system with ByteOrder.BIG_ENDIAN, the value is instead 66051 because
 708      * bytes are composed in order 0x0 0x1 0x2 0x3.
 709      * <p>
 710      * The following pseudocode expresses the behaviour:
 711      * <pre>{@code
 712      * int blen = Math.max(this.bitSize(), s.bitSize()) / Byte.SIZE;
 713      * ByteBuffer bb = ByteBuffer.allocate(blen).order(ByteOrder.nativeOrder());
 714      * this.intoByteBuffer(bb, 0);
 715      * return s.fromByteBuffer(bb, 0);
 716      * }</pre>
 717      *
 718      * @param s species of desired vector
 719      * @param <F> the boxed element type of the species
 720      * @return a vector transformed, by shape and element type, from this vector
 721      */
 722     @ForceInline
 723     public abstract <F> Vector<F> reinterpret(Species<F> s);
 724 
 725     @ForceInline
 726     @SuppressWarnings("unchecked")
 727     <F> Vector<F> defaultReinterpret(Species<F> s) {
             // Generic reinterpret path: store the bits of this vector into a
             // native-order byte buffer and load a vector of species s back from it.
             // The buffer is sized for the larger of the two bit sizes so that both
             // the store of this vector and the load of the result fit within it.
 728         int blen = Math.max(s.bitSize(), this.species().bitSize()) / Byte.SIZE;
 729         ByteBuffer bb = ByteBuffer.allocate(blen).order(ByteOrder.nativeOrder());
 730         this.intoByteBuffer(bb, 0);
 731 
             // Dispatch on the primitive element type of the target species to the
             // matching concrete vector loader; the raw (Vector) casts are why the
             // method suppresses unchecked warnings.
 732         Class<?> stype = s.elementType();
 733         if (stype == byte.class) {
 734            return (Vector) ByteVector.fromByteBuffer((ByteVector.ByteSpecies)s, bb, 0);
 735         } else if (stype == short.class) {
 736            return (Vector) ShortVector.fromByteBuffer((ShortVector.ShortSpecies)s, bb, 0);
 737         } else if (stype == int.class) {
 738            return (Vector) IntVector.fromByteBuffer((IntVector.IntSpecies)s, bb, 0);
 739         } else if (stype == long.class) {
 740            return (Vector) LongVector.fromByteBuffer((LongVector.LongSpecies)s, bb, 0);
 741         } else if (stype == float.class) {
 742            return (Vector) FloatVector.fromByteBuffer((FloatVector.FloatSpecies)s, bb, 0);
 743         } else if (stype == double.class) {
 744            return (Vector) DoubleVector.fromByteBuffer((DoubleVector.DoubleSpecies)s, bb, 0);
 745         } else {
                 // Any other element type has no loader in the chain above.
 746             throw new UnsupportedOperationException("Bad lane type for reinterpret.");
 747         }
 748     }
 749 
 750     /**
 751      * Transforms this vector to a vector of the same element type but of a different
          * shape, identified by the given species.
 752      * <p>
 753      * The lane elements of this vector are copied without
 754      * modification to the resulting vector, but those lane elements, before
 755      * copying, may be truncated if this vector's length is greater than the desired
 756      * vector's length, or appended to with default element values if this
 757      * vector's length is less than the desired vector's length.
 758      * <p>
 759      * The method behaves as if this vector is stored into a byte array
 760      * and then the returned vector is loaded from the byte array.
 761      * The following pseudocode expresses the behaviour:
 762      * <pre>{@code
 763      * int alen = Math.max(this.bitSize(), s.bitSize()) / Byte.SIZE;
 764      * byte[] a = new byte[alen];
 765      * this.intoByteArray(a, 0);
 766      * return s.fromByteArray(a, 0);
 767      * }</pre>
 768      *
 769      * @param s species of the desired vector
 770      * @return a vector transformed, by shape, from this vector
 771      */
 772     public abstract Vector<E> reshape(Species<E> s);
 773 
 774     // Cast
 775 
 776     /**
 777      * Converts this vector to a vector of the given species element type {@code F}.
 778      * <p>
 779      * For each vector lane up to the length of this vector or the
 780      * desired vector, whichever is the minimum, and where {@code N} is the
 781      * vector lane index, the element at index {@code N} of primitive type
 782      * {@code E} is converted, according to primitive conversion rules
 783      * specified by the Java Language Specification, to a value of primitive
 784      * type {@code F} and placed into the resulting vector at lane index
 785      * {@code N}. If the desired vector's length is greater than this
 786      * vector's length then the default primitive value is placed into
 787      * subsequent lanes of the resulting vector.
 788      *
 789      * @param s species of the desired vector
 790      * @param <F> the boxed element type of the species
 791      * @return a vector converted by shape and element type from this vector
 792      */
 793     public abstract <F> Vector<F> cast(Species<F> s);
 794 
 795     //Array stores
 796 
 797     /**
 798      * Stores this vector into a byte array starting at an offset.
 799      * <p>
 800      * Bytes are extracted from primitive lane elements according to the
 801      * native byte order of the underlying platform.
 802      * <p>
 803      * This method behaves as if it calls the
 804      * byte buffer, offset, and mask accepting
 805      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 806      * <pre>{@code
 807      * this.intoByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 808      * }</pre>
 809      *
 810      * @param a the byte array
 811      * @param i the offset into the array
 813      * @throws IndexOutOfBoundsException if {@code i < 0} or
 814      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 815      */
 816     public abstract void intoByteArray(byte[] a, int i);
 817 
 818     /**
 819      * Stores this vector into a byte array starting at an offset and using a mask.
 820      * <p>
 821      * Bytes are extracted from primitive lane elements according to the
 822      * native byte order of the underlying platform.
 823      * <p>
 824      * This method behaves as if it calls the
 825      * byte buffer, offset, and mask accepting
 826      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 827      * <pre>{@code
 828      * this.intoByteBuffer(ByteBuffer.wrap(a), i, m);
 829      * }</pre>
 830      *
 831      * @param a the byte array
 832      * @param i the offset into the array
 833      * @param m the mask controlling lane selection
 834      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 835      * or {@code > a.length},
 836      * or if, for any vector lane index {@code N} where the mask at lane {@code N}
 837      * is set,
 838      * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 839      */
 840     public abstract void intoByteArray(byte[] a, int i, Mask<E> m);
 841 
 842     /**
 843      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 844      * offset into the byte buffer.
 845      * <p>
 846      * Bytes are extracted from primitive lane elements according to the
 847      * native byte order of the underlying platform.
 848      * <p>
 849      * This method behaves as if it calls the byte buffer, offset, and mask
 850      * accepting
 851      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 852      * <pre>{@code
 853      *   this.intoByteBuffer(b, i, this.maskAllTrue())
 854      * }</pre>
 855      *
 856      * @param b the byte buffer
 857      * @param i the offset into the byte buffer
 858      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 859      * or {@code > b.limit()},
 860      * or if there are fewer than
 861      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 862      * remaining in the byte buffer from the given offset
 863      */
 864     public abstract void intoByteBuffer(ByteBuffer b, int i);
 865 
 866     /**
 867      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 868      * offset into the byte buffer and using a mask.
 869      * <p>
 870      * This method behaves as if the byte buffer is viewed as a primitive
 871      * {@link java.nio.Buffer buffer} for the primitive element type,
 872      * according to the native byte order of the underlying platform, and
 873      * the lane elements of this vector are put into the buffer if the
 874      * corresponding mask lane is set.
 875      * The following pseudocode expresses the behaviour, where
 876      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 877      * primitive element type, and {@code EVector<S>} is the primitive
 878      * vector type for this vector:
 879      * <pre>{@code
 880      * EBuffer eb = b.duplicate().
 881      *     order(ByteOrder.nativeOrder()).position(i).
 882      *     asEBuffer();
 883      * e[] es = ((EVector<S>)this).toArray();
 884      * for (int n = 0; n < es.length; n++) {
 885      *     if (m.isSet(n)) {
 886      *         eb.put(n, es[n]);
 887      *     }
 888      * }
 889      * }</pre>
 890      *
 891      * @param b the byte buffer
 892      * @param i the offset into the byte buffer
 893      * @param m the mask
 894      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 895      * or {@code > b.limit()},
 896      * or if, for any vector lane index {@code N} where the mask at lane {@code N}
 897      * is set,
 898      * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 899      */
 900     public abstract void intoByteBuffer(ByteBuffer b, int i, Mask<E> m);
 901 
 902 
 903     /**
 904      * A {@code Shape} governs the total size, in bits, of a
 905      * {@link Vector}, {@link Mask}, or {@link Shuffle}.  The shape in
 906      * combination with the element type together govern the number of lanes.
 907      */
 908     public enum Shape {
 909         /** Shape of length 64 bits */
 910         S_64_BIT(64),
 911         /** Shape of length 128 bits */
 912         S_128_BIT(128),
 913         /** Shape of length 256 bits */
 914         S_256_BIT(256),
 915         /** Shape of length 512 bits */
 916         S_512_BIT(512),
 917         /** Shape of maximum length supported on the platform */
 918         S_Max_BIT(Unsafe.getUnsafe().getMaxVectorSize(byte.class) * 8);
 919 
             /** The total size, in bits, of this shape. */
 920         final int bitSize;
 921 
             /**
              * Creates a shape of the given total size.
              *
              * @param bitSize the size, in bits, of the shape
              */
 922         Shape(int bitSize) {
 923             this.bitSize = bitSize;
 924         }
 925 
 926         /**
 927          * Returns the size, in bits, of this shape.
 928          *
 929          * @return the size, in bits, of this shape.
 930          */
 931         public int bitSize() {
 932             return bitSize;
 933         }
 934 
 935         /**
 936          * Returns the number of lanes of a vector of this shape whose element
 937          * size is that of the provided species, computed as the bit size of this
              * shape divided by the element size of the species.
 938          *
 939          * @param s the species describing the element type
 940          * @return the number of lanes
 941          */
 942         int length(Species<?> s) {
 943             return bitSize() / s.elementSize();
 944         }
 945 
 946         /**
 947          * Finds the appropriate shape for a given size in bits.
              * <p>
              * The sizes 64, 128, 256 and 512 map to the corresponding fixed-size
              * shapes.  Any other positive multiple of 128 that is no greater than
              * 2048 maps to {@link #S_Max_BIT}.
 948          *
 949          * @param bitSize the size in bits
 950          * @return the shape corresponding to the size in bits
              * @throws IllegalArgumentException if no shape corresponds to the
              * given size in bits
 951          * @see #bitSize
 952          */
 953         public static Shape forBitSize(int bitSize) {
 954             switch (bitSize) {
 955                 case 64:
 956                     return Shape.S_64_BIT;
 957                 case 128:
 958                     return Shape.S_128_BIT;
 959                 case 256:
 960                     return Shape.S_256_BIT;
 961                 case 512:
 962                     return Shape.S_512_BIT;
 963                 default:
                         // NOTE(review): the size is not compared against
                         // S_Max_BIT.bitSize here; confirm that callers only pass
                         // the platform maximum when they reach this branch.
 964                     if ((bitSize > 0) && (bitSize <= 2048) && (bitSize % 128 == 0)) {
 965                         return Shape.S_Max_BIT;
 966                     } else {
 967                         throw new IllegalArgumentException("Bad vector bit size: " + bitSize);
 968                     }
 969             }
 970         }
 971     }
 972 
 973 
 974     /**
 975      * Class representing vectors of the same element type, {@code E}, and {@link Vector.Shape Shape}.
 976      *
 977      * @param <E> the boxed element type of this species
 978      */
 979     public static abstract class Species<E> {
 980         Species() {}
 981 
 982         /**
 983          * Returns the primitive element type of vectors produced by this
 984          * species.
 985          *
 986          * @return the primitive element type
 987          */
 988         public abstract Class<E> elementType();
 989 
 990         /**
 991          * Returns the vector box type for this species
 992          *
 993          * @return the box type
 994          */
 995         abstract Class<?> boxType();
 996 
 997         /**
 998          * Returns the vector mask type for this species
 999          *
1000          * @return the mask type
1001          */
1002         abstract Class<?> maskType();
1003 
1004         /**
1005          * Returns the element size, in bits, of vectors produced by this
1006          * species.
1007          *
1008          * @return the element size, in bits
1009          */
1010         public abstract int elementSize();
1011 
             /**
              * Returns the vector type for this species
              *
              * @return the vector type
              */
1012         abstract Class<?> vectorType();
1013 
1014         /**
1015          * Returns the shape of masks, shuffles, and vectors produced by this
1016          * species.
1017          *
1018          * @return the shape
1019          */
1020         public abstract Shape shape();
1021 
1022         /**
1023          * Returns the number of mask, shuffle, or vector lanes produced by this
              * species.
1024          *
1025          * @return the number of lanes
1026          */
1027         public int length() { return shape().length(this); }
1028 
1029         /**
1030          * Returns the total vector size, in bits, of vectors produced by this
1031          * species.
1032          *
1033          * @return the total vector size, in bits
1034          */
1035         public int bitSize() { return shape().bitSize(); }
1036 
1037         // Factory
1038 
1039         /**
1040          * Finds a species for an element type and shape.
1041          *
1042          * @param c the element type
1043          * @param s the shape
1044          * @param <E> the boxed element type
1045          * @return a species for an element type and shape
1046          * @throws IllegalArgumentException if no such species exists for the
1047          * element type and/or shape
1048          */
1049         @SuppressWarnings("unchecked")
1050         public static <E> Vector.Species<E> of(Class<E> c, Shape s) {
1051             if (c == float.class) {
1052                 return (Vector.Species<E>) FloatVector.species(s);
1053             }
1054             else if (c == double.class) {
1055                 return (Vector.Species<E>) DoubleVector.species(s);
1056             }
1057             else if (c == byte.class) {
1058                 return (Vector.Species<E>) ByteVector.species(s);
1059             }
1060             else if (c == short.class) {
1061                 return (Vector.Species<E>) ShortVector.species(s);
1062             }
1063             else if (c == int.class) {
1064                 return (Vector.Species<E>) IntVector.species(s);
1065             }
1066             else if (c == long.class) {
1067                 return (Vector.Species<E>) LongVector.species(s);
1068             }
1069             else {
1070                 throw new IllegalArgumentException("Bad vector element type: " + c.getName());
1071             }
1072         }
1073 
1074         /**
1075          * Finds a preferred species for an element type.
1076          * <p>
1077          * A preferred species is a species chosen by the platform that has a
1078          * shape of maximal bit size.  A preferred species for different element
1079          * types will have the same shape, and therefore vectors created from
1080          * such species will be shape compatible.
1081          *
1082          * @param c the element type
1083          * @param <E> the boxed element type
1084          * @return a preferred species for an element type
1085          * @throws IllegalArgumentException if no such species exists for the
1086          * element type
1087          */
1088         @SuppressWarnings("unchecked")
1089         public static <E> Vector.Species<E> ofPreferred(Class<E> c) {
1090             Unsafe u = Unsafe.getUnsafe();
1091 
                 // Turn the platform's maximum vector length for c into a bit
                 // size, pick the shape for that bit size, then look up the species.
1092             int vectorLength = u.getMaxVectorSize(c);
1093             int vectorBitSize = bitSizeForVectorLength(c, vectorLength);
1094             Shape s = Shape.forBitSize(vectorBitSize);
1095             return Species.of(c, s);
1096         }
1097 
1098         /**
1099          * Returns a vector where all lane elements are set to the default
1100          * primitive value.
1101          *
1102          * @return a zero vector
1103          */
1104         public abstract Vector<E> zero();
1105 
1106         /**
1107          * Converts a given mask of shape {@code T} and element type
1108          * {@code F} to a mask of this species shape {@code S} and element
1109          * type {@code E}.
1110          * <p>
1111          * For each mask lane, where {@code N} is the mask lane index, if the
1112          * mask lane at index {@code N} is set, then the mask lane at index
1113          * {@code N} of the resulting mask is set, otherwise that mask lane is
1114          * not set.
1115          *
1116          * @param m the mask
1117          * @param <F> the boxed element type of the mask
1118          * @return a mask, converted by shape and element type, from a given
1119          * mask.
1120          * @throws IllegalArgumentException if the mask length and this species
1121          * length differ
1122          */
1123         public abstract <F> Mask<E> cast(Mask<F> m);
1124 
1125         /**
1126          * Converts a given shuffle of shape {@code T} and element type
1127          * {@code F} to a shuffle of this species shape {@code S} and element
1128          * type {@code E}.
1129          * <p>
1130          * For each shuffle lane, where {@code N} is the shuffle lane index, the
1131          * shuffle element at index {@code N} is placed, unmodified, into the
1132          * resulting shuffle at index {@code N}.
1133          *
1134          * @param s the shuffle
1135          * @param <F> the boxed element type of the shuffle
1136          * @return a shuffle, converted by shape and element type, from a given
1137          * shuffle.
1138          * @throws IllegalArgumentException if the shuffle length and this
1139          * species length differ
1140          */
1141         public abstract <F> Shuffle<E> cast(Shuffle<F> s);
1142     }
1143 
1144     /**
1145      * A {@code Mask} represents an ordered immutable sequence of {@code boolean}
1146      * values.  A Mask can be used with a mask accepting vector operation to
1147      * control the selection and operation of lane elements of input vectors.
1148      * <p>
1149      * The number of values in the sequence is referred to as the Mask
1150      * {@link #length() length}.  The length also corresponds to the number of
1151      * Mask lanes.  The lane element at lane index {@code N} (from {@code 0},
1152      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1153      * value in the sequence.
1154      * A Mask and Vector of the same element type and shape have the same number
1155      * of lanes.
1156      * <p>
1157      * A lane is said to be <em>set</em> if the lane element is {@code true},
1158      * otherwise a lane is said to be <em>unset</em> if the lane element is
1159      * {@code false}.
1160      * <p>
1161      * Mask declares a limited set of unary, binary and reductive mask
1162      * operations.
1163      * <ul>
1164      * <li>
1165      * A mask unary operation (1-ary) operates on one input mask to produce a
1166      * result mask.
1167      * For each lane of the input mask the
1168      * lane element is operated on using the specified scalar unary operation and
1169      * the boolean result is placed into the mask result at the same lane.
1170      * The following pseudocode expresses the behaviour of this operation category:
1171      *
1172      * <pre>{@code
1173      * Mask<E> a = ...;
1174      * boolean[] ar = new boolean[a.length()];
1175      * for (int i = 0; i < a.length(); i++) {
1176      *     ar[i] = boolean_unary_op(a.isSet(i));
1177      * }
1178      * Mask<E> r = a.species().maskFromArray(ar, 0);
1179      * }</pre>
1180      *
1181      * <li>
1182      * A mask binary operation (2-ary) operates on two input
1183      * masks to produce a result mask.
1184      * For each lane of the two input masks,
1185      * a and b say, the corresponding lane elements from a and b are operated on
1186      * using the specified scalar binary operation and the boolean result is placed
1187      * into the mask result at the same lane.
1188      * The following pseudocode expresses the behaviour of this operation category:
1189      *
1190      * <pre>{@code
1191      * Mask<E> a = ...;
1192      * Mask<E> b = ...;
1193      * boolean[] ar = new boolean[a.length()];
1194      * for (int i = 0; i < a.length(); i++) {
1195      *     ar[i] = scalar_binary_op(a.isSet(i), b.isSet(i));
1196      * }
1197      * Mask<E> r = a.species().maskFromArray(ar, 0);
1198      * }</pre>
1199      *
1200      * </ul>
1201      * @param <E> the boxed element type of this mask
1202      */
1203     public static abstract class Mask<E> {
1204         Mask() {}
1205 
1206         /**
1207          * Returns the species of this mask.
1208          *
1209          * @return the species of this mask
1210          */
1211         public abstract Species<E> species();
1212 
1213         /**
1214          * Returns the number of mask lanes (the length).
1215          *
1216          * @return the number of mask lanes
1217          */
1218         public int length() { return species().length(); }
1219 
1220         /**
1221          * Converts this mask to a mask of the given species shape of element type {@code F}.
1222          * <p>
1223          * For each mask lane, where {@code N} is the lane index, if the
1224          * mask lane at index {@code N} is set, then the mask lane at index
1225          * {@code N} of the resulting mask is set, otherwise that mask lane is
1226          * not set.
1227          *
1228          * @param species the species of the desired mask
1229          * @param <F> the boxed element type of the species
1230          * @return a mask converted by shape and element type
1231          * @throws IllegalArgumentException if this mask length and the species
1232          * length differ
1233          */
1234         @ForceInline
1235         public <F> Mask<F> cast(Species<F> species) {
1236             return species.cast(this);
1237         }
1238 
1239         /**
1240          * Returns the lane elements of this mask packed into a {@code long}
1241          * value for at most the first 64 lane elements.
1242          * <p>
1243          * The lane elements are packed in the order of least significant bit
1244          * to most significant bit.
1245          * For each mask lane where {@code N} is the mask lane index, if the
1246          * mask lane is set then the {@code N}'th bit is set to one in the
1247          * resulting {@code long} value, otherwise the {@code N}'th bit is set
1248          * to zero.
1249          *
1250          * @return the lane elements of this mask packed into a {@code long}
1251          * value.
1252          */
1253         public abstract long toLong();
1254 
1255         /**
1256          * Returns a {@code boolean} array containing the lane elements of this
1257          * mask.
1258          * <p>
1259          * This method behaves as if it {@link #intoArray(boolean[], int) stores}
1260          * this mask into an allocated array and returns that array as
1261          * follows:
1262          * <pre>{@code
1263          * boolean[] a = new boolean[this.length()];
1264          * this.intoArray(a, 0);
1265          * return a;
1266          * }</pre>
1267          *
1268          * @return an array containing the lane elements of this mask
1269          */
1270         public abstract boolean[] toArray();
1271 
1272         /**
1273          * Stores this mask into a {@code boolean} array starting at offset.
1274          * <p>
1275          * For each mask lane, where {@code N} is the mask lane index,
1276          * the lane element at index {@code N} is stored into the array at index
1277          * {@code i + N}.
1278          *
1279          * @param a the array
1280          * @param i the offset into the array
1281          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1282          * {@code i > a.length - this.length()}
1283          */
1284         public abstract void intoArray(boolean[] a, int i);
1285 
1286         /**
1287          * Returns {@code true} if any of the mask lanes are set.
1288          *
1289          * @return {@code true} if any of the mask lanes are set, otherwise
1290          * {@code false}.
1291          */
1292         public abstract boolean anyTrue();
1293 
1294         /**
1295          * Returns {@code true} if all of the mask lanes are set.
1296          *
1297          * @return {@code true} if all of the mask lanes are set, otherwise
1298          * {@code false}.
1299          */
1300         public abstract boolean allTrue();
1301 
1302         /**
1303          * Returns the number of mask lanes that are set.
1304          *
1305          * @return the number of mask lanes that are set.
1306          */
1307         public abstract int trueCount();
1308 
1309         /**
1310          * Logically ands this mask with an input mask.
1311          * <p>
1312          * This is a mask binary operation where the logical and operation
1313          * ({@code &&}) is applied to lane elements.
1314          *
1315          * @param o the input mask
1316          * @return the result of logically and'ing this mask with an input mask
1317          */
1318         public abstract Mask<E> and(Mask<E> o);
1319 
1320         /**
1321          * Logically ors this mask with an input mask.
1322          * <p>
1323          * This is a mask binary operation where the logical or operation
1324          * ({@code ||}) is applied to lane elements.
1325          *
1326          * @param o the input mask
1327          * @return the result of logically or'ing this mask with an input mask
1328          */
1329         public abstract Mask<E> or(Mask<E> o);
1330 
1331         /**
1332          * Logically negates this mask.
1333          * <p>
1334          * This is a mask unary operation where the logical not operation
1335          * ({@code !}) is applied to lane elements.
1336          *
1337          * @return the result of logically negating this mask.
1338          */
1339         public abstract Mask<E> not();
1340 
1341         /**
1342          * Returns a vector representation of this mask.
1343          * <p>
1344          * For each mask lane, where {@code N} is the mask lane index,
1345          * if the mask lane is set then an element value whose most significant
1346          * bit is set is placed into the resulting vector at lane index
1347          * {@code N}, otherwise the default element value is placed into the
1348          * resulting vector at lane index {@code N}.
1349          *
1350          * @return a vector representation of this mask.
1351          */
1352         public abstract Vector<E> toVector();
1353 
1354         /**
1355          * Tests if the lane at index {@code i} is set
1356          * @param i the lane index
1357          *
1358          * @return true if the lane at index {@code i} is set, otherwise false
1359          */
1360         public abstract boolean getElement(int i);
1361 
1362         /**
1363          * Tests if the lane at index {@code i} is set.
              * This method delegates to {@link #getElement(int)}.
1364          * @param i the lane index
1365          * @return true if the lane at index {@code i} is set, otherwise false
1366          * @see #getElement
1367          */
1368         public boolean isSet(int i) {
1369             return getElement(i);
1370         }
1371     }
1372 
1373     /**
1374      * A {@code Shuffle} represents an ordered immutable sequence of
1375      * {@code int} values.  A Shuffle can be used with a shuffle accepting
1376      * vector operation to control the rearrangement of lane elements of input
1377      * vectors
1378      * <p>
1379      * The number of values in the sequence is referred to as the Shuffle
1380      * {@link #length() length}.  The length also corresponds to the number of
1381      * Shuffle lanes.  The lane element at lane index {@code N} (from {@code 0},
1382      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1383      * value in the sequence.
1384      * A Shuffle and Vector of the same element type and shape have the same
1385      * number of lanes.
1386      * <p>
1387      * A Shuffle describes how a lane element of a vector may cross lanes from
1388      * its lane index, {@code i} say, to another lane index whose value is the
1389      * Shuffle's lane element at lane index {@code i}.  Shuffle lane elements
1390      * will be in the range of {@code 0} (inclusive) to the shuffle length
1391      * (exclusive), and therefore cannot induce out of bounds errors when
1392      * used with vectors operations and vectors of the same length.
1393      *
1394      * @param <E> the boxed element type of this mask
1395      */
1396     public static abstract class Shuffle<E> {
1397         Shuffle() {}
1398 
1399         /**
1400          * Returns the species of this shuffle.
1401          *
1402          * @return the species of this shuffle
1403          */
1404         public abstract Species<E> species();
1405 
1406         /**
1407          * Returns the number of shuffle lanes (the length).
1408          *
1409          * @return the number of shuffle lanes
1410          */
1411         public int length() { return species().length(); }
1412 
1413         /**
1414          * Converts this shuffle to a shuffle of the given species of element type {@code F}.
1415          * <p>
1416          * For each shuffle lane, where {@code N} is the lane index, the
1417          * shuffle element at index {@code N} is placed, unmodified, into the
1418          * resulting shuffle at index {@code N}.
1419          *
1420          * @param species species of desired shuffle
1421          * @param <F> the boxed element type of the species
1422          * @return a shuffle converted by shape and element type
1423          * @throws IllegalArgumentException if this shuffle length and the
1424          * species length differ
1425          */
1426         @ForceInline
1427         public <F> Shuffle<F> cast(Species<F> species) {
1428             return species.cast(this);
1429         }
1430 
1431         /**
1432          * Returns an {@code int} array containing the lane elements of this
1433          * shuffle.
1434          * <p>
1435          * This method behaves as if it {@link #intoArray(int[], int)} stores}
1436          * this shuffle into an allocated array and returns that array as
1437          * follows:
1438          * <pre>{@code
1439          *   int[] a = new int[this.length()];
1440          *   this.intoArray(a, 0);
1441          *   return a;
1442          * }</pre>
1443          *
1444          * @return an array containing the the lane elements of this vector
1445          */
1446         public abstract int[] toArray();
1447 
1448         /**
1449          * Stores this shuffle into an {@code int} array starting at offset.
1450          * <p>
1451          * For each shuffle lane, where {@code N} is the shuffle lane index,
1452          * the lane element at index {@code N} is stored into the array at index
1453          * {@code i + N}.
1454          *
1455          * @param a the array
1456          * @param i the offset into the array
1457          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1458          * {@code i > a.length - this.length()}
1459          */
1460         public abstract void intoArray(int[] a, int i);
1461 
1462         /**
1463          * Converts this shuffle into a vector, creating a vector from shuffle
1464          * lane elements (int values) cast to the vector element type.
1465          * <p>
1466          * This method behaves as if it returns the result of creating a vector
1467          * from this shuffle's lane elements as follows, where {@code $type$}
1468          * denotes the vector element type:
1469          * <pre>{@code
1470          *   int[] sa = this.toArray();
1471          *   $type$[] va = new $type$[sa.length];
1472          *   for (int i = 0; i < sa.length; i++) {
1473          *       va[i] = ($type$) sa[i];
1474          *   }
1475          *   return this.species().fromArray(va, 0);
1476          * }</pre>
1477          *
1478          * @return a vector representation of this shuffle
1479          */
1480         public abstract Vector<E> toVector();
1481 
1482         /**
1483          * Returns the {@code int} lane element held at lane index {@code i}.
1484          *
1485          * @param i the lane index, an index into {@link #toArray()}
1486          * @return the {@code int} lane element at lane index {@code i}
1487          */
1488         public int getElement(int i) { final int[] lanes = toArray(); return lanes[i]; }
1489 
1490         /**
1491          * Rearranges the lane elements of this shuffle selecting lane indexes
1492          * controlled by another shuffle.
1493          * <p>
1494          * For each lane of shuffle {@code s}, at lane index {@code N} with lane
1495          * element {@code I}, the lane element at {@code I} from this shuffle is
1496          * selected and placed into the resulting shuffle at {@code N}.
1497          *
1498          * @param s the shuffle controlling lane index selection
1499          * @return the rearrangement of the lane elements of this shuffle
1500          */
1501         public abstract Shuffle<E> rearrange(Shuffle<E> s);
1502     }
1503 
1504     /**
1505      * Find bit size based on element type and number of elements.
1506      *
1507      * @param c the element type
1508      * @param numElem number of lanes in the vector
1509      * @return size in bits for vector
1510      */
1511     public static int bitSizeForVectorLength(Class<?> c, int numElem) {
1512         if (c == float.class) {
1513             return Float.SIZE * numElem;
1514         }
1515         else if (c == double.class) {
1516             return Double.SIZE * numElem;
1517         }
1518         else if (c == byte.class) {
1519             return Byte.SIZE * numElem;
1520         }
1521         else if (c == short.class) {
1522             return Short.SIZE * numElem;
1523         }
1524         else if (c == int.class) {
1525             return Integer.SIZE * numElem;
1526         }
1527         else if (c == long.class) {
1528             return Long.SIZE * numElem;
1529         }
1530         else {
1531             throw new IllegalArgumentException("Bad vector type: " + c.getName());
1532         }
1533     }
1534 }