1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import jdk.internal.misc.Unsafe;
  28 import jdk.internal.vm.annotation.ForceInline;
  29 
  30 import java.nio.ByteBuffer;
  31 import java.util.function.IntUnaryOperator;
  32 
  33 /**
  34  * A {@code Vector} is designed for use in computations that can be transformed
  35  * by a runtime compiler, on supported hardware, to Single Instruction Multiple
  36  * Data (SIMD) computations leveraging vector hardware registers and vector
  37  * hardware instructions.  Such SIMD computations exploit data parallelism to
  38  * perform the same operation on multiple data points simultaneously in
  39  * less time than it would ordinarily take to perform the same operation
  40  * sequentially on each data point.
  41  * <p>
  42  * A Vector represents an ordered immutable sequence of values of the same
  43  * element type {@code e} that is one of the following primitive types:
  44  * {@code byte}, {@code short}, {@code int}, {@code long}, {@code float}, or
  45  * {@code double}.  The type variable {@code E} corresponds to the boxed
  46  * element type, specifically the class that wraps a value of {@code e} in an
  47  * object (such as the {@code Integer} class that wraps a value of {@code int}).
  48  * A Vector has a {@link #shape() shape} {@code S}, extending type
  49  * {@link Shape}, that governs the total {@link #bitSize() size} in bits
  50  * of the sequence of values.
  51  * <p>
  52  * The number of values in the sequence is referred to as the Vector
  53  * {@link #length() length}.  The length also corresponds to the number of
  54  * Vector lanes.  The lane element at lane index {@code N} (from {@code 0},
  55  * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th value in
  56  * the sequence.
  57  * Note: this arrangement
  58  * of Vector bit size, Vector length, element bit size, and lane element index
  59  * has no bearing on how a Vector instance and its sequence of elements may be
  60  * arranged in memory or represented as a value in a vector hardware register.
  61  * <p>
  62  * Vector declares a set of vector operations (methods) that are common to all
  63  * element types (such as addition).  Sub-classes of Vector with a concrete
  64  * boxed element type declare further operations that are specific to that
  65  * element type (such as access to element values in lanes, logical operations
  66  * on values of integral elements types, or transcendental operations on values
  67  * of floating point element types).
  68  * There are six sub-classes of Vector corresponding to the supported set
  69  * of element types, {@link ByteVector}, {@link ShortVector},
  70  * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
  71  * {@link DoubleVector}.
  72  * <p>
  73  * Vector values, instances of Vector, are created from a special kind of
  74  * factory called a {@link Species}.  A Species has an
  75  * element type and shape and creates Vector values of the same element type
  76  * and shape.
  77  * A species can be {@link #species(Class, Shape) obtained} given an element
  78  * type and shape, or a preferred species can be
  79  * {@link #preferredSpecies obtained} given just an element type where the most
  80  * optimal shape is selected for the current platform.  It is recommended that
  81  * Species instances be held in {@code static final} fields for optimal creation
  82  * and usage of Vector values by the runtime compiler.
  83  * <p>
  84  * Vector operations can be grouped into various categories and their behaviour
  85  * generally specified as follows:
  86  * <ul>
  87  * <li>
  88  * A vector unary operation (1-ary) operates on one input vector to produce a
  89  * result vector.
  90  * For each lane of the input vector the
  91  * lane element is operated on using the specified scalar unary operation and
  92  * the element result is placed into the vector result at the same lane.
  93  * The following pseudocode expresses the behaviour of this operation category,
  94  * where {@code e} is the element type and {@code EVector} corresponds to the
  95  * primitive Vector type:
  96  *
  97  * <pre>{@code
  98  * EVector<S> a = ...;
  99  * e[] ar = new e[a.length()];
 100  * for (int i = 0; i < a.length(); i++) {
 101  *     ar[i] = scalar_unary_op(a.get(i));
 102  * }
 103  * EVector<S> r = a.species().fromArray(ar, 0);
 104  * }</pre>
 105  *
 106  * Unless otherwise specified the input and result vectors will have the same
 107  * element type and shape.
 108  *
 109  * <li>
 110  * A vector binary operation (2-ary) operates on two input
 111  * vectors to produce a result vector.
 112  * For each lane of the two input vectors,
 113  * a and b say, the corresponding lane elements from a and b are operated on
 114  * using the specified scalar binary operation and the element result is placed
 115  * into the vector result at the same lane.
 116  * The following pseudocode expresses the behaviour of this operation category:
 117  *
 118  * <pre>{@code
 119  * EVector<S> a = ...;
 120  * EVector<S> b = ...;
 121  * e[] ar = new e[a.length()];
 122  * for (int i = 0; i < a.length(); i++) {
 123  *     ar[i] = scalar_binary_op(a.get(i), b.get(i));
 124  * }
 125  * EVector<S> r = a.species().fromArray(ar, 0);
 126  * }</pre>
 127  *
 128  * Unless otherwise specified the two input and result vectors will have the
 129  * same element type and shape.
 130  *
 131  * <li>
 132  * Generalizing from unary (1-ary) and binary (2-ary) operations, a vector n-ary
 133  * operation operates on n input vectors to produce a
 134  * result vector.
 135  * N lane elements from each input vector are operated on
 136  * using the specified n-ary scalar operation and the element result is placed
 137  * into the vector result at the same lane.
 138  * Unless otherwise specified the n input and result vectors will have the same
 139  * element type and shape.
 140  *
 141  * <li>
 142  * A vector reduction operation operates on all the lane
 143  * elements of an input vector.
 144  * An accumulation function is applied to all the
 145  * lane elements to produce a scalar result.
 146  * If the reduction operation is associative then the result may be accumulated
 147  * by operating on the lane elements in any order using a specified associative
 148  * scalar binary operation and identity value.  Otherwise, the reduction
 149  * operation specifies the behaviour of the accumulation function.
 150  * The following pseudocode expresses the behaviour of this operation category
 151  * if it is associative:
 152  * <pre>{@code
 153  * EVector<S> a = ...;
 154  * e r = <identity value>;
 155  * for (int i = 0; i < a.length(); i++) {
 156  *     r = assoc_scalar_binary_op(r, a.get(i));
 157  * }
 158  * }</pre>
 159  *
 160  * Unless otherwise specified the scalar result type and element type will be
 161  * the same.
 162  *
 163  * <li>
 164  * A vector binary test operation operates on two input vectors to produce a
 165  * result mask.  For each lane of the two input vectors, a and b say, the
 166  * corresponding lane elements from a and b are operated on using the
 167  * specified scalar binary test operation and the boolean result is placed
 168  * into the mask at the same lane.
 169  * The following pseudocode expresses the behaviour of this operation category:
 170  * <pre>{@code
 171  * EVector<S> a = ...;
 172  * EVector<S> b = ...;
 173  * boolean[] ar = new boolean[a.length()];
 174  * for (int i = 0; i < a.length(); i++) {
 175  *     ar[i] = scalar_binary_test_op(a.get(i), b.get(i));
 176  * }
 177  * Mask<E, S> r = a.species().maskFromArray(ar, 0);
 178  * }</pre>
 179  *
 180  * Unless otherwise specified the two input vectors and result mask will have
 181  * the same element type and shape.
 182  *
 183  * <li>
 184  * The prior categories of operation can be said to operate within the vector
 185  * lanes, where lane access is uniformly applied to all vectors, specifically
 186  * the scalar operation is applied to elements taken from input vectors at the
 187  * same lane, and if appropriate applied to the result vector at the same lane.
 188  * A further category of operation is a cross-lane vector operation where lane
 189  * access is defined by the arguments to the operation.  Cross-lane operations
 190  * generally rearrange lane elements, for example by permutation (commonly
 191  * controlled by a {@link Shuffle}) or by blending (commonly controlled by a
 192  * {@link Mask}).  Such an operation explicitly specifies how it rearranges lane
 193  * elements.
 194  * </ul>
 195  *
 196  * If a vector operation is represented as an instance method then the first
 197  * input vector corresponds to {@code this} vector and subsequent input vectors
 198  * are arguments of the method.  Otherwise, if an operation is represented as a
 199  * static method then all input vectors are arguments of the method.
 200  * <p>
 201  * If a vector operation does not belong to one of the above categories then
 202  * the operation explicitly specifies how it processes the lane elements of
 203  * input vectors, and where appropriate expresses the behaviour using
 204  * pseudocode.
 205  *
 206  * <p>
 207  * Many vector operations provide an additional {@link Mask mask} accepting
 208  * variant.
 209  * The mask controls which lanes are selected for application of the scalar
 210  * operation.  Masks are a key component for the support of control flow in
 211  * vector computations.
 212  * <p>
 213  * For certain operation categories the mask accepting variants can be specified
 214  * in generic terms.  If a lane of the mask is set then the scalar operation is
 215  * applied to corresponding lane elements, otherwise if a lane of a mask is not
 216  * set then a default scalar operation is applied and its result is placed into
 217  * the vector result at the same lane. The default operation is specified for
 218  * the following operation categories:
 219  * <ul>
 220  * <li>
 221  * For a vector n-ary operation the default operation is a function that returns
 222  * its first argument, specifically a lane element of the first input vector.
 223  * <li>
 224  * For an associative vector reduction operation the default operation is a
 225  * function that returns the identity value.
 226  * <li>
 227  * For a vector binary test operation the default operation is a function that
 228  * returns false.
 229  *</ul>
 230  * Otherwise, the mask accepting variant of the operation explicitly specifies
 231  * how it processes the lane elements of input vectors, and where appropriate
 232  * expresses the behaviour using pseudocode.
 233  *
 234  * <p>
 235  * For convenience many vector operations, of arity greater than one, provide
 236  * an additional scalar accepting variant.  This variant accepts compatible
 237  * scalar values instead of vectors for the second and subsequent input vectors,
 238  * if any.
 239  * Unless otherwise specified the scalar variant behaves as if each scalar value
 240  * is transformed to a vector using the vector Species
 241  * {@code broadcast} operation, and
 242  * then the vector accepting vector operation is applied using the transformed
 243  * values.
 244  *
 245  * <p>
 246  * This is a value-based
 247  * class; use of identity-sensitive operations (including reference equality
 248  * ({@code ==}), identity hash code, or synchronization) on instances of
 249  * {@code Vector} may have unpredictable results and should be avoided.
 250  *
 251  * @param <E> the boxed element type of elements in this vector
 252  * @param <S> the type of shape of this vector
 253  */
 254 public abstract class Vector<E, S extends Vector.Shape> {
 255 
 256     Vector() {} // package-private constructor
 257 
 258     /**
 259      * Returns the species of this vector.
 260      *
 261      * @return the species of this vector
 262      */
 263     public abstract Species<E, S> species();
 264 
 265     // @@@
 266 
 267     /**
 268      * Returns the primitive element type of this vector (that of its species).
 269      *
 270      * @return the primitive element type of this vector
 271      */
 272     public Class<E> elementType() { return species().elementType(); }
 273 
 274     /**
 275      * Returns the element size, in bits, of this vector (that of its species).
 276      *
 277      * @return the element size, in bits
 278      */
 279     public int elementSize() { return species().elementSize(); }
 280 
 281     /**
 282      * Returns the shape of this vector (that of its species).
 283      *
 284      * @return the shape of this vector
 285      */
 286     public S shape() { return species().shape(); }
 287 
 288     /**
 289      * Returns the number of vector lanes (the length), as given by the species.
 290      *
 291      * @return the number of vector lanes
 292      */
 293     public int length() { return species().length(); }
 294 
 295     /**
 296      * Returns the total vector size, in bits, as given by the species.
 297      *
 298      * @return the total vector size, in bits
 299      */
 300     public int bitSize() { return species().bitSize(); }
 301 
 302     //Arithmetic
 303 
 304     /**
 305      * Adds this vector to an input vector.
 306      * <p>
 307      * This is a vector binary operation where the primitive addition operation
 308      * ({@code +}) is applied to lane elements.
 309      *
 310      * @param v the input vector
 311      * @return the result of adding this vector to the input vector
 312      */
 313     public abstract Vector<E, S> add(Vector<E, S> v);
 314 
 315     /**
 316      * Adds this vector to an input vector, selecting lane elements
 317      * controlled by a mask.
 318      * <p>
 319      * This is a vector binary operation where the primitive addition operation
 320      * ({@code +}) is applied to lane elements.
 321      *
 322      * @param v the input vector
 323      * @param m the mask controlling lane selection
 324      * @return the result of adding this vector to the given vector
 325      */
 326     public abstract Vector<E, S> add(Vector<E, S> v, Mask<E, S> m);
 327 
 328     /**
 329      * Subtracts an input vector from this vector.
 330      * <p>
 331      * This is a vector binary operation where the primitive subtraction
 332      * operation ({@code -}) is applied to lane elements.
 333      *
 334      * @param v the input vector
 335      * @return the result of subtracting the input vector from this vector
 336      */
 337     public abstract Vector<E, S> sub(Vector<E, S> v);
 338 
 339     /**
 340      * Subtracts an input vector from this vector, selecting lane elements
 341      * controlled by a mask.
 342      * <p>
 343      * This is a vector binary operation where the primitive subtraction
 344      * operation ({@code -}) is applied to lane elements.
 345      *
 346      * @param v the input vector
 347      * @param m the mask controlling lane selection
 348      * @return the result of subtracting the input vector from this vector
 349      */
 350     public abstract Vector<E, S> sub(Vector<E, S> v, Mask<E, S> m);
 351 
 352     /**
 353      * Multiplies this vector with an input vector.
 354      * <p>
 355      * This is a vector binary operation where the primitive multiplication
 356      * operation ({@code *}) is applied to lane elements.
 357      *
 358      * @param v the input vector
 359      * @return the result of multiplying this vector with the input vector
 360      */
 361     public abstract Vector<E, S> mul(Vector<E, S> v);
 362 
 363     /**
 364      * Multiplies this vector with an input vector, selecting lane elements
 365      * controlled by a mask.
 366      * <p>
 367      * This is a vector binary operation where the primitive multiplication
 368      * operation ({@code *}) is applied to lane elements.
 369      *
 370      * @param v the input vector
 371      * @param m the mask controlling lane selection
 372      * @return the result of multiplying this vector with the input vector
 373      */
 374     public abstract Vector<E, S> mul(Vector<E, S> v, Mask<E, S> m);
 375 
 376     /**
 377      * Negates this vector.
 378      * <p>
 379      * This is a vector unary operation where the primitive negation operation
 380      * ({@code -}) is applied to lane elements.
 381      *
 382      * @return the negation of this vector
 383      */
 384     public abstract Vector<E, S> neg();
 385 
 386     /**
 387      * Negates this vector, selecting lane elements controlled by a mask.
 388      * <p>
 389      * This is a vector unary operation where the primitive negation operation
 390      * ({@code -}) is applied to lane elements.
 391      *
 392      * @param m the mask controlling lane selection
 393      * @return the negation of this vector
 394      */
 395     public abstract Vector<E, S> neg(Mask<E, S> m);
 396 
 397     // Maths from java.lang.Math
 398 
 399     /**
 400      * Returns the modulus of this vector.
 401      * <p>
 402      * This is a vector unary operation where the operation
 403      * {@code (a) -> (a < 0) ? -a : a} is applied to lane elements.
 404      *
 405      * @return the modulus of this vector
 406      */
 407     public abstract Vector<E, S> abs();
 408 
 409     /**
 410      * Returns the modulus of this vector, selecting lane elements controlled by
 411      * a mask.
 412      * <p>
 413      * This is a vector unary operation where the operation
 414      * {@code (a) -> (a < 0) ? -a : a} is applied to lane elements.
 415      *
 416      * @param m the mask controlling lane selection
 417      * @return the modulus of this vector
 418      */
 419     public abstract Vector<E, S> abs(Mask<E, S> m);
 420 
 421     /**
 422      * Returns the minimum of this vector and an input vector.
 423      * <p>
 424      * This is a vector binary operation where the operation
 425      * {@code (a, b) -> a < b ? a : b}  is applied to lane elements.
 426      *
 427      * @param v the input vector
 428      * @return the minimum of this vector and the input vector
 429      */
 430     public abstract Vector<E, S> min(Vector<E, S> v);
 431     // TODO mask variant?
 432 
 433     /**
 434      * Returns the maximum of this vector and an input vector.
 435      * <p>
 436      * This is a vector binary operation where the operation
 437      * {@code (a, b) -> a > b ? a : b}  is applied to lane elements.
 438      *
 439      * @param v the input vector
 440      * @return the maximum of this vector and the input vector
 441      */
 442     public abstract Vector<E, S> max(Vector<E, S> v);
 443     // TODO mask variant?
 444 
 445     // Comparisons
 446 
 447     /**
 448      * Tests if this vector is equal to an input vector.
 449      * <p>
 450      * This is a vector binary test operation where the primitive equals
 451      * operation ({@code ==}) is applied to lane elements.
 452      *
 453      * @param v the input vector
 454      * @return the result mask of testing if this vector is equal to the input
 455      * vector
 456      */
 457     public abstract Mask<E, S> equal(Vector<E, S> v);
 458 
 459     /**
 460      * Tests if this vector is not equal to an input vector.
 461      * <p>
 462      * This is a vector binary test operation where the primitive not equals
 463      * operation ({@code !=}) is applied to lane elements.
 464      *
 465      * @param v the input vector
 466      * @return the result mask of testing if this vector is not equal to the
 467      * input vector
 468      */
 469     public abstract Mask<E, S> notEqual(Vector<E, S> v);
 470 
 471     /**
 472      * Tests if this vector is less than an input vector.
 473      * <p>
 474      * This is a vector binary test operation where the primitive less than
 475      * operation ({@code <}) is applied to lane elements.
 476      *
 477      * @param v the input vector
 478      * @return the mask result of testing if this vector is less than the input
 479      * vector
 480      */
 481     public abstract Mask<E, S> lessThan(Vector<E, S> v);
 482 
 483     /**
 484      * Tests if this vector is less than or equal to an input vector.
 485      * <p>
 486      * This is a vector binary test operation where the primitive less than
 487      * or equal to operation ({@code <=}) is applied to lane elements.
 488      *
 489      * @param v the input vector
 490      * @return the mask result of testing if this vector is less than or equal
 491      * to the input vector
 492      */
 493     public abstract Mask<E, S> lessThanEq(Vector<E, S> v);
 494 
 495     /**
 496      * Tests if this vector is greater than an input vector.
 497      * <p>
 498      * This is a vector binary test operation where the primitive greater than
 499      * operation ({@code >}) is applied to lane elements.
 500      *
 501      * @param v the input vector
 502      * @return the mask result of testing if this vector is greater than the
 503      * input vector
 504      */
 505     public abstract Mask<E, S> greaterThan(Vector<E, S> v);
 506 
 507     /**
 508      * Tests if this vector is greater than or equal to an input vector.
 509      * <p>
 510      * This is a vector binary test operation where the primitive greater than
 511      * or equal to operation ({@code >=}) is applied to lane elements.
 512      *
 513      * @param v the input vector
 514      * @return the mask result of testing if this vector is greater than or
 515      * equal to the given vector
 516      */
 517     public abstract Mask<E, S> greaterThanEq(Vector<E, S> v);
 518 
 519     // Elemental shifting
 520 
 521     /**
 522      * Rotates left the lane elements of this vector by the given number of
 523      * lanes, {@code i}, modulo the vector length.
 524      * <p>
 525      * This is a cross-lane operation that permutes the lane elements of this
 526      * vector.
 527      * For each lane of the input vector, at lane index {@code N}, the lane
 528      * element is placed into the result vector at lane index
 529      * {@code (i + N) % this.length()}.
 530      *
 531      * @param i the number of lanes to rotate left
 532      * @return the result of rotating left lane elements of this vector by the
 533      * given number of lanes
 534      */
 535     public abstract Vector<E, S> rotateEL(int i);
 536 
 537     /**
 538      * Rotates right the lane elements of this vector by the given number of
 539      * lanes, {@code i}, modulo the vector length.
 540      * <p>
 541      * This is a cross-lane operation that permutes the lane elements of this
 542      * vector and behaves as if rotating left the lane elements by
 543      * {@code this.length() - (i % this.length())} lanes.
 544      *
 545      * @param i the number of lanes to rotate right
 546      * @return the result of rotating right lane elements of this vector by the
 547      * given number of lanes
 548      */
 549     public abstract Vector<E, S> rotateER(int i);
 550 
 551     /**
 552      * Shifts left the lane elements of this vector by the given number of
 553      * lanes, {@code i}, modulo the vector length.
 554      * <p>
 555      * This is a cross-lane operation that permutes the lane elements of this
 556      * vector and behaves as if rotating left the lane elements by {@code i},
 557      * and then the zero value is placed into the result vector at lane indexes
 558      * less than {@code i % this.length()}.
 559      *
 560      * @param i the number of lanes to shift left
 561      * @return the result of shifting left lane elements of this vector by the
 562      * given number of lanes
 563      * @throws IllegalArgumentException if {@code i} is {@code < 0}.
 564      */
 565     public abstract Vector<E, S> shiftEL(int i);
 566 
 567     /**
 568      * Shifts right the lane elements of this vector by the given number of
 569      * lanes, {@code i}, modulo the vector length.
 570      * <p>
 571      * This is a cross-lane operation that permutes the lane elements of this
 572      * vector and behaves as if rotating right the lane elements by {@code i},
 573      * and then the zero value is placed into the result vector at lane indexes
 574      * greater or equal to {@code this.length() - (i % this.length())}.
 575      *
 576      * @param i the number of lanes to shift right
 577      * @return the result of shifting right lane elements of this vector by the
 578      * given number of lanes
 579      * @throws IllegalArgumentException if {@code i} is {@code < 0}.
 580      */
 581     public abstract Vector<E, S> shiftER(int i);
 582 
 583     /**
 584      * Blends the lane elements of this vector with those of an input vector,
 585      * selecting lanes controlled by a mask.
 586      * <p>
 587      * For each lane of the mask, at lane index {@code N}, if the mask lane
 588      * is set then the lane element at {@code N} from the input vector is
 589      * selected and placed into the resulting vector at {@code N},
 590      * otherwise the lane element at {@code N} from this vector is
 591      * selected and placed into the resulting vector at {@code N}.
 592      *
 593      * @param v the input vector
 594      * @param m the mask controlling lane selection
 595      * @return the result of blending the lane elements of this vector with
 596      * those of an input vector
 597      */
 598     public abstract Vector<E, S> blend(Vector<E, S> v, Mask<E, S> m);
 599 
 600     /**
 601      * Rearranges the lane elements of this vector and those of an input vector,
 602      * selecting lane indexes controlled by a shuffle and a mask.
 603      * <p>
 604      * This is a cross-lane operation that rearranges the lane elements of this
 605      * vector and the input vector.  This method behaves as if it rearranges
 606      * each vector with the given shuffle and then blends the two
 607      * results with the mask:
 608      * <pre>{@code
 609      * return this.rearrange(s).blend(v.rearrange(s), m);
 610      * }</pre>
 611      *
 612      * @param v the input vector
 613      * @param s the shuffle controlling lane index selection of the input vector
 614      * if corresponding mask lanes are set, otherwise controlling lane
 615      * index selection of this vector
 616      * @param m the mask controlling shuffled lane selection
 617      * @return the rearrangement of lane elements of this vector and
 618      * those of an input vector
 619      */
 620     @ForceInline
 621     // rearrange
 622     public abstract Vector<E, S> rearrange(Vector<E, S> v,
 623                                            Shuffle<E, S> s, Mask<E, S> m);
 624 
 625     /**
 626      * Rearranges the lane elements of this vector selecting lane indexes
 627      * controlled by a shuffle.
 628      * <p>
 629      * This is a cross-lane operation that rearranges the lane elements of this
 630      * vector.
 631      * For each lane of the shuffle, at lane index {@code N} with lane
 632      * element {@code I}, the lane element at {@code I} from this vector is
 633      * selected and placed into the resulting vector at {@code N}.
 634      *
 635      * @param s the shuffle controlling lane index selection
 636      * @return the rearrangement of the lane elements of this vector
 637      */
 638     // rearrange
 639     public abstract Vector<E, S> rearrange(Shuffle<E, S> s);
 640 
 641 
 642     // Conversions
 643 
 644     /**
 645      * Converts this vector into a shuffle, creating a shuffle from vector
 646      * lane elements cast to {@code int} then logically AND'ed with the
 647      * shuffle length minus one.
 648      * <p>
 649      * This method behaves as if it returns the result of creating a shuffle
 650      * given an array of the vector lane elements, as follows:
 651      * <pre>{@code
 652      * $type$[] a = this.toArray();
 653      * int[] sa = new int[a.length];
 654      * for (int i = 0; i < a.length; i++) {
 655      *     sa[i] = (int) a[i];
 656      * }
 657      * return this.species().shuffleFromValues(sa);
 658      * }</pre>
 659      *
 660      * @return a shuffle representation of this vector
 661      */
 662     public abstract Shuffle<E, S> toShuffle();
 663 
 664     // Bitwise preserving
 665 
 666     /**
 667      * Transforms this vector to a vector of the given species shape {@code T}
 668      * and element type {@code F}.
 669      * <p>
 670      * This method behaves as if it returns the result of calling
 671      * {@link Species#reshape(Vector) reshape} on the given species with this
 672      * vector:
 673      * <pre>{@code
 674      * return species.reshape(this);
 675      * }</pre>
 676      *
 677      * @param species the species
 678      * @param <F> the boxed element type of the species
 679      * @param <T> the type of shape of the species
 680      * @return a vector transformed by shape and element type
 681      * @see Species#reshape(Vector)
 682      */
 683     @ForceInline
 684     public <F, T extends Shape> Vector<F, T> reshape(Species<F, T> species) {
 685         return species.reshape(this);
 686     }
 687 
 688     /**
 689      * Transforms this vector to a vector of the given species element type
 690      * {@code F}, where this vector's shape {@code S} is preserved.
 691      * <p>
 692      * This method behaves as if it returns the result of calling
 693      * {@link Species#rebracket(Vector) rebracket} on the given species with this
 694      * vector:
 695      * <pre>{@code
 696      * return species.rebracket(this);
 697      * }</pre>
 698      *
 699      * @param species the species
 700      * @param <F> the boxed element type of the species
 701      * @return a vector transformed by element type
 702      * @see Species#rebracket(Vector)
 703      */
 704     @ForceInline
 705     public <F> Vector<F, S> rebracket(Species<F, S> species) {
 706         return species.rebracket(this);
 707     }
 708 
 709     /**
 710      * Transforms this vector to a vector of the given species shape {@code T},
 711      * where this vector's element type {@code E} is preserved.
 712      * <p>
 713      * This method behaves as if it returns the result of calling
 714      * {@link Species#resize(Vector) resize} on the given species with this vector:
 715      * <pre>{@code
 716      * return species.resize(this);
 717      * }</pre>
 718      *
 719      * @param species the species
 720      * @param <T> the type of shape of the species
 721      * @return a vector transformed by shape
 722      * @see Species#resize(Vector)
 723      */
 724     public abstract <T extends Shape> Vector<E, T> resize(Species<E, T> species);
 725 
 726     // Cast
 727 
 728     /**
 729      * Converts this vector to a vector whose shape {@code T} and element type
 730      * {@code F} are those of the given species.
 731      * <p>
 732      * The result is the same as if {@link Species#cast(Vector) cast} were
 733      * invoked on the given species with this vector as the argument:
 734      * <pre>{@code
 735      * return species.cast(this);
 736      * }</pre>
 737      *
 738      * @param species the species
 739      * @param <F> the boxed element type of the species
 740      * @param <T> the type of shape of the species
 741      * @return a vector converted by shape and element type
 742      * @see Species#cast(Vector)
 743      */
 744     @ForceInline
 745     public <F, T extends Shape> Vector<F, T> cast(Species<F, T> species) {
 746         return species.<E, S>cast(this);
 747     }
 748 
 749     //Array stores
 750 
 751     /**
 752      * Stores this vector into a byte array starting at an offset.
 753      * <p>
 754      * Bytes are extracted from primitive lane elements according to the
 755      * native byte order of the underlying platform.
 756      * <p>
 757      * This method behaves as if it calls the
 758      * byte buffer, offset, and mask accepting
 759      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 760      * <pre>{@code
 761      * this.intoByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 762      * }</pre>
 763      *
 764      * @param a the byte array
 765      * @param i the offset into the array
 766      * @throws IndexOutOfBoundsException if {@code i < 0} or
 767      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 768      * @see #intoByteBuffer(ByteBuffer, int, Mask)
 769      */
 770     public abstract void intoByteArray(byte[] a, int i);
 771 
 772     /**
 773      * Stores this vector into a byte array starting at an offset and using a mask.
 774      * <p>
 775      * Bytes are extracted from primitive lane elements according to the
 776      * native byte order of the underlying platform.
 777      * <p>
 778      * This method behaves as if it calls the
 779      * byte buffer, offset, and mask accepting
 780      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 781      * <pre>{@code
 782      * this.intoByteBuffer(ByteBuffer.wrap(a), i, m);
 783      * }</pre>
 784      *
 785      * @param a the byte array
 786      * @param i the offset into the array
 787      * @param m the mask
 788      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 789      * or {@code > a.length}, or for any vector lane index {@code N} where
 790      * the mask at lane {@code N} is set
 791      * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 792      */
 793     public abstract void intoByteArray(byte[] a, int i, Mask<E, S> m);
 794 
 795     /**
 796      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 797      * offset into the byte buffer.
 798      * <p>
 799      * Bytes are extracted from primitive lane elements according to the
 800      * native byte order of the underlying platform.
 801      * <p>
 802      * This method behaves as if it calls the byte buffer, offset, and mask
 803      * accepting
 804      * {@link #intoByteBuffer(ByteBuffer, int, Mask) method} as follows:
 805      * <pre>{@code
 806      *   this.intoByteBuffer(b, i, this.maskAllTrue())
 807      * }</pre>
 808      *
 809      * @param b the byte buffer
 810      * @param i the offset into the byte buffer
 811      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 812      * or {@code > b.limit()},
 813      * or if there are fewer than
 814      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 815      * remaining in the byte buffer from the given offset
 816      */
 817     public abstract void intoByteBuffer(ByteBuffer b, int i);
 818 
 819     /**
 820      * Stores this vector into a {@link ByteBuffer byte buffer} starting at an
 821      * offset into the byte buffer and using a mask.
 822      * <p>
 823      * This method behaves as if the byte buffer is viewed as a primitive
 824      * {@link java.nio.Buffer buffer} for the primitive element type,
 825      * according to the native byte order of the underlying platform, and
 826      * the lane elements of this vector are put into the buffer if the
 827      * corresponding mask lane is set.
 828      * The following pseudocode expresses the behaviour, where
 829      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 830      * primitive element type, and {@code EVector<S>} is the primitive
 831      * vector type for this vector:
 832      * <pre>{@code
 833      * EBuffer eb = b.duplicate().
 834      *     order(ByteOrder.nativeOrder()).position(i).
 835      *     asEBuffer();
 836      * e[] es = ((EVector<S>)this).toArray();
 837      * for (int n = 0; n < es.length; n++) {
 838      *     if (m.isSet(n)) {
 839      *         eb.put(n, es[n]);
 840      *     }
 841      * }
 842      * }</pre>
 843      *
 844      * @param b the byte buffer
 845      * @param i the offset into the byte buffer
 846      * @param m the mask
 847      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 848      * or {@code > b.limit()},
 849      * or for any vector lane index {@code N} where the mask at lane {@code N}
 850      * is set
 851      * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 852      */
 853     public abstract void intoByteBuffer(ByteBuffer b, int i, Mask<E, S> m);
 854 
 855 
 856     /**
 857      * A factory for creating {@link Vector}, {@link Mask} and {@link Shuffle}
 858      * values of the same element type and shape.
 859      *
 860      * @param <E> the boxed element type of this species
 861      * @param <S> the type of shape of this species
 862      */
 863     public static abstract class Species<E, S extends Shape> {
 864         Species() {} // package-private: only classes in this package can subclass
 865 
 866         /**
 867          * Returns the primitive element type of vectors produced by this
 868          * species.
 869          *
 870          * @return the primitive element type, as a {@code Class} object
 871          */
 872         public abstract Class<E> elementType();
 873 
 874         /**
 875          * Returns the size, in bits, of each lane element of vectors produced
 876          * by this species.
 877          *
 878          * @return the element size, in bits
 879          */
 880         public abstract int elementSize();
 881 
 882         /**
 883          * Returns the shape of masks, shuffles, and vectors produced by this
 884          * species.
 885          *
 886          * @return the shape
 887          */
 888         public abstract S shape();
 889 
 890         /**
 891          * Returns the number of mask, shuffle, or vector lanes of this species.
 892          *
 893          * @return the number of lanes
 894          */
 895         public int length() { return this.shape().length(this); }
 896 
 897         /**
 898          * Returns the overall size, in bits, of each vector produced by this
 899          * species, as given by the shape of this species.
 900          *
 901          * @return the total vector size, in bits
 902          */
 903         public int bitSize() { return this.shape().bitSize(); }
 904 
 905         // Factory
 906 
 907         /**
 908          * Returns a vector where all lane elements are set to the default
 909          * primitive value.
 910          *
 911          * @return a zero vector, with every lane holding the default value
 912          */
 913         public abstract Vector<E, S> zero();
 914 
 915         /**
 916          * Loads a vector from a byte array starting at an offset.
 917          * <p>
 918          * Bytes are composed into primitive lane elements according to the
 919          * native byte order of the underlying platform.
 920          * <p>
 921          * This method behaves as if it returns the result of calling the
 922          * byte buffer, offset, and mask accepting
 923          * {@link #fromByteBuffer(ByteBuffer, int, Mask) method} as follows:
 924          * <pre>{@code
 925          * return this.fromByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 926          * }</pre>
 927          *
 928          * @param a the byte array
 929          * @param i the offset into the array
 930          * @return a vector loaded from a byte array
 931          * @throws IndexOutOfBoundsException if {@code i < 0} or
 932          * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 933          */
 934         public abstract Vector<E, S> fromByteArray(byte[] a, int i);
 935 
 936         /**
 937          * Loads a vector from a byte array starting at an offset and using a
 938          * mask.
 939          * <p>
 940          * Bytes are composed into primitive lane elements according to the
 941          * native byte order of the underlying platform.
 942          * <p>
 943          * This method behaves as if it returns the result of calling the
 944          * byte buffer, offset, and mask accepting
 945          * {@link #fromByteBuffer(ByteBuffer, int, Mask) method} as follows:
 946          * <pre>{@code
 947          * return this.fromByteBuffer(ByteBuffer.wrap(a), i, m);
 948          * }</pre>
 949          *
 950          * @param a the byte array
 951          * @param i the offset into the array
 952          * @param m the mask; a lane is loaded from the array only if the
 953          * corresponding mask lane is set
 954          * @return a vector loaded from a byte array
 955          * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 956          * or {@code > a.length},
 957          * or for any vector lane index {@code N} where the mask at lane {@code N}
 958          * is set
 959          * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 960          * @see #fromByteBuffer(ByteBuffer, int, Mask)
 961          */
 962         public abstract Vector<E, S> fromByteArray(byte[] a, int i, Mask<E, S> m);
 963 
 964         /**
 965          * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 966          * offset into the byte buffer.
 967          * <p>
 968          * Bytes are composed into primitive lane elements according to the
 969          * native byte order of the underlying platform.
 970          * <p>
 971          * This method behaves as if it returns the result of calling the
 972          * byte buffer, offset, and mask accepting
 973          * {@link #fromByteBuffer(ByteBuffer, int, Mask) method} as follows:
 974          * <pre>{@code
 975          *   return this.fromByteBuffer(b, i, this.maskAllTrue())
 976          * }</pre>
 977          *
 978          * @param b the byte buffer
 979          * @param i the offset into the byte buffer
 980          * @return a vector loaded from a byte buffer
 981          * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 982          * or {@code > b.limit()},
 983          * or if there are fewer than
 984          * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 985          * remaining in the byte buffer from the given offset
 986          */
 987         public abstract Vector<E, S> fromByteBuffer(ByteBuffer b, int i);
 988 
 989         /**
 990          * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 991          * offset into the byte buffer and using a mask.
 992          * <p>
 993          * This method behaves as if the byte buffer is viewed as a primitive
 994          * {@link java.nio.Buffer buffer} for the primitive element type,
 995          * according to the native byte order of the underlying platform, and
 996          * the returned vector is loaded with a mask from a primitive array
 997          * obtained from the primitive buffer.
 998          * The following pseudocode expresses the behaviour, where
 999          * {@code EBuffer} is the primitive buffer type, {@code e} is the
1000          * primitive element type, and {@code ESpecies<S>} is the primitive
1001          * species for {@code e}:
1002          * <pre>{@code
1003          * EBuffer eb = b.duplicate().
1004          *     order(ByteOrder.nativeOrder()).position(i).
1005          *     asEBuffer();
1006          * e[] es = new e[this.length()];
1007          * for (int n = 0; n < es.length; n++) {
1008          *     if (m.isSet(n))
1009          *         es[n] = eb.get(n);
1010          * }
1011          * Vector<E, S> r = ((ESpecies<S>)this).fromArray(es, 0, m);
1012          * }</pre>
1013          *
1014          * @param b the byte buffer
1015          * @param i the offset into the byte buffer
1016          * @param m the mask
1017          * @return a vector loaded from a byte buffer
1018          * @throws IndexOutOfBoundsException if the offset is {@code < 0},
1019          * or {@code > b.limit()}, or for any vector lane index {@code N} where
1020          * the mask at lane {@code N} is set
1021          * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
1022          */
1023         public abstract Vector<E, S> fromByteBuffer(ByteBuffer b, int i, Mask<E, S> m);
1024 
1025         //Mask and shuffle constructions
1026 
1027         /**
1028          * Returns a mask where each lane is set or unset according to a given
1029          * {@code boolean} value.
1030          * <p>
1031          * For each mask lane, where {@code N} is the mask lane index,
1032          * if the given {@code boolean} value at index {@code N} is {@code true}
1033          * then the mask lane at index {@code N} is set, otherwise it is unset.
1034          *
1035          * @@@ What should happen if {@code bits.length < this.length()}? Use
1036          * the default value or throw {@code IndexOutOfBoundsException}?
1037          *
1038          * @param bits the given {@code boolean} values
1039          * @return a mask where each lane is set or unset according to a given
1040          * {@code boolean} value
1041          */
1042         public abstract Mask<E, S> maskFromValues(boolean... bits);
1043 
1044         /**
1045          * Loads a mask from a {@code boolean} array starting at an offset.
1046          * <p>
1047          * For each mask lane, where {@code N} is the mask lane index,
1048          * if the array element at index {@code i + N} is {@code true} then the
1049          * mask lane at index {@code N} is set, otherwise it is unset.
1050          *
1051          * @param a the {@code boolean} array
1052          * @param i the offset into the array
1053          * @return the mask loaded from the {@code boolean} array
1054          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1055          * {@code i > a.length - this.length()}
1056          */
1057         public abstract Mask<E, S> maskFromArray(boolean[] a, int i);
1058 
1059         /**
1060          * Returns a mask where all lanes are set.
1061          *
1062          * @return a mask where all lanes are set
1063          */
1064         public abstract Mask<E, S> maskAllTrue();
1065 
1066         /**
1067          * Returns a mask where all lanes are unset, that is, {@code false}.
1068          *
1069          * @return a mask where all lanes are unset
1070          */
1071         public abstract Mask<E, S> maskAllFalse();
1072 
1073         /**
1074          * Returns a shuffle of mapped indexes where each lane element is
1075          * the result of applying a mapping function to the corresponding lane
1076          * index.
1077          * <p>
1078          * Care should be taken to ensure {@code Shuffle} values produced from
1079          * this method are consumed as constants to ensure optimal generation of
1080          * code.  For example, values held in {@code static final} fields or
1081          * values held in loop constant local variables.
1082          * <p>
1083          * This method behaves as if a shuffle is created from an array of
1084          * mapped indexes as follows:
1085          * <pre>{@code
1086          *   int[] a = new int[this.length()];
1087          *   for (int i = 0; i < a.length; i++) {
1088          *       a[i] = f.applyAsInt(i);
1089          *   }
1090          *   return this.shuffleFromValues(a);
1091          * }</pre>
1092          *
1093          * @param f the lane index mapping function
1094          * @return a shuffle of mapped indexes
1095          */
1096         public abstract Shuffle<E, S> shuffle(IntUnaryOperator f);
1097 
1098         /**
1099          * Returns a shuffle where each lane element is the value of its
1100          * corresponding lane index.
1101          * <p>
1102          * This method behaves as if a shuffle is created from an identity
1103          * index mapping function as follows:
1104          * <pre>{@code
1105          *   return this.shuffle(i -> i);
1106          * }</pre>
1107          *
1108          * @return a shuffle of lane indexes
1109          */
1110         public abstract Shuffle<E, S> shuffleIota();
1111 
1112         /**
1113          * Returns a shuffle where each lane element is set to a given
1114          * {@code int} value logically AND'ed by the species length minus one.
1115          * <p>
1116          * For each shuffle lane, where {@code N} is the shuffle lane index, the
1117          * {@code int} value at index {@code N} logically AND'ed by
1118          * {@code this.length() - 1} is placed into the resulting shuffle at
1119          * lane index {@code N}.
1120          *
1121          * @param indexes the given {@code int} values
1122          * @return a shuffle where each lane element is set to a given
1123          * {@code int} value
1124          * @throws IndexOutOfBoundsException if the number of int values is
1125          * {@code < this.length()}.
1126          */
1127         public abstract Shuffle<E, S> shuffleFromValues(int... indexes);
1128 
1129         /**
1130          * Loads a shuffle from an {@code int} array starting at an offset.
1131          * <p>
1132          * For each shuffle lane, where {@code N} is the shuffle lane index, the
1133          * array element at index {@code i + N} logically AND'ed by
1134          * {@code this.length() - 1} is placed into the resulting shuffle at lane
1135          * index {@code N}.
1136          *
1137          * @param a the {@code int} array
1138          * @param i the offset into the array
1139          * @return a shuffle loaded from an {@code int} array
1140          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1141          * {@code i > a.length - this.length()}
1142          */
1143         public abstract Shuffle<E, S> shuffleFromArray(int[] a, int i);
1144 
1145         // Shuffle iota, 0...N
1146 
1147         // Vector type/shape transformations
1148 
1149         /**
1150          * Transforms an input vector of shape {@code T} and element type
1151          * {@code F} to a vector of this species shape {@code S} and element
1152          * type {@code E}.
1153          * <p>
1154          * The underlying bits of the input vector are copied to the resulting
1155          * vector without modification, but those bits, before copying, may be
1156          * truncated if the vector bit size is greater than this species bit
1157          * size, or appended to with zero bits if the vector bit size is less
1158          * than this species bit size.
1159          * <p>
1160          * The method behaves as if the input vector is stored into a byte buffer
1161          * and then the returned vector is loaded from the byte buffer using
1162          * native byte ordering. The implication is that ByteBuffer reads bytes
1163          * and then composes them based on the byte ordering so the result
1164          * depends on this composition.
1165          * <p>
1166          * For example, on a system with {@code ByteOrder.LITTLE_ENDIAN}, loading
1167          * from a byte array with values {0,1,2,3} and reshaping to {@code int} leads
1168          * to bytes being composed in order 0x3 0x2 0x1 0x0, which is the decimal
1169          * value 50462976. On a system with {@code ByteOrder.BIG_ENDIAN}, the value
1170          * is instead 66051 because bytes are composed in order 0x0 0x1 0x2 0x3.
1171          * <p>
1172          * The following pseudocode expresses the behaviour:
1173          * <pre>{@code
1174          * int blen = Math.max(v.bitSize(), bitSize()) / Byte.SIZE;
1175          * ByteBuffer bb = ByteBuffer.allocate(blen).order(ByteOrder.nativeOrder());
1176          * v.intoByteBuffer(bb, 0);
1177          * return fromByteBuffer(bb, 0);
1178          * }</pre>
1179          *
1180          * @param v the input vector
1181          * @param <F> the boxed element type of the vector
1182          * @param <T> the type of shape of the vector
1183          * @return a vector transformed, by shape and element type, from an
1184          * input vector
1185          */
1186         public abstract <F, T extends Shape> Vector<E, S> reshape(Vector<F, T> v);
1187 
1188         /**
1189          * Transforms an input vector of element type {@code F} to a vector of
1190          * this species element type {@code E}, where this species shape
1191          * {@code S} is preserved.
1192          * <p>
1193          * The underlying bits of the input vector are copied without
1194          * modification to the resulting vector.
1195          * <p>
1196          * The method behaves as if the input vector is stored into a byte buffer
1197          * and then the returned vector is loaded from the byte buffer using
1198          * native byte ordering. The implication is that ByteBuffer reads bytes
1199          * and then composes them based on the byte ordering so the result
1200          * depends on this composition.
1201          * <p>
1202          * For example, on a system with {@code ByteOrder.LITTLE_ENDIAN}, loading
1203          * from a byte array with values {0,1,2,3} and rebracketing to {@code int}
1204          * leads to bytes being composed in order 0x3 0x2 0x1 0x0, which is the
1205          * decimal value 50462976. On a system with {@code ByteOrder.BIG_ENDIAN}, the
1206          * value is instead 66051 because bytes are composed in order 0x0 0x1 0x2 0x3.
1207          * <p>
1208          * The following pseudocode expresses the behaviour:
1209          * <pre>{@code
1210          * ByteBuffer bb = ByteBuffer.allocate(v.bitSize() / Byte.SIZE).order(ByteOrder.nativeOrder());
1211          * v.intoByteBuffer(bb, 0);
1212          * return fromByteBuffer(bb, 0);
1213          * }</pre>
1214          *
1215          * @param v the input vector
1216          * @param <F> the boxed element type of the vector
1217          * @return a vector transformed, by element type, from an input vector
1218          */
1219         public abstract <F> Vector<E, S> rebracket(Vector<F, S> v);
1220 
1221         /**
1222          * Transforms an input vector of shape {@code T} to a vector of this
1223          * species shape {@code S}, where this species element type
1224          * {@code E} is preserved.
1225          * <p>
1226          * The lane elements of the input vector are copied without
1227          * modification to the resulting vector, but those lane elements, before
1228          * copying, may be truncated if the vector length is greater than this
1229          * species length, or appended to with default element values if the
1230          * vector length is less than this species length.
1231          * <p>
1232          * The method behaves as if the input vector is stored into a byte array
1233          * and then the returned vector is loaded from the byte array.
1234          * The following pseudocode expresses the behaviour:
1235          * <pre>{@code
1236          * int alen = Math.max(v.bitSize(), this.bitSize()) / Byte.SIZE;
1237          * byte[] a = new byte[alen];
1238          * v.intoByteArray(a, 0);
1239          * return fromByteArray(a, 0);
1240          * }</pre>
1241          *
1242          * @param v the input vector
1243          * @param <T> the type of shape of the vector
1244          * @return a vector transformed, by shape, from an input vector
1245          */
1246         public abstract <T extends Shape> Vector<E, S> resize(Vector<E, T> v);
1247 
1248         /**
1249          * Converts an input vector of shape {@code T} and element type
1250          * {@code F} to a vector of this species shape {@code S} and element
1251          * type {@code E}.
1252          * <p>
1253          * For each input vector lane up to the length of the input vector or
1254          * this species, whichever is the minimum, and where {@code N} is the
1255          * vector lane index, the element at index {@code N} of primitive type
1256          * {@code F} is converted, according to primitive conversion rules
1257          * specified by the Java Language Specification, to a value of primitive
1258          * type {@code E} and placed into the resulting vector at lane index
1259          * {@code N}.  If this species length is greater than the input
1260          * vector length then the default primitive value is placed into
1261          * subsequent lanes of the resulting vector.
1262          *
1263          * @param v the input vector
1264          * @param <F> the boxed element type of the vector
1265          * @param <T> the type of shape of the vector
1266          * @return a vector, converted by shape and element type, from an input
1267          * vector.
1268          */
1269         public abstract <F, T extends Shape> Vector<E, S> cast(Vector<F, T> v);
1270 
1271         /**
1272          * Converts a given mask of shape {@code T} and element type
1273          * {@code F} to a mask of this species shape {@code S} and element
1274          * type {@code E}.
1275          * <p>
1276          * For each mask lane, where {@code N} is the mask lane index, if the
1277          * given mask's lane at index {@code N} is set, then the mask lane at
1278          * index {@code N} of the resulting mask is set, otherwise that mask lane
1279          * is not set.
1280          *
1281          * @param m the mask
1282          * @param <F> the boxed element type of the mask
1283          * @param <T> the type of shape of the mask
1284          * @return a mask, converted by shape and element type, from a given
1285          * mask.
1286          * @throws IllegalArgumentException if the mask length and this species
1287          * length differ
1288          */
1289         public abstract <F, T extends Shape> Mask<E, S> cast(Mask<F, T> m);
1290 
1291         /**
1292          * Converts a given shuffle of shape {@code T} and element type
1293          * {@code F} to a shuffle of this species shape {@code S} and element
1294          * type {@code E}.
1295          * <p>
1296          * For each shuffle lane, where {@code N} is the shuffle lane index, the
1297          * shuffle element at index {@code N} is placed, unmodified, into the
1298          * resulting shuffle at index {@code N}.
1299          *
1300          * @param s the shuffle
1301          * @param <F> the boxed element type of the shuffle
1302          * @param <T> the type of shape of the shuffle
1303          * @return a shuffle, converted by shape and element type, from a given
1304          * shuffle.
1305          * @throws IllegalArgumentException if the shuffle length and this
1306          * species length differ
1307          */
1308         public abstract <F, T extends Shape> Shuffle<E, S> cast(Shuffle<F, T> s);
1309 
1310         // Species/species transformations
1311 
1312         // Returns a species for a given element type and the length of this
1313         // species.
1314         // The length of the returned species will be equal to the length of
1315         // this species.
1316         //
1317         // Throws IAE if no shape exists for the element type and this species length,
1318 //        public <F> Species<F, ?> toSpeciesWithSameNumberOfLanes(Class<F> c) {
1319 //            // @@@ TODO implement and find better name
1320 //            throw new UnsupportedOperationException();
1321 //        }
1322 
1323     }
1324 
1325     /**
1326      * A {@code Shape} determines the total size, in bits, of a {@link Vector},
1327      * {@link Mask}, or {@code Shuffle}.  The number of lanes is governed by
1328      * the shape together with the element type.
1329      */
1330     public static abstract class Shape {
1331         Shape() {}
1332 
1333         /**
1334          * Returns the total number of bits covered by this shape.
1335          *
1336          * @return the size, in bits, of this shape
1337          */
1338         public abstract int bitSize();
1339 
1340         // @@@ remove this method; lane count = shape bit size / element bit size
1341         public int length(Species<?, ?> s) { return this.bitSize() / s.elementSize(); }
1342     }
1343 
1344     /**
1345      * A {@code Mask} represents an ordered immutable sequence of {@code boolean}
1346      * values.  A Mask can be used with a mask accepting vector operation to
1347      * control the selection and operation of lane elements of input vectors.
1348      * <p>
1349      * The number of values in the sequence is referred to as the Mask
1350      * {@link #length() length}.  The length also corresponds to the number of
1351      * Mask lanes.  The lane element at lane index {@code N} (from {@code 0},
1352      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1353      * value in the sequence.
1354      * A Mask and Vector of the same element type and shape have the same number
1355      * of lanes.
1356      * <p>
1357      * A lane is said to be <em>set</em> if the lane element is {@code true},
1358      * otherwise a lane is said to be <em>unset</em> if the lane element is
1359      * {@code false}.
1360      * <p>
1361      * Mask declares a limited set of unary, binary and reductive mask
1362      * operations.
1363      * <ul>
1364      * <li>
1365      * A mask unary operation (1-ary) operates on one input mask to produce a
1366      * result mask.
1367      * For each lane of the input mask the
1368      * lane element is operated on using the specified scalar unary operation and
1369      * the boolean result is placed into the mask result at the same lane.
1370      * The following pseudocode expresses the behaviour of this operation category:
1371      *
1372      * <pre>{@code
1373      * Mask<E, S> a = ...;
1374      * boolean[] ar = new boolean[a.length()];
1375      * for (int i = 0; i < a.length(); i++) {
1376      *     ar[i] = boolean_unary_op(a.isSet(i));
1377      * }
1378      * Mask<E, S> r = a.species().maskFromArray(ar, 0);
1379      * }</pre>
1380      *
1381      * <li>
1382      * A mask binary operation (2-ary) operates on two input
1383      * masks to produce a result mask.
1384      * For each lane of the two input masks,
1385      * a and b say, the corresponding lane elements from a and b are operated on
1386      * using the specified scalar binary operation and the boolean result is placed
1387      * into the mask result at the same lane.
1388      * The following pseudocode expresses the behaviour of this operation category:
1389      *
1390      * <pre>{@code
1391      * Mask<E, S> a = ...;
1392      * Mask<E, S> b = ...;
1393      * boolean[] ar = new boolean[a.length()];
1394      * for (int i = 0; i < a.length(); i++) {
1395      *     ar[i] = scalar_binary_op(a.isSet(i), b.isSet(i));
1396      * }
1397      * Mask<E, S> r = a.species().maskFromArray(ar, 0);
1398      * }</pre>
1399      * </ul>
1400      * @param <E> the boxed element type of this mask
1401      * @param <S> the type of shape of this mask
1402      */
1403     public static abstract class Mask<E, S extends Shape> {
1404         Mask() {} // package-private constructor: not callable outside this package
1405 
1406         /**
1407          * Returns the species of this mask.
1408          *
1409          * @return the {@link Species} of this mask
1410          */
1411         public abstract Species<E, S> species();
1412 
1413         /**
1414          * Returns the number of mask lanes (the length).
1415          *
1416          * @return the number of mask lanes
1417          */
1418         public int length() { return species().length(); }
1419 
1420         /**
1421          * Converts this mask to a mask of the given species shape {@code T} and
1422          * element type {@code F}.
1423          * <p>
1424          * This method behaves as if it returns the result of calling
1425          * {@link Species#cast(Mask) cast} on the given species with this mask:
1426          * <pre>{@code
1427          * return species.cast(this);
1428          * }</pre>
1429          *
1430          * @param species the species
1431          * @param <F> the boxed element type of the species
1432          * @param <T> the type of shape of the species
1433          * @return a mask converted by shape and element type
1434          * @throws IllegalArgumentException if this mask length and the species
1435          * length differ
1436          * @see Species#cast(Mask)
1437          */
1438         @ForceInline
1439         public <F, T extends Shape> Mask<F, T> cast(Species<F, T> species) {
1440             return species.cast(this);
1441         }
1442 
1443         /**
1444          * Returns the lane elements of this mask packed into a {@code long}
1445          * value for at most the first 64 lane elements.
1446          * <p>
1447          * The lane elements are packed in the order of least significant bit
1448          * to most significant bit.
1449          * For each mask lane where {@code N} is the mask lane index, if the
1450          * mask lane is set then the {@code N}'th bit is set to one in the
1451          * resulting {@code long} value, otherwise the {@code N}'th bit is set
1452          * to zero.
1453          *
1454          * @return the lane elements of this mask packed into a {@code long}
1455          * value.
1456          */
1457         public abstract long toLong();
1458 
1459         /**
1460          * Returns an {@code boolean} array containing the lane elements of this
1461          * mask.
1462          * <p>
1463          * This method behaves as if it {@link #intoArray(boolean[], int)} stores}
1464          * this mask into an allocated array and returns that array as
1465          * follows:
1466          * <pre>{@code
1467          * boolean[] a = new boolean[this.length()];
1468          * this.intoArray(a, 0);
1469          * return a;
1470          * }</pre>
1471          *
1472          * @return an array containing the the lane elements of this vector
1473          */
1474         public abstract boolean[] toArray();
1475 
1476         /**
1477          * Stores this mask into a {@code boolean} array starting at offset.
1478          * <p>
1479          * For each mask lane, where {@code N} is the mask lane index,
1480          * the lane element at index {@code N} is stored into the array at index
1481          * {@code i + N}.
1482          *
1483          * @param a the array
1484          * @param i the offset into the array
1485          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1486          * {@code i > a.length - this.length()}
1487          */
1488         public abstract void intoArray(boolean[] a, int i);
1489 
1490         /**
1491          * Returns {@code true} if any of the mask lanes are set.
1492          *
1493          * @return {@code true} if any of the mask lanes are set, otherwise
1494          * {@code false}.
1495          */
1496         public abstract boolean anyTrue();
1497 
1498         /**
1499          * Returns {@code true} if all of the mask lanes are set.
1500          *
1501          * @return {@code true} if all of the mask lanes are set, otherwise
1502          * {@code false}.
1503          */
1504         public abstract boolean allTrue();
1505 
1506         /**
1507          * Returns the number of mask lanes that are set.
1508          *
1509          * @return the number of mask lanes that are set.
1510          */
1511         public abstract int trueCount();
1512 
1513         // TODO: LZ count/numberOfLeadingZeros
1514         // TODO: xor, shiftl, shiftr
1515 
1516         /**
1517          * Logically ands this mask with an input mask.
1518          * <p>
1519          * This is a mask binary operation where the logical and operation
1520          * ({@code &&} is applied to lane elements.
1521          *
1522          * @param o the input mask
1523          * @return the result of logically and'ing this mask with an input mask
1524          */
1525         public abstract Mask<E, S> and(Mask<E, S> o);
1526 
1527         /**
1528          * Logically ors this mask with an input mask.
1529          * <p>
1530          * This is a mask binary operation where the logical or operation
1531          * ({@code ||} is applied to lane elements.
1532          *
1533          * @param o the input mask
1534          * @return the result of logically or'ing this mask with an input mask
1535          */
1536         public abstract Mask<E, S> or(Mask<E, S> o);
1537 
1538         /**
1539          * Logically negates this mask.
1540          * <p>
1541          * This is a mask unary operation where the logical not operation
1542          * ({@code !} is applied to lane elements.
1543          *
1544          * @return the result of logically negating this mask.
1545          */
1546         public abstract Mask<E, S> not();
1547 
1548         /**
1549          * Returns a vector representation of this mask.
1550          * <p>
1551          * For each mask lane, where {@code N} is the mask lane index,
1552          * if the mask lane is set then an element value whose most significant
1553          * bit is set is placed into the resulting vector at lane index
1554          * {@code N}, otherwise the default element value is placed into the
1555          * resulting vector at lane index {@code N}.
1556          *
1557          * @return a vector representation of this mask.
1558          */
1559         public abstract Vector<E, S> toVector();
1560 
1561         /**
1562          * Tests if the lane at index {@code i} is set
1563          * @param i the lane index
1564          *
1565          * @return true if the lane at index {@code i} is set, otherwise false
1566          */
1567         // @@@ Rename to isSet
1568         public abstract boolean getElement(int i);
1569     }
1570 
1571     /**
1572      * A {@code Shuffle} represents an ordered immutable sequence of
1573      * {@code int} values.  A Shuffle can be used with a shuffle accepting
1574      * vector operation to control the rearrangement of lane elements of input
1575      * vectors
1576      * <p>
1577      * The number of values in the sequence is referred to as the Shuffle
1578      * {@link #length() length}.  The length also corresponds to the number of
1579      * Shuffle lanes.  The lane element at lane index {@code N} (from {@code 0},
1580      * inclusive, to length, exclusive) corresponds to the {@code N + 1}'th
1581      * value in the sequence.
1582      * A Shuffle and Vector of the same element type and shape have the same
1583      * number of lanes.
1584      * <p>
1585      * A Shuffle describes how a lane element of a vector may cross lanes from
1586      * its lane index, {@code i} say, to another lane index whose value is the
1587      * Shuffle's lane element at lane index {@code i}.  Shuffle lane elements
1588      * will be in the range of {@code 0} (inclusive) to the shuffle length
1589      * (exclusive), and therefore cannot induce out of bounds errors when
1590      * used with vectors operations and vectors of the same length.
1591      *
1592      * @param <E> the boxed element type of this mask
1593      * @param <S> the type of shape of this mask
1594      */
1595     public static abstract class Shuffle<E, S extends Shape> {
1596         Shuffle() {}
1597 
1598         /**
1599          * Returns the species of this shuffle.
1600          *
1601          * @return the species of this shuffle
1602          */
1603         public abstract Species<E, S> species();
1604 
1605         /**
1606          * Returns the number of shuffle lanes (the length).
1607          *
1608          * @return the number of shuffle lanes
1609          */
1610         public int length() { return species().length(); }
1611 
1612         /**
1613          * Converts this shuffle to a shuffle of the given species shape
1614          * {@code T} and element type {@code F}.
1615          * <p>
1616          * This method behaves as if it returns the result of calling
1617          * {@link Species#cast(Shuffle) cast} on the given species with this
1618          * shuffle:
1619          * <pre>{@code
1620          * return species.cast(this);
1621          * }</pre>
1622          *
1623          * @param species the species
1624          * @param <F> the boxed element type of the species
1625          * @param <T> the type of shape of the species
1626          * @return a shuffle converted by shape and element type
1627          * @throws IllegalArgumentException if this shuffle length and the
1628          * species length differ
1629          * @see Species#cast(Mask)
1630          */
1631         @ForceInline
1632         public <F, T extends Shape> Shuffle<F, T> cast(Species<F, T> species) {
1633             return species.cast(this);
1634         }
1635 
1636         /**
1637          * Returns an {@code int} array containing the lane elements of this
1638          * shuffle.
1639          * <p>
1640          * This method behaves as if it {@link #intoArray(int[], int)} stores}
1641          * this shuffle into an allocated array and returns that array as
1642          * follows:
1643          * <pre>{@code
1644          *   int[] a = new int[this.length()];
1645          *   this.intoArray(a, 0);
1646          *   return a;
1647          * }</pre>
1648          *
1649          * @return an array containing the the lane elements of this vector
1650          */
1651         public abstract int[] toArray();
1652 
1653         /**
1654          * Stores this shuffle into an {@code int} array starting at offset.
1655          * <p>
1656          * For each shuffle lane, where {@code N} is the shuffle lane index,
1657          * the lane element at index {@code N} is stored into the array at index
1658          * {@code i + N}.
1659          *
1660          * @param a the array
1661          * @param i the offset into the array
1662          * @throws IndexOutOfBoundsException if {@code i < 0}, or
1663          * {@code i > a.length - this.length()}
1664          */
1665         public abstract void intoArray(int[] a, int i);
1666 
1667         // @@@ rotate/shift/EL/ER
1668 
1669         /**
1670          * Converts this shuffle into a vector, creating a vector from shuffle
1671          * lane elements (int values) cast to the vector element type.
1672          * <p>
1673          * This method behaves as if it returns the result of creating a
1674          * vector given an {@code int} array obtained from this shuffle's
1675          * lane elements, as follows:
1676          * <pre>{@code
1677          *   int[] sa = this.toArray();
1678          *   $type$[] va = new $type$[a.length];
1679          *   for (int i = 0; i < a.length; i++) {
1680          *       va[i] = ($type$) sa[i];
1681          *   }
1682          *   return this.species().fromArray(va, 0);
1683          * }</pre>
1684          *
1685          * @return a vector representation of this shuffle
1686          */
1687         public abstract Vector<E, S> toVector();
1688 
1689         /**
1690          * Gets the {@code int} lane element at lane index {@code i}
1691          *
1692          * @param i the lane index
1693          * @return the {@code int} lane element at lane index {@code i}
1694          */
1695         public int getElement(int i) { return toArray()[i]; }
1696 
1697         /**
1698          * Rearranges the lane elements of this shuffle selecting lane indexes
1699          * controlled by another shuffle.
1700          * <p>
1701          * For each lane of the shuffle, at lane index {@code N} with lane
1702          * element {@code I}, the lane element at {@code I} from this shuffle is
1703          * selected and placed into the resulting shuffle at {@code N}.
1704          *
1705          * @param s the shuffle controlling lane index selection
1706          * @return the rearrangement of the lane elements of this shuffle
1707          */
1708         public abstract Shuffle<E, S> rearrange(Shuffle<E, S> s);
1709     }
1710 
1711     /**
1712      * Finds a preferred species for an element type.
1713      * <p>
1714      * A preferred species is a species chosen by the platform that has a
1715      * shape of maximal bit size.  A preferred species for different element
1716      * types will have the same shape, and therefore vectors created from
1717      * such species will be shape compatible.
1718      *
1719      * @param c the element type
1720      * @param <E> the boxed element type
1721      * @return a preferred species for an element type
1722      * @throws IllegalArgumentException if no such species exists for the
1723      * element type
1724      */
1725     @SuppressWarnings("unchecked")
1726     public static <E> Vector.Species<E, ?> preferredSpecies(Class<E> c) {
1727         Unsafe u = Unsafe.getUnsafe();
1728 
1729         int vectorLength = u.getMaxVectorSize(c);
1730         int vectorBitSize = bitSizeForVectorLength(c, vectorLength);
1731         Shape s = shapeForVectorBitSize(vectorBitSize);
1732         return species(c, s);
1733     }
1734 
1735     // @@@ public static method on Species?
1736     private static int bitSizeForVectorLength(Class<?> c, int elementSize) {
1737         if (c == float.class) {
1738             return Float.SIZE * elementSize;
1739         }
1740         else if (c == double.class) {
1741             return Double.SIZE * elementSize;
1742         }
1743         else if (c == byte.class) {
1744             return Byte.SIZE * elementSize;
1745         }
1746         else if (c == short.class) {
1747             return Short.SIZE * elementSize;
1748         }
1749         else if (c == int.class) {
1750             return Integer.SIZE * elementSize;
1751         }
1752         else if (c == long.class) {
1753             return Long.SIZE * elementSize;
1754         }
1755         else {
1756             throw new IllegalArgumentException("Bad vector type: " + c.getName());
1757         }
1758     }
1759 
1760     // @@@ public static method on Shape?
1761     private static Shape shapeForVectorBitSize(int bitSize) {
1762         switch (bitSize) {
1763             case 64:
1764                 return Shapes.S_64_BIT;
1765             case 128:
1766                 return Shapes.S_128_BIT;
1767             case 256:
1768                 return Shapes.S_256_BIT;
1769             case 512:
1770                 return Shapes.S_512_BIT;
1771             default:
1772                 throw new IllegalArgumentException("Bad vector bit size: " + bitSize);
1773         }
1774     }
1775 
1776     /**
1777      * Finds a species for an element type and shape.
1778      *
1779      * @param c the element type
1780      * @param s the shape
1781      * @param <E> the boxed element type
1782      * @param <S> the type of shape
1783      * @return a species for an element type and shape
1784      * @throws IllegalArgumentException if no such species exists for the
1785      * element type and/or shape
1786      */
1787     @SuppressWarnings("unchecked")
1788     public static <E, S extends Shape> Vector.Species<E, S> species(Class<E> c, S s) {
1789         if (c == float.class) {
1790             return (Vector.Species<E, S>) FloatVector.species(s);
1791         }
1792         else if (c == double.class) {
1793             return (Vector.Species<E, S>) DoubleVector.species(s);
1794         }
1795         else if (c == byte.class) {
1796             return (Vector.Species<E, S>) ByteVector.species(s);
1797         }
1798         else if (c == short.class) {
1799             return (Vector.Species<E, S>) ShortVector.species(s);
1800         }
1801         else if (c == int.class) {
1802             return (Vector.Species<E, S>) IntVector.species(s);
1803         }
1804         else if (c == long.class) {
1805             return (Vector.Species<E, S>) LongVector.species(s);
1806         }
1807         else {
1808             throw new IllegalArgumentException("Bad vector element type: " + c.getName());
1809         }
1810     }
1811 }