1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.util.Objects;
  30 import java.util.function.IntUnaryOperator;
  31 import java.util.concurrent.ThreadLocalRandom;
  32 
  33 import jdk.internal.misc.Unsafe;
  34 import jdk.internal.vm.annotation.ForceInline;
  35 import static jdk.incubator.vector.VectorIntrinsics.*;
  36 
  37 
  38 /**
  39  * A specialized {@link Vector} representing an ordered immutable sequence of
  40  * {@code byte} values.
  41  */
  42 @SuppressWarnings("cast")
  43 public abstract class ByteVector extends Vector<Byte> {
  44 
   45     ByteVector() {} // no access modifier: package-private constructor
   46 
          // floor(log2) of the byte[] index scale reported by Unsafe. The array loaders
          // below shift an element index left by this amount to form a byte offset.
   47     private static final int ARRAY_SHIFT = 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
  48 
   49     // Unary operator
          //
          // FUnOp is the callback shape for one-input lane functions: an int plus one
          // byte in, one byte out. The int is presumably the lane index -- confirm
          // against the concrete subclasses, which are not part of this listing.
   50 
   51     interface FUnOp {
   52         byte apply(int i, byte a);
   53     }
   54 
          // Hook supplied by subclasses; presumably applies f to every lane (TODO confirm).
   55     abstract ByteVector uOp(FUnOp f);
   56 
          // Mask-taking variant of uOp; presumably f only affects lanes set in m (TODO confirm).
   57     abstract ByteVector uOp(Mask<Byte> m, FUnOp f);
  58 
   59     // Binary operator
          //
          // FBinOp is the callback shape for two-input lane functions: an int plus two
          // bytes in, one byte out. The int is presumably the lane index (TODO confirm).
   60 
   61     interface FBinOp {
   62         byte apply(int i, byte a, byte b);
   63     }
   64 
          // Hook supplied by subclasses; presumably combines this vector with v lane by lane using f.
   65     abstract ByteVector bOp(Vector<Byte> v, FBinOp f);
   66 
          // Mask-taking variant of bOp; the treatment of unset lanes is defined by the subclasses.
   67     abstract ByteVector bOp(Vector<Byte> v, Mask<Byte> m, FBinOp f);
  68 
   69     // Trinary operator
          //
          // FTriOp is the callback shape for three-input lane functions: an int plus
          // three bytes in, one byte out. The int is presumably the lane index (TODO confirm).
   70 
   71     interface FTriOp {
   72         byte apply(int i, byte a, byte b, byte c);
   73     }
   74 
          // Hook supplied by subclasses; presumably combines this vector with v1 and v2 lane by lane using f.
   75     abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f);
   76 
          // Mask-taking variant of tOp; the treatment of unset lanes is defined by the subclasses.
   77     abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, Mask<Byte> m, FTriOp f);
  78 
   79     // Reduction operator
          //
          // Reuses FBinOp as the combining function and returns a single byte.
          // v is presumably the starting (identity) value of the reduction -- TODO confirm
          // against the subclasses, which are not part of this listing.
   80 
   81     abstract byte rOp(byte v, FBinOp f);
  82 
   83     // Binary test
          //
          // FBinTest is the callback shape for two-input lane predicates: an int plus
          // two bytes in, a boolean out. The int is presumably the lane index (TODO confirm).
   84 
   85     interface FBinTest {
   86         boolean apply(int i, byte a, byte b);
   87     }
   88 
          // Hook supplied by subclasses; returns a Mask<Byte>, presumably holding f's result per lane.
   89     abstract Mask<Byte> bTest(Vector<Byte> v, FBinTest f);
  90 
   91     // Foreach
          //
          // FUnCon is the callback shape for lane consumers: an int plus one byte in,
          // nothing returned. The int is presumably the lane index (TODO confirm).
   92 
   93     interface FUnCon {
   94         void apply(int i, byte a);
   95     }
   96 
          // Hook supplied by subclasses; presumably calls f once per lane.
   97     abstract void forEach(FUnCon f);
   98 
          // Mask-taking variant of forEach; presumably skips lanes that are unset in m (TODO confirm).
   99     abstract void forEach(Mask<Byte> m, FUnCon f);
 100 
 101     // Static factories
 102 
  103     /**
  104      * Returns a vector where all lane elements are set to the default
  105      * primitive value.
           * <p>
           * This is a direct delegate to {@code species.zero()}.
  106      *
  107      * @param species species of desired vector
  108      * @return a zero vector of given species
           * @throws NullPointerException if {@code species} is {@code null}
  109      */
  110     @ForceInline
  111     @SuppressWarnings("unchecked")
  112     public static ByteVector zero(ByteSpecies species) {
  113         return species.zero();
  114     }
 115 
 116     /**
 117      * Loads a vector from a byte array starting at an offset.
 118      * <p>
 119      * Bytes are composed into primitive lane elements according to the
 120      * native byte order of the underlying platform
 121      * <p>
 122      * This method behaves as if it returns the result of calling the
 123      * byte buffer, offset, and mask accepting
 124      * {@link #fromByteBuffer(ByteSpecies, ByteBuffer, int, Mask) method} as follows:
 125      * <pre>{@code
 126      * return this.fromByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 127      * }</pre>
 128      *
 129      * @param species species of desired vector
 130      * @param a the byte array
 131      * @param ix the offset into the array
 132      * @return a vector loaded from a byte array
 133      * @throws IndexOutOfBoundsException if {@code i < 0} or
 134      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 135      */
 136     @ForceInline
 137     @SuppressWarnings("unchecked")
 138     public static ByteVector fromByteArray(ByteSpecies species, byte[] a, int ix) {
 139         Objects.requireNonNull(a);
 140         ix = VectorIntrinsics.checkIndex(ix, a.length, species.bitSize() / Byte.SIZE);
 141         return VectorIntrinsics.load((Class<ByteVector>) species.boxType(), byte.class, species.length(),
 142                                      a, ((long) ix) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 143                                      a, ix, species,
 144                                      (c, idx, s) -> {
 145                                          ByteBuffer bbc = ByteBuffer.wrap(c, idx, a.length - idx).order(ByteOrder.nativeOrder());
 146                                          ByteBuffer tb = bbc;
 147                                          return ((ByteSpecies)s).op(i -> tb.get());
 148                                      });
 149     }
 150 
 151     /**
 152      * Loads a vector from a byte array starting at an offset and using a
 153      * mask.
 154      * <p>
 155      * Bytes are composed into primitive lane elements according to the
 156      * native byte order of the underlying platform.
 157      * <p>
 158      * This method behaves as if it returns the result of calling the
 159      * byte buffer, offset, and mask accepting
 160      * {@link #fromByteBuffer(ByteSpecies, ByteBuffer, int, Mask) method} as follows:
 161      * <pre>{@code
 162      * return this.fromByteBuffer(ByteBuffer.wrap(a), i, m);
 163      * }</pre>
 164      *
 165      * @param species species of desired vector
 166      * @param a the byte array
 167      * @param ix the offset into the array
 168      * @param m the mask
 169      * @return a vector loaded from a byte array
 170      * @throws IndexOutOfBoundsException if {@code i < 0} or
 171      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 172      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 173      * or {@code > a.length},
 174      * for any vector lane index {@code N} where the mask at lane {@code N}
 175      * is set
 176      * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 177      */
 178     @ForceInline
 179     public static ByteVector fromByteArray(ByteSpecies species, byte[] a, int ix, Mask<Byte> m) {
 180         return zero(species).blend(fromByteArray(species, a, ix), m);
 181     }
 182 
 183     /**
 184      * Loads a vector from an array starting at offset.
 185      * <p>
 186      * For each vector lane, where {@code N} is the vector lane index, the
 187      * array element at index {@code i + N} is placed into the
 188      * resulting vector at lane index {@code N}.
 189      *
 190      * @param species species of desired vector
 191      * @param a the array
 192      * @param i the offset into the array
 193      * @return the vector loaded from an array
 194      * @throws IndexOutOfBoundsException if {@code i < 0}, or
 195      * {@code i > a.length - this.length()}
 196      */
 197     @ForceInline
 198     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i) {
 199         Objects.requireNonNull(a);
 200         i = VectorIntrinsics.checkIndex(i, a.length, species.length());
 201         return fromArrayWithoutCheck(species, a, i);
 202     }
 203 
 204     @ForceInline
 205     @SuppressWarnings("unchecked")
 206     static ByteVector fromArrayWithoutCheck(ByteSpecies species, byte[] a, int i) {
 207         return VectorIntrinsics.load((Class<ByteVector>) species.boxType(), byte.class, species.length(),
 208                                      a, (((long) i) << ARRAY_SHIFT) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 209                                      a, i, species,
 210                                      (c, idx, s) -> ((ByteSpecies)s).op(n -> c[idx + n]));
 211     }
 212 
 213     /**
 214      * Loads a vector from an array starting at offset and using a mask.
 215      * <p>
 216      * For each vector lane, where {@code N} is the vector lane index,
 217      * if the mask lane at index {@code N} is set then the array element at
 218      * index {@code i + N} is placed into the resulting vector at lane index
 219      * {@code N}, otherwise the default element value is placed into the
 220      * resulting vector at lane index {@code N}.
 221      *
 222      * @param species species of desired vector
 223      * @param a the array
 224      * @param i the offset into the array
 225      * @param m the mask
 226      * @return the vector loaded from an array
 227      * @throws IndexOutOfBoundsException if {@code i < 0}, or
 228      * for any vector lane index {@code N} where the mask at lane {@code N}
 229      * is set {@code i > a.length - N}
 230      */
 231     @ForceInline
 232     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i, Mask<Byte> m) {
 233         Objects.requireNonNull(a);
 234         if (i + species.length() <= a.length) {
 235             return zero(species).blend(fromArrayWithoutCheck(species, a, i), m);
 236         } else {
 237             return species.op(m, n -> a[i + n]);
 238         }
 239     }
 240 
  241     /**
  242      * Loads a vector from an array using indexes obtained from an index
  243      * map.
  244      * <p>
  245      * For each vector lane, where {@code N} is the vector lane index, the
  246      * array element at index {@code i + indexMap[j + N]} is placed into the
  247      * resulting vector at lane index {@code N}.
           * <p>
           * The load is expressed lane by lane through {@code species.op(...)};
           * this method performs no explicit up-front range check, so the
           * exceptions below originate from the individual array accesses.
  248      *
  249      * @param species species of desired vector
  250      * @param a the array
  251      * @param i the offset into the array, may be negative if relative
  252      * indexes in the index map compensate to produce a value within the
  253      * array bounds
  254      * @param indexMap the index map
  255      * @param j the offset into the index map
  256      * @return the vector loaded from an array
  257      * @throws IndexOutOfBoundsException if {@code j < 0}, or
  258      * {@code j > indexMap.length - species.length()},
  259      * or for any vector lane index {@code N} the result of
  260      * {@code i + indexMap[j + N]} is {@code < 0} or {@code >= a.length}
  261      */
  262     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i, int[] indexMap, int j) {
  263         return species.op(n -> a[i + indexMap[j + n]]);
  264     }
 265 
  266     /**
  267      * Loads a vector from an array using indexes obtained from an index
  268      * map and using a mask.
  269      * <p>
  270      * For each vector lane, where {@code N} is the vector lane index,
  271      * if the mask lane at index {@code N} is set then the array element at
  272      * index {@code i + indexMap[j + N]} is placed into the resulting vector
  273      * at lane index {@code N}.
           * <p>
           * The load is expressed lane by lane through {@code species.op(m, ...)};
           * this method performs no explicit up-front range check, so the
           * exceptions below originate from the individual array accesses.
  274      *
  275      * @param species species of desired vector
  276      * @param a the array
  277      * @param i the offset into the array, may be negative if relative
  278      * indexes in the index map compensate to produce a value within the
  279      * array bounds
  280      * @param m the mask
  281      * @param indexMap the index map
  282      * @param j the offset into the index map
  283      * @return the vector loaded from an array
  284      * @throws IndexOutOfBoundsException if {@code j < 0}, or
  285      * {@code j > indexMap.length - species.length()},
  286      * or for any vector lane index {@code N} where the mask at lane
  287      * {@code N} is set the result of {@code i + indexMap[j + N]} is
  288      * {@code < 0} or {@code >= a.length}
  289      */
  290     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i, Mask<Byte> m, int[] indexMap, int j) {
  291         return species.op(m, n -> a[i + indexMap[j + n]]);
  292     }
 293 
 294     /**
 295      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 296      * offset into the byte buffer.
 297      * <p>
 298      * Bytes are composed into primitive lane elements according to the
 299      * native byte order of the underlying platform.
 300      * <p>
 301      * This method behaves as if it returns the result of calling the
 302      * byte buffer, offset, and mask accepting
 303      * {@link #fromByteBuffer(ByteSpecies, ByteBuffer, int, Mask)} method} as follows:
 304      * <pre>{@code
 305      *   return this.fromByteBuffer(b, i, this.maskAllTrue())
 306      * }</pre>
 307      *
 308      * @param species species of desired vector
 309      * @param bb the byte buffer
 310      * @param ix the offset into the byte buffer
 311      * @return a vector loaded from a byte buffer
 312      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 313      * or {@code > b.limit()},
 314      * or if there are fewer than
 315      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 316      * remaining in the byte buffer from the given offset
 317      */
 318     @ForceInline
 319     @SuppressWarnings("unchecked")
 320     public static ByteVector fromByteBuffer(ByteSpecies species, ByteBuffer bb, int ix) {
 321         if (bb.order() != ByteOrder.nativeOrder()) {
 322             throw new IllegalArgumentException();
 323         }
 324         ix = VectorIntrinsics.checkIndex(ix, bb.limit(), species.bitSize() / Byte.SIZE);
 325         return VectorIntrinsics.load((Class<ByteVector>) species.boxType(), byte.class, species.length(),
 326                                      U.getReference(bb, BYTE_BUFFER_HB), U.getLong(bb, BUFFER_ADDRESS) + ix,
 327                                      bb, ix, species,
 328                                      (c, idx, s) -> {
 329                                          ByteBuffer bbc = c.duplicate().position(idx).order(ByteOrder.nativeOrder());
 330                                          ByteBuffer tb = bbc;
 331                                          return ((ByteSpecies)s).op(i -> tb.get());
 332                                      });
 333     }
 334 
 335     /**
 336      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 337      * offset into the byte buffer and using a mask.
 338      * <p>
 339      * This method behaves as if the byte buffer is viewed as a primitive
 340      * {@link java.nio.Buffer buffer} for the primitive element type,
 341      * according to the native byte order of the underlying platform, and
 342      * the returned vector is loaded with a mask from a primitive array
 343      * obtained from the primitive buffer.
 344      * The following pseudocode expresses the behaviour, where
 345      * {@coce EBuffer} is the primitive buffer type, {@code e} is the
 346      * primitive element type, and {@code ESpecies<S>} is the primitive
 347      * species for {@code e}:
 348      * <pre>{@code
 349      * EBuffer eb = b.duplicate().
 350      *     order(ByteOrder.nativeOrder()).position(i).
 351      *     asEBuffer();
 352      * e[] es = new e[this.length()];
 353      * for (int n = 0; n < t.length; n++) {
 354      *     if (m.isSet(n))
 355      *         es[n] = eb.get(n);
 356      * }
 357      * Vector<E> r = ((ESpecies<S>)this).fromArray(es, 0, m);
 358      * }</pre>
 359      *
 360      * @param species species of desired vector
 361      * @param bb the byte buffer
 362      * @param ix the offset into the byte buffer
 363      * @param m the mask
 364      * @return a vector loaded from a byte buffer
 365      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 366      * or {@code > b.limit()},
 367      * for any vector lane index {@code N} where the mask at lane {@code N}
 368      * is set
 369      * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 370      */
 371     @ForceInline
 372     public static ByteVector fromByteBuffer(ByteSpecies species, ByteBuffer bb, int ix, Mask<Byte> m) {
 373         return zero(species).blend(fromByteBuffer(species, bb, ix), m);
 374     }
 375 
 376     /**
 377      * Returns a mask where each lane is set or unset according to given
 378      * {@code boolean} values
 379      * <p>
 380      * For each mask lane, where {@code N} is the mask lane index,
 381      * if the given {@code boolean} value at index {@code N} is {@code true}
 382      * then the mask lane at index {@code N} is set, otherwise it is unset.
 383      *
 384      * @param species mask species
 385      * @param bits the given {@code boolean} values
 386      * @return a mask where each lane is set or unset according to the given {@code boolean} value
 387      * @throws IndexOutOfBoundsException if {@code bits.length < species.length()}
 388      */
 389     @ForceInline
 390     public static Mask<Byte> maskFromValues(ByteSpecies species, boolean... bits) {
 391         if (species.boxType() == ByteMaxVector.class)
 392             return new ByteMaxVector.ByteMaxMask(bits);
 393         switch (species.bitSize()) {
 394             case 64: return new Byte64Vector.Byte64Mask(bits);
 395             case 128: return new Byte128Vector.Byte128Mask(bits);
 396             case 256: return new Byte256Vector.Byte256Mask(bits);
 397             case 512: return new Byte512Vector.Byte512Mask(bits);
 398             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 399         }
 400     }
 401 
 402     // @@@ This is a bad implementation -- makes lambdas capturing -- fix this
 403     static Mask<Byte> trueMask(ByteSpecies species) {
 404         if (species.boxType() == ByteMaxVector.class)
 405             return ByteMaxVector.ByteMaxMask.TRUE_MASK;
 406         switch (species.bitSize()) {
 407             case 64: return Byte64Vector.Byte64Mask.TRUE_MASK;
 408             case 128: return Byte128Vector.Byte128Mask.TRUE_MASK;
 409             case 256: return Byte256Vector.Byte256Mask.TRUE_MASK;
 410             case 512: return Byte512Vector.Byte512Mask.TRUE_MASK;
 411             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 412         }
 413     }
 414 
 415     static Mask<Byte> falseMask(ByteSpecies species) {
 416         if (species.boxType() == ByteMaxVector.class)
 417             return ByteMaxVector.ByteMaxMask.FALSE_MASK;
 418         switch (species.bitSize()) {
 419             case 64: return Byte64Vector.Byte64Mask.FALSE_MASK;
 420             case 128: return Byte128Vector.Byte128Mask.FALSE_MASK;
 421             case 256: return Byte256Vector.Byte256Mask.FALSE_MASK;
 422             case 512: return Byte512Vector.Byte512Mask.FALSE_MASK;
 423             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 424         }
 425     }
 426 
 427     /**
 428      * Loads a mask from a {@code boolean} array starting at an offset.
 429      * <p>
 430      * For each mask lane, where {@code N} is the mask lane index,
 431      * if the array element at index {@code ix + N} is {@code true} then the
 432      * mask lane at index {@code N} is set, otherwise it is unset.
 433      *
 434      * @param species mask species
 435      * @param bits the {@code boolean} array
 436      * @param ix the offset into the array
 437      * @return the mask loaded from a {@code boolean} array
 438      * @throws IndexOutOfBoundsException if {@code ix < 0}, or
 439      * {@code ix > bits.length - species.length()}
 440      */
 441     @ForceInline
 442     @SuppressWarnings("unchecked")
 443     public static Mask<Byte> maskFromArray(ByteSpecies species, boolean[] bits, int ix) {
 444         Objects.requireNonNull(bits);
 445         ix = VectorIntrinsics.checkIndex(ix, bits.length, species.length());
 446         return VectorIntrinsics.load((Class<Mask<Byte>>) species.maskType(), byte.class, species.length(),
 447                                      bits, (((long) ix) << ARRAY_SHIFT) + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
 448                                      bits, ix, species,
 449                                      (c, idx, s) -> (Mask<Byte>) ((ByteSpecies)s).opm(n -> c[idx + n]));
 450     }
 451 
 452     /**
 453      * Returns a mask where all lanes are set.
 454      *
 455      * @param species mask species
 456      * @return a mask where all lanes are set
 457      */
 458     @ForceInline
 459     @SuppressWarnings("unchecked")
 460     public static Mask<Byte> maskAllTrue(ByteSpecies species) {
 461         return VectorIntrinsics.broadcastCoerced((Class<Mask<Byte>>) species.maskType(), byte.class, species.length(),
 462                                                  (byte)-1,  species,
 463                                                  ((z, s) -> trueMask((ByteSpecies)s)));
 464     }
 465 
 466     /**
 467      * Returns a mask where all lanes are unset.
 468      *
 469      * @param species mask species
 470      * @return a mask where all lanes are unset
 471      */
 472     @ForceInline
 473     @SuppressWarnings("unchecked")
 474     public static Mask<Byte> maskAllFalse(ByteSpecies species) {
 475         return VectorIntrinsics.broadcastCoerced((Class<Mask<Byte>>) species.maskType(), byte.class, species.length(),
 476                                                  0, species,
 477                                                  ((z, s) -> falseMask((ByteSpecies)s)));
 478     }
 479 
 480     /**
 481      * Returns a shuffle of mapped indexes where each lane element is
 482      * the result of applying a mapping function to the corresponding lane
 483      * index.
 484      * <p>
 485      * Care should be taken to ensure Shuffle values produced from this
 486      * method are consumed as constants to ensure optimal generation of
 487      * code.  For example, values held in static final fields or values
 488      * held in loop constant local variables.
 489      * <p>
 490      * This method behaves as if a shuffle is created from an array of
 491      * mapped indexes as follows:
 492      * <pre>{@code
 493      *   int[] a = new int[species.length()];
 494      *   for (int i = 0; i < a.length; i++) {
 495      *       a[i] = f.applyAsInt(i);
 496      *   }
 497      *   return this.shuffleFromValues(a);
 498      * }</pre>
 499      *
 500      * @param species shuffle species
 501      * @param f the lane index mapping function
 502      * @return a shuffle of mapped indexes
 503      */
 504     @ForceInline
 505     public static Shuffle<Byte> shuffle(ByteSpecies species, IntUnaryOperator f) {
 506         if (species.boxType() == ByteMaxVector.class)
 507             return new ByteMaxVector.ByteMaxShuffle(f);
 508         switch (species.bitSize()) {
 509             case 64: return new Byte64Vector.Byte64Shuffle(f);
 510             case 128: return new Byte128Vector.Byte128Shuffle(f);
 511             case 256: return new Byte256Vector.Byte256Shuffle(f);
 512             case 512: return new Byte512Vector.Byte512Shuffle(f);
 513             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 514         }
 515     }
 516 
 517     /**
 518      * Returns a shuffle where each lane element is the value of its
 519      * corresponding lane index.
 520      * <p>
 521      * This method behaves as if a shuffle is created from an identity
 522      * index mapping function as follows:
 523      * <pre>{@code
 524      *   return this.shuffle(i -> i);
 525      * }</pre>
 526      *
 527      * @param species shuffle species
 528      * @return a shuffle of lane indexes
 529      */
 530     @ForceInline
 531     public static Shuffle<Byte> shuffleIota(ByteSpecies species) {
 532         if (species.boxType() == ByteMaxVector.class)
 533             return new ByteMaxVector.ByteMaxShuffle(AbstractShuffle.IDENTITY);
 534         switch (species.bitSize()) {
 535             case 64: return new Byte64Vector.Byte64Shuffle(AbstractShuffle.IDENTITY);
 536             case 128: return new Byte128Vector.Byte128Shuffle(AbstractShuffle.IDENTITY);
 537             case 256: return new Byte256Vector.Byte256Shuffle(AbstractShuffle.IDENTITY);
 538             case 512: return new Byte512Vector.Byte512Shuffle(AbstractShuffle.IDENTITY);
 539             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 540         }
 541     }
 542 
 543     /**
 544      * Returns a shuffle where each lane element is set to a given
 545      * {@code int} value logically AND'ed by the species length minus one.
 546      * <p>
 547      * For each shuffle lane, where {@code N} is the shuffle lane index, the
 548      * the {@code int} value at index {@code N} logically AND'ed by
 549      * {@code species.length() - 1} is placed into the resulting shuffle at
 550      * lane index {@code N}.
 551      *
 552      * @param species shuffle species
 553      * @param ixs the given {@code int} values
 554      * @return a shuffle where each lane element is set to a given
 555      * {@code int} value
 556      * @throws IndexOutOfBoundsException if the number of int values is
 557      * {@code < species.length()}
 558      */
 559     @ForceInline
 560     public static Shuffle<Byte> shuffleFromValues(ByteSpecies species, int... ixs) {
 561         if (species.boxType() == ByteMaxVector.class)
 562             return new ByteMaxVector.ByteMaxShuffle(ixs);
 563         switch (species.bitSize()) {
 564             case 64: return new Byte64Vector.Byte64Shuffle(ixs);
 565             case 128: return new Byte128Vector.Byte128Shuffle(ixs);
 566             case 256: return new Byte256Vector.Byte256Shuffle(ixs);
 567             case 512: return new Byte512Vector.Byte512Shuffle(ixs);
 568             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 569         }
 570     }
 571 
 572     /**
 573      * Loads a shuffle from an {@code int} array starting at an offset.
 574      * <p>
 575      * For each shuffle lane, where {@code N} is the shuffle lane index, the
 576      * array element at index {@code i + N} logically AND'ed by
 577      * {@code species.length() - 1} is placed into the resulting shuffle at lane
 578      * index {@code N}.
 579      *
 580      * @param species shuffle species
 581      * @param ixs the {@code int} array
 582      * @param i the offset into the array
 583      * @return a shuffle loaded from the {@code int} array
 584      * @throws IndexOutOfBoundsException if {@code i < 0}, or
 585      * {@code i > a.length - species.length()}
 586      */
 587     @ForceInline
 588     public static Shuffle<Byte> shuffleFromArray(ByteSpecies species, int[] ixs, int i) {
 589         if (species.boxType() == ByteMaxVector.class)
 590             return new ByteMaxVector.ByteMaxShuffle(ixs, i);
 591         switch (species.bitSize()) {
 592             case 64: return new Byte64Vector.Byte64Shuffle(ixs, i);
 593             case 128: return new Byte128Vector.Byte128Shuffle(ixs, i);
 594             case 256: return new Byte256Vector.Byte256Shuffle(ixs, i);
 595             case 512: return new Byte512Vector.Byte512Shuffle(ixs, i);
 596             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 597         }
 598     }
 599 
 600 
 601     // Ops
 602 
  603     @Override
          // Redeclared with a ByteVector return type. It carries no Javadoc of its own,
          // so the documentation of the overridden method applies.
  604     public abstract ByteVector add(Vector<Byte> v);
  605 
  606     /**
  607      * Adds this vector to the broadcast of an input scalar.
  608      * <p>
  609      * This is a vector binary operation where the primitive addition operation
  610      * ({@code +}) is applied to lane elements.
  611      *
  612      * @param s the input scalar
  613      * @return the result of adding this vector to the broadcast of an input
  614      * scalar
  615      */
  616     public abstract ByteVector add(byte s);
  617 
  618     @Override
          // Mask-taking overload, likewise redeclared with a ByteVector return type.
  619     public abstract ByteVector add(Vector<Byte> v, Mask<Byte> m);
  620 
  621     /**
  622      * Adds this vector to the broadcast of an input scalar,
  623      * selecting lane elements controlled by a mask.
  624      * <p>
  625      * This is a vector binary operation where the primitive addition operation
  626      * ({@code +}) is applied to lane elements.
  627      *
  628      * @param s the input scalar
  629      * @param m the mask controlling lane selection
  630      * @return the result of adding this vector to the broadcast of an input
  631      * scalar
  632      */
  633     public abstract ByteVector add(byte s, Mask<Byte> m);
 634 
  635     @Override
          // Redeclared with a ByteVector return type. It carries no Javadoc of its own,
          // so the documentation of the overridden method applies.
  636     public abstract ByteVector sub(Vector<Byte> v);
  637 
  638     /**
  639      * Subtracts the broadcast of an input scalar from this vector.
  640      * <p>
  641      * This is a vector binary operation where the primitive subtraction
  642      * operation ({@code -}) is applied to lane elements.
  643      *
  644      * @param s the input scalar
  645      * @return the result of subtracting the broadcast of an input
  646      * scalar from this vector
  647      */
  648     public abstract ByteVector sub(byte s);
  649 
  650     @Override
          // Mask-taking overload, likewise redeclared with a ByteVector return type.
  651     public abstract ByteVector sub(Vector<Byte> v, Mask<Byte> m);
  652 
  653     /**
  654      * Subtracts the broadcast of an input scalar from this vector, selecting
  655      * lane elements controlled by a mask.
  656      * <p>
  657      * This is a vector binary operation where the primitive subtraction
  658      * operation ({@code -}) is applied to lane elements.
  659      *
  660      * @param s the input scalar
  661      * @param m the mask controlling lane selection
  662      * @return the result of subtracting the broadcast of an input
  663      * scalar from this vector
  664      */
  665     public abstract ByteVector sub(byte s, Mask<Byte> m);
 666 
  667     @Override
          // Redeclared with a ByteVector return type. It carries no Javadoc of its own,
          // so the documentation of the overridden method applies.
  668     public abstract ByteVector mul(Vector<Byte> v);
  669 
  670     /**
  671      * Multiplies this vector with the broadcast of an input scalar.
  672      * <p>
  673      * This is a vector binary operation where the primitive multiplication
  674      * operation ({@code *}) is applied to lane elements.
  675      *
  676      * @param s the input scalar
  677      * @return the result of multiplying this vector with the broadcast of an
  678      * input scalar
  679      */
  680     public abstract ByteVector mul(byte s);
  681 
  682     @Override
          // Mask-taking overload, likewise redeclared with a ByteVector return type.
  683     public abstract ByteVector mul(Vector<Byte> v, Mask<Byte> m);
  684 
  685     /**
  686      * Multiplies this vector with the broadcast of an input scalar, selecting
  687      * lane elements controlled by a mask.
  688      * <p>
  689      * This is a vector binary operation where the primitive multiplication
  690      * operation ({@code *}) is applied to lane elements.
  691      *
  692      * @param s the input scalar
  693      * @param m the mask controlling lane selection
  694      * @return the result of multiplying this vector with the broadcast of an
  695      * input scalar
  696      */
  697     public abstract ByteVector mul(byte s, Mask<Byte> m);
 698 
  699     @Override
          // neg and abs, with and without a mask, are redeclared with a ByteVector
          // return type. None carries Javadoc of its own, so the documentation of the
          // overridden methods applies.
  700     public abstract ByteVector neg();
  701 
  702     @Override
  703     public abstract ByteVector neg(Mask<Byte> m);
  704 
  705     @Override
  706     public abstract ByteVector abs();
  707 
  708     @Override
  709     public abstract ByteVector abs(Mask<Byte> m);
 710 
 711     @Override
 712     public abstract ByteVector min(Vector<Byte> v);
 713 
 714     @Override
 715     public abstract ByteVector min(Vector<Byte> v, Mask<Byte> m);
 716 
 717     /**
 718      * Returns the minimum of this vector and the broadcast of an input scalar.
 719      * <p>
 720      * This is a vector binary operation where the operation
 721      * {@code (a, b) -> Math.min(a, b)} is applied to lane elements.
 722      *
 723      * @param s the input scalar
 724      * @return the minimum of this vector and the broadcast of an input scalar
 725      */
 726     public abstract ByteVector min(byte s);
 727 
 728     @Override
 729     public abstract ByteVector max(Vector<Byte> v);
 730 
 731     @Override
 732     public abstract ByteVector max(Vector<Byte> v, Mask<Byte> m);
 733 
 734     /**
 735      * Returns the maximum of this vector and the broadcast of an input scalar.
 736      * <p>
 737      * This is a vector binary operation where the operation
 738      * {@code (a, b) -> Math.max(a, b)} is applied to lane elements.
 739      *
 740      * @param s the input scalar
 741      * @return the maximum of this vector and the broadcast of an input scalar
 742      */
 743     public abstract ByteVector max(byte s);
 744 
 745     @Override
 746     public abstract Mask<Byte> equal(Vector<Byte> v);
 747 
 748     /**
 749      * Tests if this vector is equal to the broadcast of an input scalar.
 750      * <p>
 751      * This is a vector binary test operation where the primitive equals
 752      * operation ({@code ==}) is applied to lane elements.
 753      *
 754      * @param s the input scalar
 755      * @return the result mask of testing if this vector is equal to the
 756      * broadcast of an input scalar
 757      */
 758     public abstract Mask<Byte> equal(byte s);
 759 
 760     @Override
 761     public abstract Mask<Byte> notEqual(Vector<Byte> v);
 762 
 763     /**
 764      * Tests if this vector is not equal to the broadcast of an input scalar.
 765      * <p>
 766      * This is a vector binary test operation where the primitive not equals
 767      * operation ({@code !=}) is applied to lane elements.
 768      *
 769      * @param s the input scalar
 770      * @return the result mask of testing if this vector is not equal to the
 771      * broadcast of an input scalar
 772      */
 773     public abstract Mask<Byte> notEqual(byte s);
 774 
 775     @Override
 776     public abstract Mask<Byte> lessThan(Vector<Byte> v);
 777 
 778     /**
 779      * Tests if this vector is less than the broadcast of an input scalar.
 780      * <p>
 781      * This is a vector binary test operation where the primitive less than
 782      * operation ({@code <}) is applied to lane elements.
 783      *
 784      * @param s the input scalar
 785      * @return the mask result of testing if this vector is less than the
 786      * broadcast of an input scalar
 787      */
 788     public abstract Mask<Byte> lessThan(byte s);
 789 
 790     @Override
 791     public abstract Mask<Byte> lessThanEq(Vector<Byte> v);
 792 
 793     /**
 794      * Tests if this vector is less than or equal to the broadcast of an input scalar.
 795      * <p>
 796      * This is a vector binary test operation where the primitive less than
 797      * or equal to operation ({@code <=}) is applied to lane elements.
 798      *
 799      * @param s the input scalar
 800      * @return the mask result of testing if this vector is less than or equal
 801      * to the broadcast of an input scalar
 802      */
 803     public abstract Mask<Byte> lessThanEq(byte s);
 804 
 805     @Override
 806     public abstract Mask<Byte> greaterThan(Vector<Byte> v);
 807 
 808     /**
 809      * Tests if this vector is greater than the broadcast of an input scalar.
 810      * <p>
 811      * This is a vector binary test operation where the primitive greater than
 812      * operation ({@code >}) is applied to lane elements.
 813      *
 814      * @param s the input scalar
 815      * @return the mask result of testing if this vector is greater than the
 816      * broadcast of an input scalar
 817      */
 818     public abstract Mask<Byte> greaterThan(byte s);
 819 
 820     @Override
 821     public abstract Mask<Byte> greaterThanEq(Vector<Byte> v);
 822 
 823     /**
 824      * Tests if this vector is greater than or equal to the broadcast of an
 825      * input scalar.
 826      * <p>
 827      * This is a vector binary test operation where the primitive greater than
 828      * or equal to operation ({@code >=}) is applied to lane elements.
 829      *
 830      * @param s the input scalar
 831      * @return the mask result of testing if this vector is greater than or
 832      * equal to the broadcast of an input scalar
 833      */
 834     public abstract Mask<Byte> greaterThanEq(byte s);
 835 
 836     @Override
 837     public abstract ByteVector blend(Vector<Byte> v, Mask<Byte> m);
 838 
 839     /**
 840      * Blends the lane elements of this vector with those of the broadcast of an
 841      * input scalar, selecting lanes controlled by a mask.
 842      * <p>
 843      * For each lane of the mask, at lane index {@code N}, if the mask lane
 844      * is set then the lane element at {@code N} from the broadcast of the
 845      * input scalar is selected and placed into the resulting vector at
 846      * {@code N}, otherwise the lane element at {@code N} from this vector is
 847      * selected and placed into the resulting vector at {@code N}.
 848      *
 849      * @param s the input scalar
 850      * @param m the mask controlling lane selection
 851      * @return the result of blending the lane elements of this vector with
 852      * those of the broadcast of an input scalar
 853      */
 854     public abstract ByteVector blend(byte s, Mask<Byte> m);
 855 
 856     @Override
 857     public abstract ByteVector rearrange(Vector<Byte> v,
 858                                                       Shuffle<Byte> s, Mask<Byte> m);
 859 
 860     @Override
 861     public abstract ByteVector rearrange(Shuffle<Byte> m);
 862 
 863     @Override
 864     public abstract ByteVector reshape(Species<Byte> s);
 865 
 866     @Override
 867     public abstract ByteVector rotateEL(int i);
 868 
 869     @Override
 870     public abstract ByteVector rotateER(int i);
 871 
 872     @Override
 873     public abstract ByteVector shiftEL(int i);
 874 
 875     @Override
 876     public abstract ByteVector shiftER(int i);
 877 
 878 
 879 
 880     /**
 881      * Bitwise ANDs this vector with an input vector.
 882      * <p>
 883      * This is a vector binary operation where the primitive bitwise AND
 884      * operation ({@code &}) is applied to lane elements.
 885      *
 886      * @param v the input vector
 887      * @return the bitwise AND of this vector with the input vector
 888      */
 889     public abstract ByteVector and(Vector<Byte> v);
 890 
 891     /**
 892      * Bitwise ANDs this vector with the broadcast of an input scalar.
 893      * <p>
 894      * This is a vector binary operation where the primitive bitwise AND
 895      * operation ({@code &}) is applied to lane elements.
 896      *
 897      * @param s the input scalar
 898      * @return the bitwise AND of this vector with the broadcast of an input
 899      * scalar
 900      */
 901     public abstract ByteVector and(byte s);
 902 
 903     /**
 904      * Bitwise ANDs this vector with an input vector, selecting lane elements
 905      * controlled by a mask.
 906      * <p>
 907      * This is a vector binary operation where the primitive bitwise AND
 908      * operation ({@code &}) is applied to lane elements.
 909      *
 910      * @param v the input vector
 911      * @param m the mask controlling lane selection
 912      * @return the bitwise AND of this vector with the input vector
 913      */
 914     public abstract ByteVector and(Vector<Byte> v, Mask<Byte> m);
 915 
 916     /**
 917      * Bitwise ANDs this vector with the broadcast of an input scalar, selecting
 918      * lane elements controlled by a mask.
 919      * <p>
 920      * This is a vector binary operation where the primitive bitwise AND
 921      * operation ({@code &}) is applied to lane elements.
 922      *
 923      * @param s the input scalar
 924      * @param m the mask controlling lane selection
 925      * @return the bitwise AND of this vector with the broadcast of an input
 926      * scalar
 927      */
 928     public abstract ByteVector and(byte s, Mask<Byte> m);
 929 
 930     /**
 931      * Bitwise ORs this vector with an input vector.
 932      * <p>
 933      * This is a vector binary operation where the primitive bitwise OR
 934      * operation ({@code |}) is applied to lane elements.
 935      *
 936      * @param v the input vector
 937      * @return the bitwise OR of this vector with the input vector
 938      */
 939     public abstract ByteVector or(Vector<Byte> v);
 940 
 941     /**
 942      * Bitwise ORs this vector with the broadcast of an input scalar.
 943      * <p>
 944      * This is a vector binary operation where the primitive bitwise OR
 945      * operation ({@code |}) is applied to lane elements.
 946      *
 947      * @param s the input scalar
 948      * @return the bitwise OR of this vector with the broadcast of an input
 949      * scalar
 950      */
 951     public abstract ByteVector or(byte s);
 952 
 953     /**
 954      * Bitwise ORs this vector with an input vector, selecting lane elements
 955      * controlled by a mask.
 956      * <p>
 957      * This is a vector binary operation where the primitive bitwise OR
 958      * operation ({@code |}) is applied to lane elements.
 959      *
 960      * @param v the input vector
 961      * @param m the mask controlling lane selection
 962      * @return the bitwise OR of this vector with the input vector
 963      */
 964     public abstract ByteVector or(Vector<Byte> v, Mask<Byte> m);
 965 
 966     /**
 967      * Bitwise ORs this vector with the broadcast of an input scalar, selecting
 968      * lane elements controlled by a mask.
 969      * <p>
 970      * This is a vector binary operation where the primitive bitwise OR
 971      * operation ({@code |}) is applied to lane elements.
 972      *
 973      * @param s the input scalar
 974      * @param m the mask controlling lane selection
 975      * @return the bitwise OR of this vector with the broadcast of an input
 976      * scalar
 977      */
 978     public abstract ByteVector or(byte s, Mask<Byte> m);
 979 
 980     /**
 981      * Bitwise XORs this vector with an input vector.
 982      * <p>
 983      * This is a vector binary operation where the primitive bitwise XOR
 984      * operation ({@code ^}) is applied to lane elements.
 985      *
 986      * @param v the input vector
 987      * @return the bitwise XOR of this vector with the input vector
 988      */
 989     public abstract ByteVector xor(Vector<Byte> v);
 990 
 991     /**
 992      * Bitwise XORs this vector with the broadcast of an input scalar.
 993      * <p>
 994      * This is a vector binary operation where the primitive bitwise XOR
 995      * operation ({@code ^}) is applied to lane elements.
 996      *
 997      * @param s the input scalar
 998      * @return the bitwise XOR of this vector with the broadcast of an input
 999      * scalar
1000      */
1001     public abstract ByteVector xor(byte s);
1002 
1003     /**
1004      * Bitwise XORs this vector with an input vector, selecting lane elements
1005      * controlled by a mask.
1006      * <p>
1007      * This is a vector binary operation where the primitive bitwise XOR
1008      * operation ({@code ^}) is applied to lane elements.
1009      *
1010      * @param v the input vector
1011      * @param m the mask controlling lane selection
1012      * @return the bitwise XOR of this vector with the input vector
1013      */
1014     public abstract ByteVector xor(Vector<Byte> v, Mask<Byte> m);
1015 
1016     /**
1017      * Bitwise XORs this vector with the broadcast of an input scalar, selecting
1018      * lane elements controlled by a mask.
1019      * <p>
1020      * This is a vector binary operation where the primitive bitwise XOR
1021      * operation ({@code ^}) is applied to lane elements.
1022      *
1023      * @param s the input scalar
1024      * @param m the mask controlling lane selection
1025      * @return the bitwise XOR of this vector with the broadcast of an input
1026      * scalar
1027      */
1028     public abstract ByteVector xor(byte s, Mask<Byte> m);
1029 
1030     /**
1031      * Bitwise NOTs this vector.
1032      * <p>
1033      * This is a vector unary operation where the primitive bitwise NOT
1034      * operation ({@code ~}) is applied to lane elements.
1035      *
1036      * @return the bitwise NOT of this vector
1037      */
1038     public abstract ByteVector not();
1039 
1040     /**
1041      * Bitwise NOTs this vector, selecting lane elements controlled by a mask.
1042      * <p>
1043      * This is a vector unary operation where the primitive bitwise NOT
1044      * operation ({@code ~}) is applied to lane elements.
1045      *
1046      * @param m the mask controlling lane selection
1047      * @return the bitwise NOT of this vector
1048      */
1049     public abstract ByteVector not(Mask<Byte> m);
1050 
1051     /**
1052      * Logically left shifts this vector by the broadcast of an input scalar.
1053      * <p>
1054      * This is a vector binary operation where the primitive logical left shift
1055      * operation ({@code <<}) is applied to lane elements to left shift the
1056      * element by the shift value specified by the input scalar. Only the 3
1057      * lowest-order bits of the shift value are used. It is as if the shift value
1058      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value {@code 0x7}.
1059      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1060      *
1061      * @param s the input scalar; the number of bits to left shift
1062      * @return the result of logically left shifting this vector by the
1063      * broadcast of an input scalar
1064      */
1065     public abstract ByteVector shiftL(int s);
1066 
1067     /**
1068      * Logically left shifts this vector by the broadcast of an input scalar,
1069      * selecting lane elements controlled by a mask.
1070      * <p>
1071      * This is a vector binary operation where the primitive logical left shift
1072      * operation ({@code <<}) is applied to lane elements to left shift the
1073      * element by the shift value specified by the input scalar. Only the 3
1074      * lowest-order bits of the shift value are used. It is as if the shift value
1075      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value {@code 0x7}.
1076      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1077      *
1078      * @param s the input scalar; the number of bits to left shift
1079      * @param m the mask controlling lane selection
1080      * @return the result of logically left shifting this vector by the
1081      * broadcast of an input scalar
1082      */
1083     public abstract ByteVector shiftL(int s, Mask<Byte> m);
1084 
1085 
1086     // logical, or unsigned, shift right
1087 
1088      /**
1089      * Logically right shifts (or unsigned right shifts) this vector by the
1090      * broadcast of an input scalar.
1091      * <p>
1092      * This is a vector binary operation where the primitive logical right shift
1093      * operation ({@code >>>}) is applied to lane elements to logically right shift the
1094      * element by the shift value specified by the input scalar. Only the 3
1095      * lowest-order bits of the shift value are used. It is as if the shift value
1096      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value {@code 0x7}.
1097      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1098      *
1099      * @param s the input scalar; the number of bits to right shift
1100      * @return the result of logically right shifting this vector by the
1101      * broadcast of an input scalar
1102      */
1103     public abstract ByteVector shiftR(int s);
1104 
1105      /**
1106      * Logically right shifts (or unsigned right shifts) this vector by the
1107      * broadcast of an input scalar, selecting lane elements controlled by a
1108      * mask.
1109      * <p>
1110      * This is a vector binary operation where the primitive logical right shift
1111      * operation ({@code >>>}) is applied to lane elements to logically right shift the
1112      * element by the shift value specified by the input scalar. Only the 3
1113      * lowest-order bits of the shift value are used. It is as if the shift value
1114      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value {@code 0x7}.
1115      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1116      *
1117      * @param s the input scalar; the number of bits to right shift
1118      * @param m the mask controlling lane selection
1119      * @return the result of logically right shifting this vector by the
1120      * broadcast of an input scalar
1121      */
1122     public abstract ByteVector shiftR(int s, Mask<Byte> m);
1123 
1124 
1125     /**
1126      * Arithmetically right shifts (or signed right shifts) this vector by the
1127      * broadcast of an input scalar.
1128      * <p>
1129      * This is a vector binary operation where the primitive arithmetic right
1130      * shift operation ({@code >>}) is applied to lane elements to arithmetically
1131      * right shift the element by the shift value specified by the input scalar.
1132      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1133      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value {@code 0x7}.
1134      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1135      *
1136      * @param s the input scalar; the number of bits to right shift
1137      * @return the result of arithmetically right shifting this vector by the
1138      * broadcast of an input scalar
1139      */
1140     public abstract ByteVector aShiftR(int s);
1141 
1142     /**
1143      * Arithmetically right shifts (or signed right shifts) this vector by the
1144      * broadcast of an input scalar, selecting lane elements controlled by a
1145      * mask.
1146      * <p>
1147      * This is a vector binary operation where the primitive arithmetic right
1148      * shift operation ({@code >>}) is applied to lane elements to arithmetically
1149      * right shift the element by the shift value specified by the input scalar.
1150      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1151      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value {@code 0x7}.
1152      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1153      *
1154      * @param s the input scalar; the number of bits to right shift
1155      * @param m the mask controlling lane selection
1156      * @return the result of arithmetically right shifting this vector by the
1157      * broadcast of an input scalar
1158      */
1159     public abstract ByteVector aShiftR(int s, Mask<Byte> m);
1160 
1161 
1162     @Override
1163     public abstract void intoByteArray(byte[] a, int ix);
1164 
1165     @Override
1166     public abstract void intoByteArray(byte[] a, int ix, Mask<Byte> m);
1167 
1168     @Override
1169     public abstract void intoByteBuffer(ByteBuffer bb, int ix);
1170 
1171     @Override
1172     public abstract void intoByteBuffer(ByteBuffer bb, int ix, Mask<Byte> m);
1173 
1174 
1175     // Type specific horizontal reductions
1176     /**
1177      * Adds all lane elements of this vector.
1178      * <p>
1179      * This is an associative vector reduction operation where the addition
1180      * operation ({@code +}) is applied to lane elements,
1181      * and the identity value is {@code 0}.
1182      *
1183      * @return the addition of all the lane elements of this vector
1184      */
1185     public abstract byte addAll();
1186 
1187     /**
1188      * Adds all lane elements of this vector, selecting lane elements
1189      * controlled by a mask.
1190      * <p>
1191      * This is an associative vector reduction operation where the addition
1192      * operation ({@code +}) is applied to lane elements,
1193      * and the identity value is {@code 0}.
1194      *
1195      * @param m the mask controlling lane selection
1196      * @return the addition of the selected lane elements of this vector
1197      */
1198     public abstract byte addAll(Mask<Byte> m);
1199 
1200     /**
1201      * Multiplies all lane elements of this vector.
1202      * <p>
1203      * This is an associative vector reduction operation where the
1204      * multiplication operation ({@code *}) is applied to lane elements,
1205      * and the identity value is {@code 1}.
1206      *
1207      * @return the multiplication of all the lane elements of this vector
1208      */
1209     public abstract byte mulAll();
1210 
1211     /**
1212      * Multiplies all lane elements of this vector, selecting lane elements
1213      * controlled by a mask.
1214      * <p>
1215      * This is an associative vector reduction operation where the
1216      * multiplication operation ({@code *}) is applied to lane elements,
1217      * and the identity value is {@code 1}.
1218      *
1219      * @param m the mask controlling lane selection
1220      * @return the multiplication of the selected lane elements of this vector
1221      */
1222     public abstract byte mulAll(Mask<Byte> m);
1223 
1224     /**
1225      * Returns the minimum lane element of this vector.
1226      * <p>
1227      * This is an associative vector reduction operation where the operation
1228      * {@code (a, b) -> Math.min(a, b)} is applied to lane elements,
1229      * and the identity value is
1230      * {@link Byte#MAX_VALUE}.
1231      *
1232      * @return the minimum lane element of this vector
1233      */
1234     public abstract byte minAll();
1235 
1236     /**
1237      * Returns the minimum lane element of this vector, selecting lane elements
1238      * controlled by a mask.
1239      * <p>
1240      * This is an associative vector reduction operation where the operation
1241      * {@code (a, b) -> Math.min(a, b)} is applied to lane elements,
1242      * and the identity value is
1243      * {@link Byte#MAX_VALUE}.
1244      *
1245      * @param m the mask controlling lane selection
1246      * @return the minimum of the selected lane elements of this vector
1247      */
1248     public abstract byte minAll(Mask<Byte> m);
1249 
1250     /**
1251      * Returns the maximum lane element of this vector.
1252      * <p>
1253      * This is an associative vector reduction operation where the operation
1254      * {@code (a, b) -> Math.max(a, b)} is applied to lane elements,
1255      * and the identity value is
1256      * {@link Byte#MIN_VALUE}.
1257      *
1258      * @return the maximum lane element of this vector
1259      */
1260     public abstract byte maxAll();
1261 
1262     /**
1263      * Returns the maximum lane element of this vector, selecting lane elements
1264      * controlled by a mask.
1265      * <p>
1266      * This is an associative vector reduction operation where the operation
1267      * {@code (a, b) -> Math.max(a, b)} is applied to lane elements,
1268      * and the identity value is
1269      * {@link Byte#MIN_VALUE}.
1270      *
1271      * @param m the mask controlling lane selection
1272      * @return the maximum of the selected lane elements of this vector
1273      */
1274     public abstract byte maxAll(Mask<Byte> m);
1275 
1276     /**
1277      * Bitwise ORs all lane elements of this vector.
1278      * <p>
1279      * This is an associative vector reduction operation where the bitwise OR
1280      * operation ({@code |}) is applied to lane elements,
1281      * and the identity value is {@code 0}.
1282      *
1283      * @return the bitwise OR of all the lane elements of this vector
1284      */
1285     public abstract byte orAll();
1286 
1287     /**
1288      * Bitwise ORs all lane elements of this vector, selecting lane elements
1289      * controlled by a mask.
1290      * <p>
1291      * This is an associative vector reduction operation where the bitwise OR
1292      * operation ({@code |}) is applied to lane elements,
1293      * and the identity value is {@code 0}.
1294      *
1295      * @param m the mask controlling lane selection
1296      * @return the bitwise OR of the selected lane elements of this vector
1297      */
1298     public abstract byte orAll(Mask<Byte> m);
1299 
1300     /**
1301      * Bitwise ANDs all lane elements of this vector.
1302      * <p>
1303      * This is an associative vector reduction operation where the bitwise AND
1304      * operation ({@code &}) is applied to lane elements,
1305      * and the identity value is {@code -1}.
1306      *
1307      * @return the bitwise AND of all the lane elements of this vector
1308      */
1309     public abstract byte andAll();
1310 
1311     /**
1312      * Bitwise ANDs all lane elements of this vector, selecting lane elements
1313      * controlled by a mask.
1314      * <p>
1315      * This is an associative vector reduction operation where the bitwise AND
1316      * operation ({@code &}) is applied to lane elements,
1317      * and the identity value is {@code -1}.
1318      *
1319      * @param m the mask controlling lane selection
1320      * @return the bitwise AND of the selected lane elements of this vector
1321      */
1322     public abstract byte andAll(Mask<Byte> m);
1323 
1324     /**
1325      * Bitwise XORs all lane elements of this vector.
1326      * <p>
1327      * This is an associative vector reduction operation where the bitwise XOR
1328      * operation ({@code ^}) is applied to lane elements,
1329      * and the identity value is {@code 0}.
1330      *
1331      * @return the bitwise XOR of all the lane elements of this vector
1332      */
1333     public abstract byte xorAll();
1334 
1335     /**
1336      * Bitwise XORs all lane elements of this vector, selecting lane elements
1337      * controlled by a mask.
1338      * <p>
1339      * This is an associative vector reduction operation where the bitwise XOR
1340      * operation ({@code ^}) is applied to lane elements,
1341      * and the identity value is {@code 0}.
1342      *
1343      * @param m the mask controlling lane selection
1344      * @return the bitwise XOR of the selected lane elements of this vector
1345      */
1346     public abstract byte xorAll(Mask<Byte> m);
1347 
1348     // Type specific accessors
1349 
1350     /**
1351      * Gets the lane element at lane index {@code i}.
1352      *
1353      * @param i the lane index
1354      * @return the lane element at lane index {@code i}
1355      * @throws IllegalArgumentException if the index is out of range
1356      * ({@code < 0 || >= length()})
1357      */
1358     public abstract byte get(int i);
1359 
1360     /**
1361      * Replaces the lane element of this vector at lane index {@code i} with
1362      * value {@code e}.
1363      * <p>
1364      * This is a cross-lane operation and behaves as if it returns the result
1365      * of blending this vector with an input vector that is the result of
1366      * broadcasting {@code e} and a mask that has only one lane set at lane
1367      * index {@code i}.
1368      *
1369      * @param i the lane index of the lane element to be replaced
1370      * @param e the value to be placed
1371      * @return the result of replacing the lane element of this vector at lane
1372      * index {@code i} with value {@code e}
1373      * @throws IllegalArgumentException if the index is out of range
1374      * ({@code < 0 || >= length()})
1375      */
1376     public abstract ByteVector with(int i, byte e);
1377 
1378     // Type specific extractors
1379 
1380     /**
1381      * Copies the lane elements of this vector into a newly allocated array.
1382      * <p>
1383      * Behaves as if by allocating an array, {@link #intoArray(byte[], int)
1384      * storing} this vector into it at offset 0, and returning it, as follows:
1385      * <pre>{@code
1386      *   byte[] a = new byte[this.length()];
1387      *   this.intoArray(a, 0);
1388      *   return a;
1389      * }</pre>
1390      *
1391      * @return a new array holding the lane elements of this vector
1392      */
1393     @ForceInline
1394     public final byte[] toArray() {
1395         final byte[] lanes = new byte[species().length()];
1396         this.intoArray(lanes, 0);
1397         return lanes;
1398     }
1399 
1400     /**
1401      * Stores this vector into an array starting at offset.
1402      * <p>
1403      * For each vector lane, where {@code N} is the vector lane index,
1404      * the lane element at index {@code N} is stored into the array at index
1405      * {@code i + N}.
1406      *
1407      * @param a the array
1408      * @param i the offset into the array
1409      * @throws IndexOutOfBoundsException if {@code i < 0}, or
1410      * {@code i > a.length - this.length()}
1411      */
1412     public abstract void intoArray(byte[] a, int i);
1413 
1414     /**
1415      * Stores this vector into an array starting at offset and using a mask.
1416      * <p>
1417      * For each vector lane, where {@code N} is the vector lane index,
1418      * if the mask lane at index {@code N} is set then the lane element at
1419      * index {@code N} is stored into the array at index {@code i + N}.
1420      *
1421      * @param a the array
1422      * @param i the offset into the array
1423      * @param m the mask
1424      * @throws IndexOutOfBoundsException if {@code i < 0}, or
1425      * for any vector lane index {@code N} where the mask at lane {@code N}
1426      * is set, {@code i >= a.length - N}
1427      */
1428     public abstract void intoArray(byte[] a, int i, Mask<Byte> m);
1429 
1430     /**
1431      * Stores the lane elements of this vector into an array, at positions
1432      * looked up in an index map.
1433      * <p>
1434      * For each vector lane, where {@code N} is the vector lane index, the
1435      * lane element at index {@code N} is written to the array element at
1436      * index {@code i + indexMap[j + N]}.
1437      *
1438      * @param a the array to store into
1439      * @param i the base offset added to every mapped index; it may be negative
1440      * as long as each sum {@code i + indexMap[j + N]} still falls within the
1441      * array bounds
1442      * @param indexMap the index map
1443      * @param j the offset into the index map of the entry used for lane 0
1444      * @throws IndexOutOfBoundsException if {@code j < 0}, or
1445      * {@code j > indexMap.length - this.length()},
1446      * or for any vector lane index {@code N} the result of
1447      * {@code i + indexMap[j + N]} is {@code < 0} or {@code >= a.length}
1448      */
1449     public void intoArray(byte[] a, int i, int[] indexMap, int j) {
1450         forEach((lane, value) -> a[i + indexMap[j + lane]] = value);
1451     }
1452 
1453     /**
1454      * Stores the lane elements of this vector into an array, at positions
1455      * looked up in an index map, under the control of a mask.
1456      * <p>
1457      * For each vector lane, where {@code N} is the vector lane index and
1458      * the mask lane at index {@code N} is set, the lane element at index
1459      * {@code N} is written to the array element at index
1460      * {@code i + indexMap[j + N]}.
1461      *
1462      * @param a the array to store into
1463      * @param i the base offset added to every mapped index; it may be negative
1464      * as long as each sum {@code i + indexMap[j + N]} still falls within the
1465      * array bounds
1466      * @param m the mask controlling which lanes are stored
1467      * @param indexMap the index map
1468      * @param j the offset into the index map of the entry used for lane 0
1469      * @throws IndexOutOfBoundsException if {@code j < 0}, or
1470      * {@code j > indexMap.length - this.length()},
1471      * or for any vector lane index {@code N} where the mask at lane
1472      * {@code N} is set the result of {@code i + indexMap[j + N]} is
1473      * {@code < 0} or {@code >= a.length}
1474      */
1475     public void intoArray(byte[] a, int i, Mask<Byte> m, int[] indexMap, int j) {
1476         forEach(m, (lane, value) -> a[i + indexMap[j + lane]] = value);
1477     }
1478     // Species
1479     // Byte-specialized override: the species is reported as a ByteSpecies.
1480     @Override
1481     public abstract ByteSpecies species();
1482 
1483     /**
1484      * Describes the {@link ByteVector}s that share a {@link Vector.Shape Shape}.
1485      */
1486     public static abstract class ByteSpecies extends Vector.Species<Byte> {
1487         // Maps an index to a byte element; passed to op(...), e.g. by random().
1488         interface FOp {
1489             byte apply(int i);
1490         }
1491 
1492         // Maps an index to a boolean; passed to opm(...) (presumably mask bits).
1493         interface FOpm {
1494             boolean apply(int i);
1495         }
1496 
1497         abstract ByteVector op(FOp f);
1498 
1499         abstract ByteVector op(Mask<Byte> m, FOp f);
1500 
1501         abstract Mask<Byte> opm(FOpm f);
1502 
1503         // Factories
1504 
1505         @Override
1506         public abstract ByteVector zero();
1507 
1508         /**
1509          * Creates a vector with every lane set to the primitive value {@code e}.
1510          *
1511          * @param e the value
1512          * @return a vector where all lane elements are set to the primitive
1513          * value {@code e}
1514          */
1515         public abstract ByteVector broadcast(byte e);
1516 
1517         /**
1518          * Creates a vector whose first lane holds the primitive value {@code e}
1519          * while every other lane holds the default value.
1520          *
1521          * @param e the value
1522          * @return a vector where the first lane element is set to the primitive
1523          * value {@code e}
1524          */
1525         @ForceInline
1526         public final ByteVector single(byte e) {
1527             ByteVector zeros = zero();
1528             return zeros.with(0, e);
1529         }
1530 
1531         /**
1532          * Creates a vector with each lane set to a randomly generated primitive
1533          * value.
1534          * <p>
1535          * Each lane value is obtained as if by calling
1536          * {@code (byte)ThreadLocalRandom#nextInt()}.
1537          *
1538          * @return a vector where each lane element is set to a randomly
1539          * generated primitive value
1540          */
1541         public ByteVector random() {
1542             ThreadLocalRandom rng = ThreadLocalRandom.current();
1543             FOp nextByte = lane -> (byte) rng.nextInt();
1544             return op(nextByte);
1545         }
1546 
1547         /**
1548          * Creates a vector whose lanes are populated from the given primitive
1549          * values.
1550          * <p>
1551          * For each vector lane, where {@code N} is the vector lane index, the
1552          * primitive value at index {@code N} is placed into the resulting
1553          * vector at lane index {@code N}.
1554          *
1555          * @param es the given primitive values
1556          * @return a vector where each lane element is set to a given primitive
1557          * value
1558          * @throws IndexOutOfBoundsException if {@code es.length < this.length()}
1559          */
1560         public abstract ByteVector scalars(byte... es);
1561     }
1562 
1563     /**
1564      * Returns the platform's preferred species for {@code byte} elements.
1565      * <p>
1566      * The platform picks a preferred species with a shape of maximal bit size.
1567      * Preferred species of different element types share that shape, so the
1568      * vectors, masks, and shuffles created from them are shape compatible.
1569      *
1570      * @return the preferred species for an element type of {@code byte}
1571      */
1572     @SuppressWarnings("unchecked")
1573     public static ByteSpecies preferredSpecies() {
1574         ByteSpecies preferred = (ByteSpecies) Species.ofPreferred(byte.class);
1575         return preferred;
1576     }
1577 
1578     /**
1579      * Looks up the species for {@code byte} elements of the given shape.
1580      *
1581      * @param s the shape
1582      * @return a species for an element type of {@code byte} and shape
1583      * @throws IllegalArgumentException if no such species exists for the shape
1584      */
1585     @SuppressWarnings("unchecked")
1586     public static ByteSpecies species(Vector.Shape s) {
1587         switch (Objects.requireNonNull(s)) {
1588             case S_64_BIT: return Byte64Vector.SPECIES;
1589             case S_128_BIT: return Byte128Vector.SPECIES;
1590             case S_256_BIT: return Byte256Vector.SPECIES;
1591             case S_512_BIT: return Byte512Vector.SPECIES;
1592             case S_Max_BIT: return ByteMaxVector.SPECIES;
1593             default: break;
1594         }
1595         throw new IllegalArgumentException("Bad shape: " + s);
1596     }
1597 }