1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.util.Objects;
  30 import java.util.function.IntUnaryOperator;
  31 import java.util.concurrent.ThreadLocalRandom;
  32 
  33 import jdk.internal.misc.Unsafe;
  34 import jdk.internal.vm.annotation.ForceInline;
  35 import static jdk.incubator.vector.VectorIntrinsics.*;
  36 
  37 
  38 /**
  39  * A specialized {@link Vector} representing an ordered immutable sequence of
  40  * {@code byte} values.
  41  */
  42 @SuppressWarnings("cast")
  43 public abstract class ByteVector extends Vector<Byte> {
  44 
    // Package-private constructor (no access modifier): only code in this
    // package can invoke it, which restricts direct subclassing to the package.
    ByteVector() {}
  46 
    // floor(log2(Unsafe.ARRAY_BYTE_INDEX_SCALE)): the left shift applied to an
    // array index before adding an array base offset when computing the raw
    // offset handed to VectorIntrinsics.load.
    private static final int ARRAY_SHIFT = 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
  48 
    // Unary operator

    /**
     * Function shape for unary operations: takes an {@code int}
     * (presumably the lane index -- confirm against implementations) and one
     * {@code byte} operand, and returns a {@code byte}.
     */
    interface FUnOp {
        byte apply(int i, byte a);
    }

    // Abstract hook accepting a unary function; concrete subclasses supply
    // the implementation.
    abstract ByteVector uOp(FUnOp f);

    // Same hook with an additional mask argument.
    abstract ByteVector uOp(Mask<Byte> m, FUnOp f);
  58 
    // Binary operator

    /**
     * Function shape for binary operations: takes an {@code int}
     * (presumably the lane index -- confirm against implementations) and two
     * {@code byte} operands, and returns a {@code byte}.
     */
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }

    // Abstract hook accepting another vector and a binary function; concrete
    // subclasses supply the implementation.
    abstract ByteVector bOp(Vector<Byte> v, FBinOp f);

    // Same hook with an additional mask argument.
    abstract ByteVector bOp(Vector<Byte> v, Mask<Byte> m, FBinOp f);
  68 
    // Trinary operator

    /**
     * Function shape for three-operand operations: takes an {@code int}
     * (presumably the lane index -- confirm against implementations) and
     * three {@code byte} operands, and returns a {@code byte}.
     */
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }

    // Abstract hook accepting two other vectors and a three-operand function;
    // concrete subclasses supply the implementation.
    abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f);

    // Same hook with an additional mask argument.
    abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, Mask<Byte> m, FTriOp f);
  78 
    // Reduction operator

    // Abstract hook taking a byte value and a binary function and yielding a
    // single byte. Presumably v seeds a fold of f over the lanes -- confirm
    // against the concrete implementations.
    abstract byte rOp(byte v, FBinOp f);
  82 
    // Binary test

    /**
     * Predicate shape for binary tests: takes an {@code int}
     * (presumably the lane index -- confirm against implementations) and two
     * {@code byte} operands, and returns a {@code boolean}.
     */
    interface FBinTest {
        boolean apply(int i, byte a, byte b);
    }

    // Abstract hook accepting another vector and a binary predicate and
    // producing a mask; concrete subclasses supply the implementation.
    abstract Mask<Byte> bTest(Vector<Byte> v, FBinTest f);
  90 
    // Foreach

    /**
     * Consumer shape for per-element visiting: takes an {@code int}
     * (presumably the lane index -- confirm against implementations) and one
     * {@code byte} value, and returns nothing.
     */
    interface FUnCon {
        void apply(int i, byte a);
    }

    // Abstract hook accepting a consumer; concrete subclasses supply the
    // implementation.
    abstract void forEach(FUnCon f);

    // Same hook with an additional mask argument.
    abstract void forEach(Mask<Byte> m, FUnCon f);
 100 
 101     // Static factories
 102 
 103     /**
 104      * Returns a vector where all lane elements are set to the default
 105      * primitive value.
 106      *
 107      * @param species species of desired vector
 108      * @return a zero vector of given species
 109      */
 110     @ForceInline
 111     @SuppressWarnings("unchecked")
 112     public static ByteVector zero(ByteSpecies species) {
 113         return species.zero();
 114     }
 115 
 116     /**
 117      * Loads a vector from a byte array starting at an offset.
 118      * <p>
 119      * Bytes are composed into primitive lane elements according to the
 120      * native byte order of the underlying platform
 121      * <p>
 122      * This method behaves as if it returns the result of calling the
 123      * byte buffer, offset, and mask accepting
 124      * {@link #fromByteBuffer(ByteSpecies, ByteBuffer, int, Mask) method} as follows:
 125      * <pre>{@code
 126      * return this.fromByteBuffer(ByteBuffer.wrap(a), i, this.maskAllTrue());
 127      * }</pre>
 128      *
 129      * @param species species of desired vector
 130      * @param a the byte array
 131      * @param ix the offset into the array
 132      * @return a vector loaded from a byte array
 133      * @throws IndexOutOfBoundsException if {@code i < 0} or
 134      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 135      */
 136     @ForceInline
 137     @SuppressWarnings("unchecked")
 138     public static ByteVector fromByteArray(ByteSpecies species, byte[] a, int ix) {
 139         Objects.requireNonNull(a);
 140         ix = VectorIntrinsics.checkIndex(ix, a.length, species.bitSize() / Byte.SIZE);
 141         return VectorIntrinsics.load((Class<ByteVector>) species.boxType(), byte.class, species.length(),
 142                                      a, ((long) ix) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 143                                      a, ix, species,
 144                                      (c, idx, s) -> {
 145                                          ByteBuffer bbc = ByteBuffer.wrap(c, idx, a.length - idx).order(ByteOrder.nativeOrder());
 146                                          ByteBuffer tb = bbc;
 147                                          return ((ByteSpecies)s).op(i -> tb.get());
 148                                      });
 149     }
 150 
 151     /**
 152      * Loads a vector from a byte array starting at an offset and using a
 153      * mask.
 154      * <p>
 155      * Bytes are composed into primitive lane elements according to the
 156      * native byte order of the underlying platform.
 157      * <p>
 158      * This method behaves as if it returns the result of calling the
 159      * byte buffer, offset, and mask accepting
 160      * {@link #fromByteBuffer(ByteSpecies, ByteBuffer, int, Mask) method} as follows:
 161      * <pre>{@code
 162      * return this.fromByteBuffer(ByteBuffer.wrap(a), i, m);
 163      * }</pre>
 164      *
 165      * @param species species of desired vector
 166      * @param a the byte array
 167      * @param ix the offset into the array
 168      * @param m the mask
 169      * @return a vector loaded from a byte array
 170      * @throws IndexOutOfBoundsException if {@code i < 0} or
 171      * {@code i > a.length - (this.length() * this.elementSize() / Byte.SIZE)}
 172      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 173      * or {@code > a.length},
 174      * for any vector lane index {@code N} where the mask at lane {@code N}
 175      * is set
 176      * {@code i >= a.length - (N * this.elementSize() / Byte.SIZE)}
 177      */
 178     @ForceInline
 179     public static ByteVector fromByteArray(ByteSpecies species, byte[] a, int ix, Mask<Byte> m) {
 180         return zero(species).blend(fromByteArray(species, a, ix), m);
 181     }
 182 
 183     /**
 184      * Loads a vector from an array starting at offset.
 185      * <p>
 186      * For each vector lane, where {@code N} is the vector lane index, the
 187      * array element at index {@code i + N} is placed into the
 188      * resulting vector at lane index {@code N}.
 189      *
 190      * @param species species of desired vector
 191      * @param a the array
 192      * @param i the offset into the array
 193      * @return the vector loaded from an array
 194      * @throws IndexOutOfBoundsException if {@code i < 0}, or
 195      * {@code i > a.length - this.length()}
 196      */
 197     @ForceInline
 198     @SuppressWarnings("unchecked")
 199     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i){
 200         Objects.requireNonNull(a);
 201         i = VectorIntrinsics.checkIndex(i, a.length, species.length());
 202         return VectorIntrinsics.load((Class<ByteVector>) species.boxType(), byte.class, species.length(),
 203                                      a, (((long) i) << ARRAY_SHIFT) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 204                                      a, i, species,
 205                                      (c, idx, s) -> ((ByteSpecies)s).op(n -> c[idx + n]));
 206     }
 207 
 208 
 209     /**
 210      * Loads a vector from an array starting at offset and using a mask.
 211      * <p>
 212      * For each vector lane, where {@code N} is the vector lane index,
 213      * if the mask lane at index {@code N} is set then the array element at
 214      * index {@code i + N} is placed into the resulting vector at lane index
 215      * {@code N}, otherwise the default element value is placed into the
 216      * resulting vector at lane index {@code N}.
 217      *
 218      * @param species species of desired vector
 219      * @param a the array
 220      * @param i the offset into the array
 221      * @param m the mask
 222      * @return the vector loaded from an array
 223      * @throws IndexOutOfBoundsException if {@code i < 0}, or
 224      * for any vector lane index {@code N} where the mask at lane {@code N}
 225      * is set {@code i > a.length - N}
 226      */
 227     @ForceInline
 228     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i, Mask<Byte> m) {
 229         return zero(species).blend(fromArray(species, a, i), m);
 230     }
 231 
 232     /**
 233      * Loads a vector from an array using indexes obtained from an index
 234      * map.
 235      * <p>
 236      * For each vector lane, where {@code N} is the vector lane index, the
 237      * array element at index {@code i + indexMap[j + N]} is placed into the
 238      * resulting vector at lane index {@code N}.
 239      *
 240      * @param species species of desired vector
 241      * @param a the array
 242      * @param i the offset into the array, may be negative if relative
 243      * indexes in the index map compensate to produce a value within the
 244      * array bounds
 245      * @param indexMap the index map
 246      * @param j the offset into the index map
 247      * @return the vector loaded from an array
 248      * @throws IndexOutOfBoundsException if {@code j < 0}, or
 249      * {@code j > indexMap.length - this.length()},
 250      * or for any vector lane index {@code N} the result of
 251      * {@code i + indexMap[j + N]} is {@code < 0} or {@code >= a.length}
 252      */
 253     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i, int[] indexMap, int j) {
 254         return species.op(n -> a[i + indexMap[j + n]]);
 255     }
 256     /**
 257      * Loads a vector from an array using indexes obtained from an index
 258      * map and using a mask.
 259      * <p>
 260      * For each vector lane, where {@code N} is the vector lane index,
 261      * if the mask lane at index {@code N} is set then the array element at
 262      * index {@code i + indexMap[j + N]} is placed into the resulting vector
 263      * at lane index {@code N}.
 264      *
 265      * @param species species of desired vector
 266      * @param a the array
 267      * @param i the offset into the array, may be negative if relative
 268      * indexes in the index map compensate to produce a value within the
 269      * array bounds
 270      * @param m the mask
 271      * @param indexMap the index map
 272      * @param j the offset into the index map
 273      * @return the vector loaded from an array
 274      * @throws IndexOutOfBoundsException if {@code j < 0}, or
 275      * {@code j > indexMap.length - this.length()},
 276      * or for any vector lane index {@code N} where the mask at lane
 277      * {@code N} is set the result of {@code i + indexMap[j + N]} is
 278      * {@code < 0} or {@code >= a.length}
 279      */
 280     public static ByteVector fromArray(ByteSpecies species, byte[] a, int i, Mask<Byte> m, int[] indexMap, int j) {
 281         return species.op(m, n -> a[i + indexMap[j + n]]);
 282     }
 283 
 284     /**
 285      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 286      * offset into the byte buffer.
 287      * <p>
 288      * Bytes are composed into primitive lane elements according to the
 289      * native byte order of the underlying platform.
 290      * <p>
 291      * This method behaves as if it returns the result of calling the
 292      * byte buffer, offset, and mask accepting
 293      * {@link #fromByteBuffer(ByteSpecies, ByteBuffer, int, Mask)} method} as follows:
 294      * <pre>{@code
 295      *   return this.fromByteBuffer(b, i, this.maskAllTrue())
 296      * }</pre>
 297      *
 298      * @param species species of desired vector
 299      * @param bb the byte buffer
 300      * @param ix the offset into the byte buffer
 301      * @return a vector loaded from a byte buffer
 302      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 303      * or {@code > b.limit()},
 304      * or if there are fewer than
 305      * {@code this.length() * this.elementSize() / Byte.SIZE} bytes
 306      * remaining in the byte buffer from the given offset
 307      */
 308     @ForceInline
 309     @SuppressWarnings("unchecked")
 310     public static ByteVector fromByteBuffer(ByteSpecies species, ByteBuffer bb, int ix) {
 311         if (bb.order() != ByteOrder.nativeOrder()) {
 312             throw new IllegalArgumentException();
 313         }
 314         ix = VectorIntrinsics.checkIndex(ix, bb.limit(), species.bitSize() / Byte.SIZE);
 315         return VectorIntrinsics.load((Class<ByteVector>) species.boxType(), byte.class, species.length(),
 316                                      U.getReference(bb, BYTE_BUFFER_HB), U.getLong(bb, BUFFER_ADDRESS) + ix,
 317                                      bb, ix, species,
 318                                      (c, idx, s) -> {
 319                                          ByteBuffer bbc = c.duplicate().position(idx).order(ByteOrder.nativeOrder());
 320                                          ByteBuffer tb = bbc;
 321                                          return ((ByteSpecies)s).op(i -> tb.get());
 322                                      });
 323     }
 324 
 325     /**
 326      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 327      * offset into the byte buffer and using a mask.
 328      * <p>
 329      * This method behaves as if the byte buffer is viewed as a primitive
 330      * {@link java.nio.Buffer buffer} for the primitive element type,
 331      * according to the native byte order of the underlying platform, and
 332      * the returned vector is loaded with a mask from a primitive array
 333      * obtained from the primitive buffer.
 334      * The following pseudocode expresses the behaviour, where
 335      * {@coce EBuffer} is the primitive buffer type, {@code e} is the
 336      * primitive element type, and {@code ESpecies<S>} is the primitive
 337      * species for {@code e}:
 338      * <pre>{@code
 339      * EBuffer eb = b.duplicate().
 340      *     order(ByteOrder.nativeOrder()).position(i).
 341      *     asEBuffer();
 342      * e[] es = new e[this.length()];
 343      * for (int n = 0; n < t.length; n++) {
 344      *     if (m.isSet(n))
 345      *         es[n] = eb.get(n);
 346      * }
 347      * Vector<E> r = ((ESpecies<S>)this).fromArray(es, 0, m);
 348      * }</pre>
 349      *
 350      * @param species species of desired vector
 351      * @param bb the byte buffer
 352      * @param ix the offset into the byte buffer
 353      * @param m the mask
 354      * @return a vector loaded from a byte buffer
 355      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 356      * or {@code > b.limit()},
 357      * for any vector lane index {@code N} where the mask at lane {@code N}
 358      * is set
 359      * {@code i >= b.limit() - (N * this.elementSize() / Byte.SIZE)}
 360      */
 361     @ForceInline
 362     public static ByteVector fromByteBuffer(ByteSpecies species, ByteBuffer bb, int ix, Mask<Byte> m) {
 363         return zero(species).blend(fromByteBuffer(species, bb, ix), m);
 364     }
 365 
 366     /**
 367      * Returns a mask where each lane is set or unset according to given
 368      * {@code boolean} values
 369      * <p>
 370      * For each mask lane, where {@code N} is the mask lane index,
 371      * if the given {@code boolean} value at index {@code N} is {@code true}
 372      * then the mask lane at index {@code N} is set, otherwise it is unset.
 373      *
 374      * @param species mask species
 375      * @param bits the given {@code boolean} values
 376      * @return a mask where each lane is set or unset according to the given {@code boolean} value
 377      * @throws IndexOutOfBoundsException if {@code bits.length < species.length()}
 378      */
 379     @ForceInline
 380     public static Mask<Byte> maskFromValues(ByteSpecies species, boolean... bits) {
 381         if (species.boxType() == ByteMaxVector.class)
 382             return new ByteMaxVector.ByteMaxMask(bits);
 383         switch (species.bitSize()) {
 384             case 64: return new Byte64Vector.Byte64Mask(bits);
 385             case 128: return new Byte128Vector.Byte128Mask(bits);
 386             case 256: return new Byte256Vector.Byte256Mask(bits);
 387             case 512: return new Byte512Vector.Byte512Mask(bits);
 388             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 389         }
 390     }
 391 
 392     // @@@ This is a bad implementation -- makes lambdas capturing -- fix this
 393     static Mask<Byte> trueMask(ByteSpecies species) {
 394         if (species.boxType() == ByteMaxVector.class)
 395             return ByteMaxVector.ByteMaxMask.TRUE_MASK;
 396         switch (species.bitSize()) {
 397             case 64: return Byte64Vector.Byte64Mask.TRUE_MASK;
 398             case 128: return Byte128Vector.Byte128Mask.TRUE_MASK;
 399             case 256: return Byte256Vector.Byte256Mask.TRUE_MASK;
 400             case 512: return Byte512Vector.Byte512Mask.TRUE_MASK;
 401             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 402         }
 403     }
 404 
 405     static Mask<Byte> falseMask(ByteSpecies species) {
 406         if (species.boxType() == ByteMaxVector.class)
 407             return ByteMaxVector.ByteMaxMask.FALSE_MASK;
 408         switch (species.bitSize()) {
 409             case 64: return Byte64Vector.Byte64Mask.FALSE_MASK;
 410             case 128: return Byte128Vector.Byte128Mask.FALSE_MASK;
 411             case 256: return Byte256Vector.Byte256Mask.FALSE_MASK;
 412             case 512: return Byte512Vector.Byte512Mask.FALSE_MASK;
 413             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 414         }
 415     }
 416 
 417     /**
 418      * Loads a mask from a {@code boolean} array starting at an offset.
 419      * <p>
 420      * For each mask lane, where {@code N} is the mask lane index,
 421      * if the array element at index {@code i + N} is {@code true} then the
 422      * mask lane at index {@code N} is set, otherwise it is unset.
 423      *
 424      * @param species mask species
 425      * @param bits the {@code boolean} array
 426      * @param ix the offset into the array
 427      * @return the mask loaded from a {@code boolean} array
 428      * @throws IndexOutOfBoundsException if {@code ix < 0}, or
 429      * {@code ix > bits.length - species.length()}
 430      */
 431     @ForceInline
 432     @SuppressWarnings("unchecked")
 433     public static Mask<Byte> maskFromArray(ByteSpecies species, boolean[] bits, int ix) {
 434         Objects.requireNonNull(bits);
 435         ix = VectorIntrinsics.checkIndex(ix, bits.length, species.length());
 436         return VectorIntrinsics.load((Class<Mask<Byte>>) species.maskType(), byte.class, species.length(),
 437                                      bits, (((long) ix) << ARRAY_SHIFT) + Unsafe.ARRAY_BOOLEAN_BASE_OFFSET,
 438                                      bits, ix, species,
 439                                      (c, idx, s) -> (Mask<Byte>) ((ByteSpecies)s).opm(n -> c[idx + n]));
 440     }
 441 
 442     /**
 443      * Returns a mask where all lanes are a set.
 444      *
 445      * @param species mask species
 446      * @return a mask where all lanes are a set
 447      */
 448     @ForceInline
 449     @SuppressWarnings("unchecked")
 450     public static Mask<Byte> maskAllTrue(ByteSpecies species) {
 451         return VectorIntrinsics.broadcastCoerced((Class<Mask<Byte>>) species.maskType(), byte.class, species.length(),
 452                                                  (byte)-1,  species,
 453                                                  ((z, s) -> trueMask((ByteSpecies)s)));
 454     }
 455 
 456     /**
 457      * Returns a mask where all lanes are a unset.
 458      *
 459      * @param species mask species
 460      * @return a mask where all lanes are a unset
 461      */
 462     @ForceInline
 463     @SuppressWarnings("unchecked")
 464     public static Mask<Byte> maskAllFalse(ByteSpecies species) {
 465         return VectorIntrinsics.broadcastCoerced((Class<Mask<Byte>>) species.maskType(), byte.class, species.length(),
 466                                                  0, species, 
 467                                                  ((z, s) -> falseMask((ByteSpecies)s)));
 468     }
 469 
 470     /**
 471      * Returns a shuffle of mapped indexes where each lane element is
 472      * the result of applying a mapping function to the corresponding lane
 473      * index.
 474      * <p>
 475      * Care should be taken to ensure Shuffle values produced from this
 476      * method are consumed as constants to ensure optimal generation of
 477      * code.  For example, values held in static final fields or values
 478      * held in loop constant local variables.
 479      * <p>
 480      * This method behaves as if a shuffle is created from an array of
 481      * mapped indexes as follows:
 482      * <pre>{@code
 483      *   int[] a = new int[species.length()];
 484      *   for (int i = 0; i < a.length; i++) {
 485      *       a[i] = f.applyAsInt(i);
 486      *   }
 487      *   return this.shuffleFromValues(a);
 488      * }</pre>
 489      *
 490      * @param species shuffle species
 491      * @param f the lane index mapping function
 492      * @return a shuffle of mapped indexes
 493      */
 494     @ForceInline
 495     public static Shuffle<Byte> shuffle(ByteSpecies species, IntUnaryOperator f) {
 496         if (species.boxType() == ByteMaxVector.class)
 497             return new ByteMaxVector.ByteMaxShuffle(f);
 498         switch (species.bitSize()) {
 499             case 64: return new Byte64Vector.Byte64Shuffle(f);
 500             case 128: return new Byte128Vector.Byte128Shuffle(f);
 501             case 256: return new Byte256Vector.Byte256Shuffle(f);
 502             case 512: return new Byte512Vector.Byte512Shuffle(f);
 503             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 504         }
 505     }
 506 
 507     /**
 508      * Returns a shuffle where each lane element is the value of its
 509      * corresponding lane index.
 510      * <p>
 511      * This method behaves as if a shuffle is created from an identity
 512      * index mapping function as follows:
 513      * <pre>{@code
 514      *   return this.shuffle(i -> i);
 515      * }</pre>
 516      *
 517      * @param species shuffle species
 518      * @return a shuffle of lane indexes
 519      */
 520     @ForceInline
 521     public static Shuffle<Byte> shuffleIota(ByteSpecies species) {
 522         if (species.boxType() == ByteMaxVector.class)
 523             return new ByteMaxVector.ByteMaxShuffle(AbstractShuffle.IDENTITY);
 524         switch (species.bitSize()) {
 525             case 64: return new Byte64Vector.Byte64Shuffle(AbstractShuffle.IDENTITY);
 526             case 128: return new Byte128Vector.Byte128Shuffle(AbstractShuffle.IDENTITY);
 527             case 256: return new Byte256Vector.Byte256Shuffle(AbstractShuffle.IDENTITY);
 528             case 512: return new Byte512Vector.Byte512Shuffle(AbstractShuffle.IDENTITY);
 529             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 530         }
 531     }
 532 
 533     /**
 534      * Returns a shuffle where each lane element is set to a given
 535      * {@code int} value logically AND'ed by the species length minus one.
 536      * <p>
 537      * For each shuffle lane, where {@code N} is the shuffle lane index, the
 538      * the {@code int} value at index {@code N} logically AND'ed by
 539      * {@code species.length() - 1} is placed into the resulting shuffle at
 540      * lane index {@code N}.
 541      *
 542      * @param species shuffle species
 543      * @param ixs the given {@code int} values
 544      * @return a shuffle where each lane element is set to a given
 545      * {@code int} value
 546      * @throws IndexOutOfBoundsException if the number of int values is
 547      * {@code < species.length()}
 548      */
 549     @ForceInline
 550     public static Shuffle<Byte> shuffleFromValues(ByteSpecies species, int... ixs) {
 551         if (species.boxType() == ByteMaxVector.class)
 552             return new ByteMaxVector.ByteMaxShuffle(ixs);
 553         switch (species.bitSize()) {
 554             case 64: return new Byte64Vector.Byte64Shuffle(ixs);
 555             case 128: return new Byte128Vector.Byte128Shuffle(ixs);
 556             case 256: return new Byte256Vector.Byte256Shuffle(ixs);
 557             case 512: return new Byte512Vector.Byte512Shuffle(ixs);
 558             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 559         }
 560     }
 561 
 562     /**
 563      * Loads a shuffle from an {@code int} array starting at an offset.
 564      * <p>
 565      * For each shuffle lane, where {@code N} is the shuffle lane index, the
 566      * array element at index {@code i + N} logically AND'ed by
 567      * {@code species.length() - 1} is placed into the resulting shuffle at lane
 568      * index {@code N}.
 569      *
 570      * @param species shuffle species
 571      * @param ixs the {@code int} array
 572      * @param i the offset into the array
 573      * @return a shuffle loaded from the {@code int} array
 574      * @throws IndexOutOfBoundsException if {@code i < 0}, or
 575      * {@code i > a.length - species.length()}
 576      */
 577     @ForceInline
 578     public static Shuffle<Byte> shuffleFromArray(ByteSpecies species, int[] ixs, int i) {
 579         if (species.boxType() == ByteMaxVector.class)
 580             return new ByteMaxVector.ByteMaxShuffle(ixs, i);
 581         switch (species.bitSize()) {
 582             case 64: return new Byte64Vector.Byte64Shuffle(ixs, i);
 583             case 128: return new Byte128Vector.Byte128Shuffle(ixs, i);
 584             case 256: return new Byte256Vector.Byte256Shuffle(ixs, i);
 585             case 512: return new Byte512Vector.Byte512Shuffle(ixs, i);
 586             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 587         }
 588     }
 589 
 590 
 591     // Ops
 592 
    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector add(Vector<Byte> v);

    /**
     * Adds this vector to the broadcast of an input scalar.
     * <p>
     * This is a vector binary operation where the primitive addition operation
     * ({@code +}) is applied to lane elements.
     *
     * @param s the input scalar
     * @return the result of adding this vector to the broadcast of an input
     * scalar
     */
    public abstract ByteVector add(byte s);

    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector add(Vector<Byte> v, Mask<Byte> m);

    /**
     * Adds this vector to the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     * <p>
     * This is a vector binary operation where the primitive addition operation
     * ({@code +}) is applied to lane elements.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of adding this vector to the broadcast of an input
     * scalar
     */
    public abstract ByteVector add(byte s, Mask<Byte> m);
 624 
    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector sub(Vector<Byte> v);

    /**
     * Subtracts the broadcast of an input scalar from this vector.
     * <p>
     * This is a vector binary operation where the primitive subtraction
     * operation ({@code -}) is applied to lane elements.
     *
     * @param s the input scalar
     * @return the result of subtracting the broadcast of an input
     * scalar from this vector
     */
    public abstract ByteVector sub(byte s);

    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector sub(Vector<Byte> v, Mask<Byte> m);

    /**
     * Subtracts the broadcast of an input scalar from this vector, selecting
     * lane elements controlled by a mask.
     * <p>
     * This is a vector binary operation where the primitive subtraction
     * operation ({@code -}) is applied to lane elements.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of subtracting the broadcast of an input
     * scalar from this vector
     */
    public abstract ByteVector sub(byte s, Mask<Byte> m);
 656 
    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector mul(Vector<Byte> v);

    /**
     * Multiplies this vector with the broadcast of an input scalar.
     * <p>
     * This is a vector binary operation where the primitive multiplication
     * operation ({@code *}) is applied to lane elements.
     *
     * @param s the input scalar
     * @return the result of multiplying this vector with the broadcast of an
     * input scalar
     */
    public abstract ByteVector mul(byte s);

    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector mul(Vector<Byte> v, Mask<Byte> m);

    /**
     * Multiplies this vector with the broadcast of an input scalar, selecting
     * lane elements controlled by a mask.
     * <p>
     * This is a vector binary operation where the primitive multiplication
     * operation ({@code *}) is applied to lane elements.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of multiplying this vector with the broadcast of an
     * input scalar
     */
    public abstract ByteVector mul(byte s, Mask<Byte> m);
 688 
    // The four overrides below are re-declared so that their results are
    // statically typed as ByteVector.
    @Override
    public abstract ByteVector neg();

    @Override
    public abstract ByteVector neg(Mask<Byte> m);

    @Override
    public abstract ByteVector abs();

    @Override
    public abstract ByteVector abs(Mask<Byte> m);
 700 
    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector min(Vector<Byte> v);

    // Re-declared so that the result is statically typed as ByteVector.
    @Override
    public abstract ByteVector min(Vector<Byte> v, Mask<Byte> m);

    /**
     * Returns the minimum of this vector and the broadcast of an input scalar.
     * <p>
     * This is a vector binary operation where the operation
     * {@code (a, b) -> Math.min(a, b)} is applied to lane elements.
     *
     * @param s the input scalar
     * @return the minimum of this vector and the broadcast of an input scalar
     */
    public abstract ByteVector min(byte s);
 717 
 718     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 719     public abstract ByteVector max(Vector<Byte> v);
 720 
 721     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 722     public abstract ByteVector max(Vector<Byte> v, Mask<Byte> m);
 723 
 724     /**
 725      * Returns the maximum of this vector and the broadcast of an input scalar.
 726      * <p>
 727      * This is a vector binary operation where the operation
 728      * {@code (a, b) -> Math.max(a, b)} is applied to lane elements.
 729      *
 730      * @param s the input scalar, broadcast to every lane
 731      * @return the lane-wise maximum of this vector and the broadcast of the input scalar
 732      */
 733     public abstract ByteVector max(byte s);
 734 
 735     @Override // Javadoc is inherited from the overridden method
 736     public abstract Mask<Byte> equal(Vector<Byte> v);
 737 
 738     /**
 739      * Tests if this vector is equal to the broadcast of an input scalar.
 740      * <p>
 741      * This is a vector binary test operation where the primitive equals
 742      * operation ({@code ==}) is applied to lane elements.
 743      *
 744      * @param s the input scalar, broadcast to every lane
 745      * @return the mask result of testing if this vector is equal to the
 746      * broadcast of an input scalar
 747      */
 748     public abstract Mask<Byte> equal(byte s);
 749 
 750     @Override // Javadoc is inherited from the overridden method
 751     public abstract Mask<Byte> notEqual(Vector<Byte> v);
 752 
 753     /**
 754      * Tests if this vector is not equal to the broadcast of an input scalar.
 755      * <p>
 756      * This is a vector binary test operation where the primitive not equals
 757      * operation ({@code !=}) is applied to lane elements.
 758      *
 759      * @param s the input scalar, broadcast to every lane
 760      * @return the mask result of testing if this vector is not equal to the
 761      * broadcast of an input scalar
 762      */
 763     public abstract Mask<Byte> notEqual(byte s);
 764 
 765     @Override // Javadoc is inherited from the overridden method
 766     public abstract Mask<Byte> lessThan(Vector<Byte> v);
 767 
 768     /**
 769      * Tests if this vector is less than the broadcast of an input scalar.
 770      * <p>
 771      * This is a vector binary test operation where the primitive less than
 772      * operation ({@code <}) is applied to lane elements.
 773      *
 774      * @param s the input scalar, broadcast to every lane
 775      * @return the mask result of testing if this vector is less than the
 776      * broadcast of an input scalar
 777      */
 778     public abstract Mask<Byte> lessThan(byte s);
 779 
 780     @Override // Javadoc is inherited from the overridden method
 781     public abstract Mask<Byte> lessThanEq(Vector<Byte> v);
 782 
 783     /**
 784      * Tests if this vector is less than or equal to the broadcast of an input scalar.
 785      * <p>
 786      * This is a vector binary test operation where the primitive less than
 787      * or equal to operation ({@code <=}) is applied to lane elements.
 788      *
 789      * @param s the input scalar, broadcast to every lane
 790      * @return the mask result of testing if this vector is less than or equal
 791      * to the broadcast of an input scalar
 792      */
 793     public abstract Mask<Byte> lessThanEq(byte s);
 794 
 795     @Override // Javadoc is inherited from the overridden method
 796     public abstract Mask<Byte> greaterThan(Vector<Byte> v);
 797 
 798     /**
 799      * Tests if this vector is greater than the broadcast of an input scalar.
 800      * <p>
 801      * This is a vector binary test operation where the primitive greater than
 802      * operation ({@code >}) is applied to lane elements.
 803      *
 804      * @param s the input scalar, broadcast to every lane
 805      * @return the mask result of testing if this vector is greater than the
 806      * broadcast of an input scalar
 807      */
 808     public abstract Mask<Byte> greaterThan(byte s);
 809 
 810     @Override // Javadoc is inherited from the overridden method
 811     public abstract Mask<Byte> greaterThanEq(Vector<Byte> v);
 812 
 813     /**
 814      * Tests if this vector is greater than or equal to the broadcast of an
 815      * input scalar.
 816      * <p>
 817      * This is a vector binary test operation where the primitive greater than
 818      * or equal to operation ({@code >=}) is applied to lane elements.
 819      *
 820      * @param s the input scalar, broadcast to every lane
 821      * @return the mask result of testing if this vector is greater than or
 822      * equal to the broadcast of an input scalar
 823      */
 824     public abstract Mask<Byte> greaterThanEq(byte s);
 825 
 826     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 827     public abstract ByteVector blend(Vector<Byte> v, Mask<Byte> m);
 828 
 829     /**
 830      * Blends the lane elements of this vector with those of the broadcast of an
 831      * input scalar, selecting lanes controlled by a mask.
 832      * <p>
 833      * For each lane of the mask, at lane index {@code N}, if the mask lane
 834      * is set then the lane element at {@code N} from the broadcast of the
 835      * input scalar is selected and placed into the resulting vector at {@code N},
 836      * otherwise the lane element at {@code N} from this vector is
 837      * selected and placed into the resulting vector at {@code N}.
 838      *
 839      * @param s the input scalar
 840      * @param m the mask controlling lane selection
 841      * @return the result of blending the lane elements of this vector with
 842      * those of the broadcast of an input scalar
 843      */
 844     public abstract ByteVector blend(byte s, Mask<Byte> m);
 845 
 846     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 847     public abstract ByteVector rearrange(Vector<Byte> v,
 848                                                       Shuffle<Byte> s, Mask<Byte> m);
 849 
 850     @Override // NOTE(review): the Shuffle parameter is named m here, but s in the three-argument overload
 851     public abstract ByteVector rearrange(Shuffle<Byte> m);
 852 
 853     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 854     public abstract ByteVector reshape(Species<Byte> s);
 855 
 856     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 857     public abstract ByteVector rotateEL(int i);
 858 
 859     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 860     public abstract ByteVector rotateER(int i);
 861 
 862     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 863     public abstract ByteVector shiftEL(int i);
 864 
 865     @Override // declares a ByteVector result; Javadoc is inherited from the overridden method
 866     public abstract ByteVector shiftER(int i);
 867 
 868 
 869 
 870     /**
 871      * Bitwise ANDs this vector with an input vector.
 872      * <p>
 873      * This is a vector binary operation where the primitive bitwise AND
 874      * operation ({@code &}) is applied to lane elements.
 875      *
 876      * @param v the input vector
 877      * @return the lane-wise bitwise AND of this vector with the input vector
 878      */
 879     public abstract ByteVector and(Vector<Byte> v);
 880 
 881     /**
 882      * Bitwise ANDs this vector with the broadcast of an input scalar.
 883      * <p>
 884      * This is a vector binary operation where the primitive bitwise AND
 885      * operation ({@code &}) is applied to lane elements.
 886      *
 887      * @param s the input scalar, broadcast to every lane
 888      * @return the lane-wise bitwise AND of this vector with the broadcast of
 889      * the input scalar
 890      */
 891     public abstract ByteVector and(byte s);
 892 
 893     /**
 894      * Bitwise ANDs this vector with an input vector, selecting lane elements
 895      * controlled by a mask.
 896      * <p>
 897      * This is a vector binary operation where the primitive bitwise AND
 898      * operation ({@code &}) is applied to lane elements.
 899      *
 900      * @param v the input vector
 901      * @param m the mask controlling lane selection
 902      * @return the bitwise AND of this vector with the input vector, under control of the mask
 903      */
 904     public abstract ByteVector and(Vector<Byte> v, Mask<Byte> m);
 905 
 906     /**
 907      * Bitwise ANDs this vector with the broadcast of an input scalar, selecting
 908      * lane elements controlled by a mask.
 909      * <p>
 910      * This is a vector binary operation where the primitive bitwise AND
 911      * operation ({@code &}) is applied to lane elements.
 912      *
 913      * @param s the input scalar, broadcast to every lane
 914      * @param m the mask controlling lane selection
 915      * @return the bitwise AND of this vector with the broadcast of the input
 916      * scalar, under control of the mask
 917      */
 918     public abstract ByteVector and(byte s, Mask<Byte> m);
 919 
 920     /**
 921      * Bitwise ORs this vector with an input vector.
 922      * <p>
 923      * This is a vector binary operation where the primitive bitwise OR
 924      * operation ({@code |}) is applied to lane elements.
 925      *
 926      * @param v the input vector
 927      * @return the lane-wise bitwise OR of this vector with the input vector
 928      */
 929     public abstract ByteVector or(Vector<Byte> v);
 930 
 931     /**
 932      * Bitwise ORs this vector with the broadcast of an input scalar.
 933      * <p>
 934      * This is a vector binary operation where the primitive bitwise OR
 935      * operation ({@code |}) is applied to lane elements.
 936      *
 937      * @param s the input scalar, broadcast to every lane
 938      * @return the lane-wise bitwise OR of this vector with the broadcast of
 939      * the input scalar
 940      */
 941     public abstract ByteVector or(byte s);
 942 
 943     /**
 944      * Bitwise ORs this vector with an input vector, selecting lane elements
 945      * controlled by a mask.
 946      * <p>
 947      * This is a vector binary operation where the primitive bitwise OR
 948      * operation ({@code |}) is applied to lane elements.
 949      *
 950      * @param v the input vector
 951      * @param m the mask controlling lane selection
 952      * @return the bitwise OR of this vector with the input vector, under control of the mask
 953      */
 954     public abstract ByteVector or(Vector<Byte> v, Mask<Byte> m);
 955 
 956     /**
 957      * Bitwise ORs this vector with the broadcast of an input scalar, selecting
 958      * lane elements controlled by a mask.
 959      * <p>
 960      * This is a vector binary operation where the primitive bitwise OR
 961      * operation ({@code |}) is applied to lane elements.
 962      *
 963      * @param s the input scalar, broadcast to every lane
 964      * @param m the mask controlling lane selection
 965      * @return the bitwise OR of this vector with the broadcast of the input
 966      * scalar, under control of the mask
 967      */
 968     public abstract ByteVector or(byte s, Mask<Byte> m);
 969 
 970     /**
 971      * Bitwise XORs this vector with an input vector.
 972      * <p>
 973      * This is a vector binary operation where the primitive bitwise XOR
 974      * operation ({@code ^}) is applied to lane elements.
 975      *
 976      * @param v the input vector
 977      * @return the lane-wise bitwise XOR of this vector with the input vector
 978      */
 979     public abstract ByteVector xor(Vector<Byte> v);
 980 
 981     /**
 982      * Bitwise XORs this vector with the broadcast of an input scalar.
 983      * <p>
 984      * This is a vector binary operation where the primitive bitwise XOR
 985      * operation ({@code ^}) is applied to lane elements.
 986      *
 987      * @param s the input scalar, broadcast to every lane
 988      * @return the lane-wise bitwise XOR of this vector with the broadcast of
 989      * the input scalar
 990      */
 991     public abstract ByteVector xor(byte s);
 992 
 993     /**
 994      * Bitwise XORs this vector with an input vector, selecting lane elements
 995      * controlled by a mask.
 996      * <p>
 997      * This is a vector binary operation where the primitive bitwise XOR
 998      * operation ({@code ^}) is applied to lane elements.
 999      *
1000      * @param v the input vector
1001      * @param m the mask controlling lane selection
1002      * @return the bitwise XOR of this vector with the input vector, under control of the mask
1003      */
1004     public abstract ByteVector xor(Vector<Byte> v, Mask<Byte> m);
1005 
1006     /**
1007      * Bitwise XORs this vector with the broadcast of an input scalar, selecting
1008      * lane elements controlled by a mask.
1009      * <p>
1010      * This is a vector binary operation where the primitive bitwise XOR
1011      * operation ({@code ^}) is applied to lane elements.
1012      *
1013      * @param s the input scalar, broadcast to every lane
1014      * @param m the mask controlling lane selection
1015      * @return the bitwise XOR of this vector with the broadcast of the input
1016      * scalar, under control of the mask
1017      */
1018     public abstract ByteVector xor(byte s, Mask<Byte> m);
1019 
1020     /**
1021      * Bitwise NOTs this vector.
1022      * <p>
1023      * This is a vector unary operation where the primitive bitwise NOT
1024      * operation ({@code ~}) is applied to lane elements.
1025      *
1026      * @return the lane-wise bitwise NOT of this vector
1027      */
1028     public abstract ByteVector not();
1029 
1030     /**
1031      * Bitwise NOTs this vector, selecting lane elements controlled by a mask.
1032      * <p>
1033      * This is a vector unary operation where the primitive bitwise NOT
1034      * operation ({@code ~}) is applied to lane elements.
1035      *
1036      * @param m the mask controlling lane selection
1037      * @return the bitwise NOT of this vector, under control of the mask
1038      */
1039     public abstract ByteVector not(Mask<Byte> m);
1040 
1041     /**
1042      * Logically left shifts this vector by the broadcast of an input scalar.
1043      * <p>
1044      * This is a vector binary operation where the primitive logical left shift
1045      * operation ({@code <<}) is applied to lane elements to left shift the
1046      * element by the shift value specified by the input scalar. Only the 3
1047      * lowest-order bits of the shift value are used. It is as if the shift value
1048      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1049      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1050      *
1051      * @param s the input scalar; the number of bits to left shift
1052      * @return the result of logically left shifting this vector by the
1053      * broadcast of an input scalar
1054      */
1055     public abstract ByteVector shiftL(int s);
1056 
1057     /**
1058      * Logically left shifts this vector by the broadcast of an input scalar,
1059      * selecting lane elements controlled by a mask.
1060      * <p>
1061      * This is a vector binary operation where the primitive logical left shift
1062      * operation ({@code <<}) is applied to lane elements to left shift the
1063      * element by the shift value specified by the input scalar. Only the 3
1064      * lowest-order bits of the shift value are used. It is as if the shift value
1065      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1066      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1067      *
1068      * @param s the input scalar; the number of bits to left shift
1069      * @param m the mask controlling lane selection
1070      * @return the result of logically left shifting this vector by the
1071      * broadcast of an input scalar
1072      */
1073     public abstract ByteVector shiftL(int s, Mask<Byte> m);
1074 
1075 
1076     // logical, or unsigned, shift right
1077 
1078     /**
1079      * Logically right shifts (or unsigned right shifts) this vector by the
1080      * broadcast of an input scalar.
1081      * <p>
1082      * This is a vector binary operation where the primitive logical right shift
1083      * operation ({@code >>>}) is applied to lane elements to logically right shift the
1084      * element by the shift value specified by the input scalar. Only the 3
1085      * lowest-order bits of the shift value are used. It is as if the shift value
1086      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1087      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1088      *
1089      * @param s the input scalar; the number of bits to right shift
1090      * @return the result of logically right shifting this vector by the
1091      * broadcast of an input scalar
1092      */
1093     public abstract ByteVector shiftR(int s);
1094 
1095     /**
1096      * Logically right shifts (or unsigned right shifts) this vector by the
1097      * broadcast of an input scalar, selecting lane elements controlled by a
1098      * mask.
1099      * <p>
1100      * This is a vector binary operation where the primitive logical right shift
1101      * operation ({@code >>>}) is applied to lane elements to logically right shift the
1102      * element by the shift value specified by the input scalar. Only the 3
1103      * lowest-order bits of the shift value are used. It is as if the shift value
1104      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1105      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1106      *
1107      * @param s the input scalar; the number of bits to right shift
1108      * @param m the mask controlling lane selection
1109      * @return the result of logically right shifting this vector by the
1110      * broadcast of an input scalar
1111      */
1112     public abstract ByteVector shiftR(int s, Mask<Byte> m);
1113 
1114 
1115     /**
1116      * Arithmetically right shifts (or signed right shifts) this vector by the
1117      * broadcast of an input scalar.
1118      * <p>
1119      * This is a vector binary operation where the primitive arithmetic right
1120      * shift operation ({@code >>}) is applied to lane elements to arithmetically
1121      * right shift the element by the shift value specified by the input scalar.
1122      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1123      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1124      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1125      *
1126      * @param s the input scalar; the number of bits to right shift
1127      * @return the result of arithmetically right shifting this vector by the
1128      * broadcast of an input scalar
1129      */
1130     public abstract ByteVector aShiftR(int s);
1131 
1132     /**
1133      * Arithmetically right shifts (or signed right shifts) this vector by the
1134      * broadcast of an input scalar, selecting lane elements controlled by a
1135      * mask.
1136      * <p>
1137      * This is a vector binary operation where the primitive arithmetic right
1138      * shift operation ({@code >>}) is applied to lane elements to arithmetically
1139      * right shift the element by the shift value specified by the input scalar.
1140      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1141      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1142      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1143      *
1144      * @param s the input scalar; the number of bits to right shift
1145      * @param m the mask controlling lane selection
1146      * @return the result of arithmetically right shifting this vector by the
1147      * broadcast of an input scalar
1148      */
1149     public abstract ByteVector aShiftR(int s, Mask<Byte> m);
1150 
1151 
1152     @Override // Javadoc is inherited from the overridden method
1153     public abstract void intoByteArray(byte[] a, int ix);
1154 
1155     @Override // Javadoc is inherited from the overridden method
1156     public abstract void intoByteArray(byte[] a, int ix, Mask<Byte> m);
1157 
1158     @Override // Javadoc is inherited from the overridden method
1159     public abstract void intoByteBuffer(ByteBuffer bb, int ix);
1160 
1161     @Override // Javadoc is inherited from the overridden method
1162     public abstract void intoByteBuffer(ByteBuffer bb, int ix, Mask<Byte> m);
1163 
1164 
1165     // Type specific horizontal reductions
1166     /**
1167      * Adds all lane elements of this vector.
1168      * <p>
1169      * This is an associative vector reduction operation where the addition
1170      * operation ({@code +}) is applied to lane elements,
1171      * and the identity value is {@code 0}.
1172      *
1173      * @return the sum of all the lane elements of this vector
1174      */
1175     public abstract byte addAll();
1176 
1177     /**
1178      * Adds all lane elements of this vector, selecting lane elements
1179      * controlled by a mask.
1180      * <p>
1181      * This is an associative vector reduction operation where the addition
1182      * operation ({@code +}) is applied to lane elements,
1183      * and the identity value is {@code 0}.
1184      *
1185      * @param m the mask controlling lane selection
1186      * @return the sum of the selected lane elements of this vector
1187      */
1188     public abstract byte addAll(Mask<Byte> m);
1189 
1190     /**
1191      * Multiplies all lane elements of this vector.
1192      * <p>
1193      * This is an associative vector reduction operation where the
1194      * multiplication operation ({@code *}) is applied to lane elements,
1195      * and the identity value is {@code 1}.
1196      *
1197      * @return the product of all the lane elements of this vector
1198      */
1199     public abstract byte mulAll();
1200 
1201     /**
1202      * Multiplies all lane elements of this vector, selecting lane elements
1203      * controlled by a mask.
1204      * <p>
1205      * This is an associative vector reduction operation where the
1206      * multiplication operation ({@code *}) is applied to lane elements,
1207      * and the identity value is {@code 1}.
1208      *
1209      * @param m the mask controlling lane selection
1210      * @return the product of the selected lane elements of this vector
1211      */
1212     public abstract byte mulAll(Mask<Byte> m);
1213 
1214     /**
1215      * Returns the minimum lane element of this vector.
1216      * <p>
1217      * This is an associative vector reduction operation where the operation
1218      * {@code (a, b) -> Math.min(a, b)} is applied to lane elements,
1219      * and the identity value is
1220      * {@link Byte#MAX_VALUE}.
1221      *
1222      * @return the minimum of all the lane elements of this vector
1223      */
1224     public abstract byte minAll();
1225 
1226     /**
1227      * Returns the minimum lane element of this vector, selecting lane elements
1228      * controlled by a mask.
1229      * <p>
1230      * This is an associative vector reduction operation where the operation
1231      * {@code (a, b) -> Math.min(a, b)} is applied to lane elements,
1232      * and the identity value is
1233      * {@link Byte#MAX_VALUE}.
1234      *
1235      * @param m the mask controlling lane selection
1236      * @return the minimum of the selected lane elements of this vector
1237      */
1238     public abstract byte minAll(Mask<Byte> m);
1239 
1240     /**
1241      * Returns the maximum lane element of this vector.
1242      * <p>
1243      * This is an associative vector reduction operation where the operation
1244      * {@code (a, b) -> Math.max(a, b)} is applied to lane elements,
1245      * and the identity value is
1246      * {@link Byte#MIN_VALUE}.
1247      *
1248      * @return the maximum of all the lane elements of this vector
1249      */
1250     public abstract byte maxAll();
1251 
1252     /**
1253      * Returns the maximum lane element of this vector, selecting lane elements
1254      * controlled by a mask.
1255      * <p>
1256      * This is an associative vector reduction operation where the operation
1257      * {@code (a, b) -> Math.max(a, b)} is applied to lane elements,
1258      * and the identity value is
1259      * {@link Byte#MIN_VALUE}.
1260      *
1261      * @param m the mask controlling lane selection
1262      * @return the maximum of the selected lane elements of this vector
1263      */
1264     public abstract byte maxAll(Mask<Byte> m);
1265 
1266     /**
1267      * Logically ORs all lane elements of this vector.
1268      * <p>
1269      * This is an associative vector reduction operation where the logical OR
1270      * operation ({@code |}) is applied to lane elements,
1271      * and the identity value is {@code 0}.
1272      *
1273      * @return the logical OR of all the lane elements of this vector
1274      */
1275     public abstract byte orAll();
1276 
1277     /**
1278      * Logically ORs all lane elements of this vector, selecting lane elements
1279      * controlled by a mask.
1280      * <p>
1281      * This is an associative vector reduction operation where the logical OR
1282      * operation ({@code |}) is applied to lane elements,
1283      * and the identity value is {@code 0}.
1284      *
1285      * @param m the mask controlling lane selection
1286      * @return the logical OR of the selected lane elements of this vector
1287      */
1288     public abstract byte orAll(Mask<Byte> m);
1289 
1290     /**
1291      * Logically ANDs all lane elements of this vector.
1292      * <p>
1293      * This is an associative vector reduction operation where the logical AND
1294      * operation ({@code &}) is applied to lane elements,
1295      * and the identity value is {@code -1}.
1296      *
1297      * @return the logical AND of all the lane elements of this vector
1298      */
1299     public abstract byte andAll();
1300 
1301     /**
1302      * Logically ANDs all lane elements of this vector, selecting lane elements
1303      * controlled by a mask.
1304      * <p>
1305      * This is an associative vector reduction operation where the logical AND
1306      * operation ({@code &}) is applied to lane elements,
1307      * and the identity value is {@code -1}.
1308      *
1309      * @param m the mask controlling lane selection
1310      * @return the logical AND of the selected lane elements of this vector
1311      */
1312     public abstract byte andAll(Mask<Byte> m);
1313 
1314     /**
1315      * Logically XORs all lane elements of this vector.
1316      * <p>
1317      * This is an associative vector reduction operation where the logical XOR
1318      * operation ({@code ^}) is applied to lane elements,
1319      * and the identity value is {@code 0}.
1320      *
1321      * @return the logical XOR of all the lane elements of this vector
1322      */
1323     public abstract byte xorAll();
1324 
1325     /**
1326      * Logically XORs all lane elements of this vector, selecting lane elements
1327      * controlled by a mask.
1328      * <p>
1329      * This is an associative vector reduction operation where the logical XOR
1330      * operation ({@code ^}) is applied to lane elements,
1331      * and the identity value is {@code 0}.
1332      *
1333      * @param m the mask controlling lane selection
1334      * @return the logical XOR of the selected lane elements of this vector
1335      */
1336     public abstract byte xorAll(Mask<Byte> m);
1337 
1338     // Type specific accessors
1339 
1340     /**
1341      * Gets the lane element at lane index {@code i}.
1342      *
1343      * @param i the lane index
1344      * @return the lane element at lane index {@code i}
1345      * @throws IllegalArgumentException if the index is out of range
1346      * ({@code < 0 || >= length()})
1347      */
1348     public abstract byte get(int i);
1349 
1350     /**
1351      * Replaces the lane element of this vector at lane index {@code i} with
1352      * value {@code e}.
1353      * <p>
1354      * This is a cross-lane operation and behaves as if it returns the result
1355      * of blending this vector with an input vector that is the result of
1356      * broadcasting {@code e} and a mask that has only one lane set at lane
1357      * index {@code i}.
1358      *
1359      * @param i the lane index of the lane element to be replaced
1360      * @param e the value to be placed
1361      * @return the result of replacing the lane element of this vector at lane
1362      * index {@code i} with value {@code e}
1363      * @throws IllegalArgumentException if the index is out of range
1364      * ({@code < 0 || >= length()})
1365      */
1366     public abstract ByteVector with(int i, byte e);
1367 
1368     // Type specific extractors
1369 
1370     /**
1371      * Copies the lane elements of this vector into a newly allocated array.
1372      * <p>
1373      * The result is the same as {@link #intoArray(byte[], int) storing} this
1374      * vector at offset {@code 0} of a fresh array sized to hold every lane:
1375      * <pre>{@code
1376      *   byte[] a = new byte[this.length()];
1377      *   this.intoArray(a, 0);
1378      *   return a;
1379      * }</pre>
1380      *
1381      * @return a new array holding the lane elements of this vector, in lane order
1382      */
1383     @ForceInline
1384     public final byte[] toArray() {
1385         final byte[] lanes = new byte[this.species().length()];
1386         this.intoArray(lanes, 0);
1387         return lanes;
1388     }
1389 
1390     /**
1391      * Stores this vector into an array starting at an offset.
1392      * <p>
1393      * For each vector lane, where {@code N} is the vector lane index,
1394      * the lane element at index {@code N} is stored into the array at index
1395      * {@code i + N}.
1396      *
1397      * @param a the array
1398      * @param i the offset into the array
1399      * @throws IndexOutOfBoundsException if {@code i < 0}, or
1400      * {@code i > a.length - this.length()}
1401      */
1402     public abstract void intoArray(byte[] a, int i);
1403 
1404     /**
1405      * Stores this vector into an array starting at an offset and using a mask.
1406      * <p>
1407      * For each vector lane, where {@code N} is the vector lane index,
1408      * if the mask lane at index {@code N} is set then the lane element at
1409      * index {@code N} is stored into the array at index {@code i + N}.
1410      *
1411      * @param a the array
1412      * @param i the offset into the array
1413      * @param m the mask
1414      * @throws IndexOutOfBoundsException if {@code i < 0}, or
1415      * for any vector lane index {@code N} where the mask at lane {@code N}
1416      * is set, {@code i >= a.length - N}
1417      */
1418     public abstract void intoArray(byte[] a, int i, Mask<Byte> m);
1419 
1420     /**
1421      * Stores this vector into an array using indexes obtained from an index
1422      * map.
1423      * <p>
1424      * For each vector lane, where {@code N} is the vector lane index, the
1425      * lane element at index {@code N} is stored into the array at index
1426      * {@code i + indexMap[j + N]}.
1427      *
1428      * @param a the array
1429      * @param i the offset into the array, may be negative if relative
1430      * indexes in the index map compensate to produce a value within the
1431      * array bounds
1432      * @param indexMap the index map
1433      * @param j the offset into the index map
1434      * @throws IndexOutOfBoundsException if {@code j < 0}, or
1435      * {@code j > indexMap.length - this.length()},
1436      * or for any vector lane index {@code N} the result of
1437      * {@code i + indexMap[j + N]} is {@code < 0} or {@code >= a.length}
1438      */
1439     public void intoArray(byte[] a, int i, int[] indexMap, int j) {
1440         forEach((n, e) -> a[i + indexMap[j + n]] = e);
1441     }
1442 
1443     /**
1444      * Stores this vector into an array using indexes obtained from an index
1445      * map and using a mask.
1446      * <p>
1447      * For each vector lane, where {@code N} is the vector lane index,
1448      * if the mask lane at index {@code N} is set then the lane element at
1449      * index {@code N} is stored into the array at index
1450      * {@code i + indexMap[j + N]}.
1451      *
1452      * @param a the array
1453      * @param i the offset into the array, may be negative if relative
1454      * indexes in the index map compensate to produce a value within the
1455      * array bounds
1456      * @param m the mask
1457      * @param indexMap the index map
1458      * @param j the offset into the index map
1459      * @throws IndexOutOfBoundsException if {@code j < 0}, or
1460      * {@code j > indexMap.length - this.length()},
1461      * or for any vector lane index {@code N} where the mask at lane
1462      * {@code N} is set the result of {@code i + indexMap[j + N]} is
1463      * {@code < 0} or {@code >= a.length}
1464      */
1465     public void intoArray(byte[] a, int i, Mask<Byte> m, int[] indexMap, int j) {
1466         forEach(m, (n, e) -> a[i + indexMap[j + n]] = e);
1467     }
1468     // Species
1469 
    /**
     * {@inheritDoc}
     * <p>
     * Redeclared here so that the return type is narrowed to the
     * {@code byte}-specific {@link ByteSpecies}.
     */
    @Override
    public abstract ByteSpecies species();
1472 
1473     /**
1474      * A specialized factory for creating {@link ByteVector} value of the same
1475      * shape, and a {@link Mask} and {@link Shuffle} values of the same shape
1476      * and {@code int} element type.
1477      */
1478     public static abstract class ByteSpecies extends Vector.Species<Byte> {
1479         interface FOp {
1480             byte apply(int i);
1481         }
1482 
1483         abstract ByteVector op(FOp f);
1484 
1485         abstract ByteVector op(Mask<Byte> m, FOp f);
1486 
1487         interface FOpm {
1488             boolean apply(int i);
1489         }
1490 
1491         abstract Mask<Byte> opm(FOpm f);
1492 
1493 
1494 
1495         // Factories
1496 
1497         @Override
1498         public abstract ByteVector zero();
1499 
1500         /**
1501          * Returns a vector where all lane elements are set to the primitive
1502          * value {@code e}.
1503          *
1504          * @param e the value
1505          * @return a vector of vector where all lane elements are set to
1506          * the primitive value {@code e}
1507          */
1508         public abstract ByteVector broadcast(byte e);
1509 
1510         /**
1511          * Returns a vector where the first lane element is set to the primtive
1512          * value {@code e}, all other lane elements are set to the default
1513          * value.
1514          *
1515          * @param e the value
1516          * @return a vector where the first lane element is set to the primitive
1517          * value {@code e}
1518          */
1519         @ForceInline
1520         public final ByteVector single(byte e) {
1521             return zero().with(0, e);
1522         }
1523 
1524         /**
1525          * Returns a vector where each lane element is set to a randomly
1526          * generated primitive value.
1527          *
1528          * The semantics are equivalent to calling
1529          * {@code (byte)ThreadLocalRandom#nextInt()}.
1530          *
1531          * @return a vector where each lane elements is set to a randomly
1532          * generated primitive value
1533          */
1534         public ByteVector random() {
1535             ThreadLocalRandom r = ThreadLocalRandom.current();
1536             return op(i -> (byte) r.nextInt());
1537         }
1538 
1539         /**
1540          * Returns a vector where each lane element is set to a given
1541          * primitive value.
1542          * <p>
1543          * For each vector lane, where {@code N} is the vector lane index, the
1544          * the primitive value at index {@code N} is placed into the resulting
1545          * vector at lane index {@code N}.
1546          *
1547          * @param es the given primitive values
1548          * @return a vector where each lane element is set to a given primitive
1549          * value
1550          * @throws IndexOutOfBoundsException if {@code es.length < this.length()}
1551          */
1552         public abstract ByteVector scalars(byte... es);
1553     }
1554 
1555     /**
1556      * Finds the preferred species for an element type of {@code byte}.
1557      * <p>
1558      * A preferred species is a species chosen by the platform that has a
1559      * shape of maximal bit size.  A preferred species for different element
1560      * types will have the same shape, and therefore vectors, masks, and
1561      * shuffles created from such species will be shape compatible.
1562      *
1563      * @return the preferred species for an element type of {@code byte}
1564      */
1565     @SuppressWarnings("unchecked")
1566     public static ByteSpecies preferredSpecies() {
1567         return (ByteSpecies) Species.ofPreferred(byte.class);
1568     }
1569 
1570     /**
1571      * Finds a species for an element type of {@code byte} and shape.
1572      *
1573      * @param s the shape
1574      * @return a species for an element type of {@code byte} and shape
1575      * @throws IllegalArgumentException if no such species exists for the shape
1576      */
1577     @SuppressWarnings("unchecked")
1578     public static ByteSpecies species(Vector.Shape s) {
1579         Objects.requireNonNull(s);
1580         switch (s) {
1581             case S_64_BIT: return Byte64Vector.SPECIES;
1582             case S_128_BIT: return Byte128Vector.SPECIES;
1583             case S_256_BIT: return Byte256Vector.SPECIES;
1584             case S_512_BIT: return Byte512Vector.SPECIES;
1585             case S_Max_BIT: return ByteMaxVector.SPECIES;
1586             default: throw new IllegalArgumentException("Bad shape: " + s);
1587         }
1588     }
1589 }