1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.util.Objects;
  30 import java.util.function.IntUnaryOperator;
  31 import java.util.function.Function;
  32 import java.util.concurrent.ThreadLocalRandom;
  33 
  34 import jdk.internal.misc.Unsafe;
  35 import jdk.internal.vm.annotation.ForceInline;
  36 import static jdk.incubator.vector.VectorIntrinsics.*;
  37 
  38 
  39 /**
  40  * A specialized {@link Vector} representing an ordered immutable sequence of
  41  * {@code byte} values.
  42  */
  43 @SuppressWarnings("cast")
  44 public abstract class ByteVector extends Vector<Byte> {
  45 
    // No access modifier: this constructor is package-private, so it cannot be
    // invoked (and the class cannot be instantiated via it) from outside this package.
    ByteVector() {}
  47 
  48     private static final int ARRAY_SHIFT = 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
  49 
  50     // Unary operator
  51 
  52     interface FUnOp {
  53         byte apply(int i, byte a);
  54     }
  55 
    // Unary hook left to subclasses to implement.
    // NOTE(review): presumably applies f to every lane and returns the
    // resulting vector -- confirm in the concrete vector classes.
    abstract ByteVector uOp(FUnOp f);

    // Mask-accepting variant of the unary hook.
    // NOTE(review): presumably only lanes set in m are transformed -- confirm
    // in the concrete vector classes.
    abstract ByteVector uOp(VectorMask<Byte> m, FUnOp f);
  59 
  60     // Binary operator
  61 
  62     interface FBinOp {
  63         byte apply(int i, byte a, byte b);
  64     }
  65 
    // Binary hook left to subclasses to implement.
    // NOTE(review): presumably combines this vector with v lane by lane using
    // f -- confirm in the concrete vector classes.
    abstract ByteVector bOp(Vector<Byte> v, FBinOp f);

    // Mask-accepting variant of the binary hook.
    // NOTE(review): presumably only lanes set in m are combined -- confirm in
    // the concrete vector classes.
    abstract ByteVector bOp(Vector<Byte> v, VectorMask<Byte> m, FBinOp f);
  69 
  70     // Trinary operator
  71 
  72     interface FTriOp {
  73         byte apply(int i, byte a, byte b, byte c);
  74     }
  75 
    // Ternary hook left to subclasses to implement.
    // NOTE(review): presumably combines this vector with v1 and v2 lane by
    // lane using f -- confirm in the concrete vector classes.
    abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f);

    // Mask-accepting variant of the ternary hook.
    // NOTE(review): presumably only lanes set in m are combined -- confirm in
    // the concrete vector classes.
    abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m, FTriOp f);
  79 
  80     // Reduction operator
  81 
    // Reduction hook left to subclasses to implement; yields a single byte.
    // NOTE(review): presumably folds all lanes with f starting from the
    // initial value v -- confirm in the concrete vector classes.
    abstract byte rOp(byte v, FBinOp f);
  83 
  84     // Binary test
  85 
  86     interface FBinTest {
  87         boolean apply(int i, byte a, byte b);
  88     }
  89 
    // Binary-test hook left to subclasses to implement; yields a mask.
    // NOTE(review): presumably each mask lane holds the result of f for the
    // corresponding lanes of this vector and v -- confirm in the concrete
    // vector classes.
    abstract VectorMask<Byte> bTest(Vector<Byte> v, FBinTest f);
  91 
  92     // Foreach
  93 
  94     interface FUnCon {
  95         void apply(int i, byte a);
  96     }
  97 
    // Consumer hook left to subclasses to implement.
    // NOTE(review): presumably invokes f once per lane -- confirm in the
    // concrete vector classes.
    abstract void forEach(FUnCon f);

    // Mask-accepting variant of the consumer hook.
    // NOTE(review): presumably f is invoked only for lanes set in m -- confirm
    // in the concrete vector classes.
    abstract void forEach(VectorMask<Byte> m, FUnCon f);
 101 
 102     // Static factories
 103 
 104     /**
 105      * Returns a vector where all lane elements are set to the default
 106      * primitive value.
 107      *
 108      * @param species species of desired vector
 109      * @return a zero vector of given species
 110      */
 111     @ForceInline
 112     @SuppressWarnings("unchecked")
 113     public static ByteVector zero(VectorSpecies<Byte> species) {
 114         return VectorIntrinsics.broadcastCoerced((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 115                                                  0, species,
 116                                                  ((bits, s) -> ((ByteSpecies)s).op(i -> (byte)bits)));
 117     }
 118 
 119     @ForceInline
 120     @SuppressWarnings("unchecked")
 121     static VectorShuffle<Byte> shuffleIotaHelper(VectorSpecies<Byte> species, int step) {
 122         switch (species.bitSize()) {
 123             case 64: return VectorIntrinsics.shuffleIota(byte.class, Byte64Vector.Byte64Shuffle.class, species,
 124                                                         64 / Byte.SIZE, step,
 125                                                         (val, l) -> new Byte64Vector.Byte64Shuffle(i -> ((i + val) & (l-1))));
 126             case 128: return VectorIntrinsics.shuffleIota(byte.class, Byte128Vector.Byte128Shuffle.class, species,
 127                                                         128/ Byte.SIZE, step,
 128                                                         (val, l) -> new Byte128Vector.Byte128Shuffle(i -> ((i + val) & (l-1))));
 129             case 256: return VectorIntrinsics.shuffleIota(byte.class, Byte256Vector.Byte256Shuffle.class, species,
 130                                                         256/ Byte.SIZE, step,
 131                                                         (val, l) -> new Byte256Vector.Byte256Shuffle(i -> ((i + val) & (l-1))));
 132             case 512: return VectorIntrinsics.shuffleIota(byte.class, Byte512Vector.Byte512Shuffle.class, species,
 133                                                         512 / Byte.SIZE, step,
 134                                                         (val, l) -> new Byte512Vector.Byte512Shuffle(i -> ((i + val) & (l-1))));
 135             default: throw new IllegalArgumentException(Integer.toString(species.bitSize()));
 136         }
 137     }
 138 
 139     /**
 140      * Loads a vector from a byte array starting at an offset.
 141      * <p>
 142      * Bytes are composed into primitive lane elements according to the
 143      * native byte order of the underlying platform
 144      * <p>
 145      * This method behaves as if it returns the result of calling the
 146      * byte buffer, offset, and mask accepting
 147      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 148      * <pre>{@code
 149      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, VectorMask.allTrue());
 150      * }</pre>
 151      *
 152      * @param species species of desired vector
 153      * @param a the byte array
 154      * @param offset the offset into the array
 155      * @return a vector loaded from a byte array
 156      * @throws IndexOutOfBoundsException if {@code i < 0} or
 157      * {@code offset > a.length - (species.length() * species.elementSize() / Byte.SIZE)}
 158      */
 159     @ForceInline
 160     @SuppressWarnings("unchecked")
 161     public static ByteVector fromByteArray(VectorSpecies<Byte> species, byte[] a, int offset) {
 162         Objects.requireNonNull(a);
 163         offset = VectorIntrinsics.checkIndex(offset, a.length, species.bitSize() / Byte.SIZE);
 164         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 165                                      a, ((long) offset) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 166                                      a, offset, species,
 167                                      (c, idx, s) -> {
 168                                          ByteBuffer bbc = ByteBuffer.wrap(c, idx, c.length - idx).order(ByteOrder.nativeOrder());
 169                                          ByteBuffer tb = bbc;
 170                                          return ((ByteSpecies)s).op(i -> tb.get());
 171                                      });
 172     }
 173 
 174     /**
 175      * Loads a vector from a byte array starting at an offset and using a
 176      * mask.
 177      * <p>
 178      * Bytes are composed into primitive lane elements according to the
 179      * native byte order of the underlying platform.
 180      * <p>
 181      * This method behaves as if it returns the result of calling the
 182      * byte buffer, offset, and mask accepting
 183      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 184      * <pre>{@code
 185      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, m);
 186      * }</pre>
 187      *
 188      * @param species species of desired vector
 189      * @param a the byte array
 190      * @param offset the offset into the array
 191      * @param m the mask
 192      * @return a vector loaded from a byte array
 193      * @throws IndexOutOfBoundsException if {@code offset < 0} or
 194      * for any vector lane index {@code N} where the mask at lane {@code N}
 195      * is set
 196      * {@code offset >= a.length - (N * species.elementSize() / Byte.SIZE)}
 197      */
 198     @ForceInline
 199     public static ByteVector fromByteArray(VectorSpecies<Byte> species, byte[] a, int offset, VectorMask<Byte> m) {
 200         return zero(species).blend(fromByteArray(species, a, offset), m);
 201     }
 202 
 203     /**
 204      * Loads a vector from an array starting at offset.
 205      * <p>
 206      * For each vector lane, where {@code N} is the vector lane index, the
 207      * array element at index {@code offset + N} is placed into the
 208      * resulting vector at lane index {@code N}.
 209      *
 210      * @param species species of desired vector
 211      * @param a the array
 212      * @param offset the offset into the array
 213      * @return the vector loaded from an array
 214      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 215      * {@code offset > a.length - species.length()}
 216      */
 217     @ForceInline
 218     @SuppressWarnings("unchecked")
 219     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int offset){
 220         Objects.requireNonNull(a);
 221         offset = VectorIntrinsics.checkIndex(offset, a.length, species.length());
 222         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 223                                      a, (((long) offset) << ARRAY_SHIFT) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 224                                      a, offset, species,
 225                                      (c, idx, s) -> ((ByteSpecies)s).op(n -> c[idx + n]));
 226     }
 227 
 228 
 229     /**
 230      * Loads a vector from an array starting at offset and using a mask.
 231      * <p>
 232      * For each vector lane, where {@code N} is the vector lane index,
 233      * if the mask lane at index {@code N} is set then the array element at
 234      * index {@code offset + N} is placed into the resulting vector at lane index
 235      * {@code N}, otherwise the default element value is placed into the
 236      * resulting vector at lane index {@code N}.
 237      *
 238      * @param species species of desired vector
 239      * @param a the array
 240      * @param offset the offset into the array
 241      * @param m the mask
 242      * @return the vector loaded from an array
 243      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 244      * for any vector lane index {@code N} where the mask at lane {@code N}
 245      * is set {@code offset > a.length - N}
 246      */
 247     @ForceInline
 248     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int offset, VectorMask<Byte> m) {
 249         return zero(species).blend(fromArray(species, a, offset), m);
 250     }
 251 
 252     /**
 253      * Loads a vector from an array using indexes obtained from an index
 254      * map.
 255      * <p>
 256      * For each vector lane, where {@code N} is the vector lane index, the
 257      * array element at index {@code a_offset + indexMap[i_offset + N]} is placed into the
 258      * resulting vector at lane index {@code N}.
 259      *
 260      * @param species species of desired vector
 261      * @param a the array
 262      * @param a_offset the offset into the array, may be negative if relative
 263      * indexes in the index map compensate to produce a value within the
 264      * array bounds
 265      * @param indexMap the index map
 266      * @param i_offset the offset into the index map
 267      * @return the vector loaded from an array
 268      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 269      * {@code i_offset > indexMap.length - species.length()},
 270      * or for any vector lane index {@code N} the result of
 271      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
 272      */
 273     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int a_offset, int[] indexMap, int i_offset) {
 274         return ((ByteSpecies)species).op(n -> a[a_offset + indexMap[i_offset + n]]);
 275     }
 276     /**
 277      * Loads a vector from an array using indexes obtained from an index
 278      * map and using a mask.
 279      * <p>
 280      * For each vector lane, where {@code N} is the vector lane index,
 281      * if the mask lane at index {@code N} is set then the array element at
 282      * index {@code a_offset + indexMap[i_offset + N]} is placed into the resulting vector
 283      * at lane index {@code N}.
 284      *
 285      * @param species species of desired vector
 286      * @param a the array
 287      * @param a_offset the offset into the array, may be negative if relative
 288      * indexes in the index map compensate to produce a value within the
 289      * array bounds
 290      * @param m the mask
 291      * @param indexMap the index map
 292      * @param i_offset the offset into the index map
 293      * @return the vector loaded from an array
 294      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 295      * {@code i_offset > indexMap.length - species.length()},
 296      * or for any vector lane index {@code N} where the mask at lane
 297      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
 298      * {@code < 0} or {@code >= a.length}
 299      */
 300     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int a_offset, VectorMask<Byte> m, int[] indexMap, int i_offset) {
 301         return ((ByteSpecies)species).op(m, n -> a[a_offset + indexMap[i_offset + n]]);
 302     }
 303 
 304     /**
 305      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 306      * offset into the byte buffer.
 307      * <p>
 308      * Bytes are composed into primitive lane elements according to the
 309      * native byte order of the underlying platform.
 310      * <p>
 311      * This method behaves as if it returns the result of calling the
 312      * byte buffer, offset, and mask accepting
 313      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask)} method} as follows:
 314      * <pre>{@code
 315      *   return fromByteBuffer(b, offset, VectorMask.allTrue())
 316      * }</pre>
 317      *
 318      * @param species species of desired vector
 319      * @param bb the byte buffer
 320      * @param offset the offset into the byte buffer
 321      * @return a vector loaded from a byte buffer
 322      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 323      * or {@code > b.limit()},
 324      * or if there are fewer than
 325      * {@code species.length() * species.elementSize() / Byte.SIZE} bytes
 326      * remaining in the byte buffer from the given offset
 327      */
 328     @ForceInline
 329     @SuppressWarnings("unchecked")
 330     public static ByteVector fromByteBuffer(VectorSpecies<Byte> species, ByteBuffer bb, int offset) {
 331         if (bb.order() != ByteOrder.nativeOrder()) {
 332             throw new IllegalArgumentException();
 333         }
 334         offset = VectorIntrinsics.checkIndex(offset, bb.limit(), species.bitSize() / Byte.SIZE);
 335         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 336                                      U.getReference(bb, BYTE_BUFFER_HB), U.getLong(bb, BUFFER_ADDRESS) + offset,
 337                                      bb, offset, species,
 338                                      (c, idx, s) -> {
 339                                          ByteBuffer bbc = c.duplicate().position(idx).order(ByteOrder.nativeOrder());
 340                                          ByteBuffer tb = bbc;
 341                                          return ((ByteSpecies)s).op(i -> tb.get());
 342                                      });
 343     }
 344 
 345     /**
 346      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 347      * offset into the byte buffer and using a mask.
 348      * <p>
 349      * This method behaves as if the byte buffer is viewed as a primitive
 350      * {@link java.nio.Buffer buffer} for the primitive element type,
 351      * according to the native byte order of the underlying platform, and
 352      * the returned vector is loaded with a mask from a primitive array
 353      * obtained from the primitive buffer.
 354      * The following pseudocode expresses the behaviour, where
 355      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 356      * primitive element type, and {@code ESpecies} is the primitive
 357      * species for {@code e}:
 358      * <pre>{@code
 359      * EBuffer eb = b.duplicate().
 360      *     order(ByteOrder.nativeOrder()).position(offset).
 361      *     asEBuffer();
 362      * e[] es = new e[species.length()];
 363      * for (int n = 0; n < t.length; n++) {
 364      *     if (m.isSet(n))
 365      *         es[n] = eb.get(n);
 366      * }
 367      * EVector r = EVector.fromArray(es, 0, m);
 368      * }</pre>
 369      *
 370      * @param species species of desired vector
 371      * @param bb the byte buffer
 372      * @param offset the offset into the byte buffer
 373      * @param m the mask
 374      * @return a vector loaded from a byte buffer
 375      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 376      * or {@code > b.limit()},
 377      * for any vector lane index {@code N} where the mask at lane {@code N}
 378      * is set
 379      * {@code offset >= b.limit() - (N * species.elementSize() / Byte.SIZE)}
 380      */
 381     @ForceInline
 382     public static ByteVector fromByteBuffer(VectorSpecies<Byte> species, ByteBuffer bb, int offset, VectorMask<Byte> m) {
 383         return zero(species).blend(fromByteBuffer(species, bb, offset), m);
 384     }
 385 
 386     /**
 387      * Returns a vector where all lane elements are set to the primitive
 388      * value {@code e}.
 389      *
 390      * @param species species of the desired vector
 391      * @param e the value to be broadcasted
 392      * @return a vector of vector where all lane elements are set to
 393      * the primitive value {@code e}
 394      */
 395     @ForceInline
 396     @SuppressWarnings("unchecked")
 397     public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
 398         return VectorIntrinsics.broadcastCoerced(
 399             (Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 400             e, species,
 401             ((bits, sp) -> ((ByteSpecies)sp).op(i -> (byte)bits)));
 402     }
 403 
 404     /**
 405      * Returns a vector where each lane element is set to given
 406      * primitive values.
 407      * <p>
 408      * For each vector lane, where {@code N} is the vector lane index, the
 409      * the primitive value at index {@code N} is placed into the resulting
 410      * vector at lane index {@code N}.
 411      *
 412      * @param species species of the desired vector
 413      * @param es the given primitive values
 414      * @return a vector where each lane element is set to given primitive
 415      * values
 416      * @throws IndexOutOfBoundsException if {@code es.length < species.length()}
 417      */
 418     @ForceInline
 419     @SuppressWarnings("unchecked")
 420     public static ByteVector scalars(VectorSpecies<Byte> species, byte... es) {
 421         Objects.requireNonNull(es);
 422         int ix = VectorIntrinsics.checkIndex(0, es.length, species.length());
 423         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 424                                      es, Unsafe.ARRAY_BYTE_BASE_OFFSET,
 425                                      es, ix, species,
 426                                      (c, idx, sp) -> ((ByteSpecies)sp).op(n -> c[idx + n]));
 427     }
 428 
 429     /**
 430      * Returns a vector where the first lane element is set to the primtive
 431      * value {@code e}, all other lane elements are set to the default
 432      * value.
 433      *
 434      * @param species species of the desired vector
 435      * @param e the value
 436      * @return a vector where the first lane element is set to the primitive
 437      * value {@code e}
 438      */
 439     @ForceInline
 440     public static final ByteVector single(VectorSpecies<Byte> species, byte e) {
 441         return zero(species).with(0, e);
 442     }
 443 
 444     /**
 445      * Returns a vector where each lane element is set to a randomly
 446      * generated primitive value.
 447      *
 448      * The semantics are equivalent to calling
 449      * (byte){@link ThreadLocalRandom#nextInt()}
 450      *
 451      * @param species species of the desired vector
 452      * @return a vector where each lane elements is set to a randomly
 453      * generated primitive value
 454      */
 455     public static ByteVector random(VectorSpecies<Byte> species) {
 456         ThreadLocalRandom r = ThreadLocalRandom.current();
 457         return ((ByteSpecies)species).op(i -> (byte) r.nextInt());
 458     }
 459 
 460     // Ops
 461 
 462     /**
 463      * {@inheritDoc}
 464      */
 465     @Override
 466     public abstract ByteVector add(Vector<Byte> v);
 467 
 468     /**
 469      * Adds this vector to the broadcast of an input scalar.
 470      * <p>
 471      * This is a lane-wise binary operation which applies the primitive addition operation
 472      * ({@code +}) to each lane.
 473      *
 474      * @param s the input scalar
 475      * @return the result of adding this vector to the broadcast of an input
 476      * scalar
 477      */
 478     public abstract ByteVector add(byte s);
 479 
 480     /**
 481      * {@inheritDoc}
 482      */
 483     @Override
 484     public abstract ByteVector add(Vector<Byte> v, VectorMask<Byte> m);
 485 
 486     /**
 487      * Adds this vector to broadcast of an input scalar,
 488      * selecting lane elements controlled by a mask.
 489      * <p>
 490      * This is a lane-wise binary operation which applies the primitive addition operation
 491      * ({@code +}) to each lane.
 492      *
 493      * @param s the input scalar
 494      * @param m the mask controlling lane selection
 495      * @return the result of adding this vector to the broadcast of an input
 496      * scalar
 497      */
 498     public abstract ByteVector add(byte s, VectorMask<Byte> m);
 499 
 500     /**
 501      * {@inheritDoc}
 502      */
 503     @Override
 504     public abstract ByteVector sub(Vector<Byte> v);
 505 
 506     /**
 507      * Subtracts the broadcast of an input scalar from this vector.
 508      * <p>
 509      * This is a lane-wise binary operation which applies the primitive subtraction
 510      * operation ({@code -}) to each lane.
 511      *
 512      * @param s the input scalar
 513      * @return the result of subtracting the broadcast of an input
 514      * scalar from this vector
 515      */
 516     public abstract ByteVector sub(byte s);
 517 
 518     /**
 519      * {@inheritDoc}
 520      */
 521     @Override
 522     public abstract ByteVector sub(Vector<Byte> v, VectorMask<Byte> m);
 523 
 524     /**
 525      * Subtracts the broadcast of an input scalar from this vector, selecting
 526      * lane elements controlled by a mask.
 527      * <p>
 528      * This is a lane-wise binary operation which applies the primitive subtraction
 529      * operation ({@code -}) to each lane.
 530      *
 531      * @param s the input scalar
 532      * @param m the mask controlling lane selection
 533      * @return the result of subtracting the broadcast of an input
 534      * scalar from this vector
 535      */
 536     public abstract ByteVector sub(byte s, VectorMask<Byte> m);
 537 
 538     /**
 539      * {@inheritDoc}
 540      */
 541     @Override
 542     public abstract ByteVector mul(Vector<Byte> v);
 543 
 544     /**
 545      * Multiplies this vector with the broadcast of an input scalar.
 546      * <p>
 547      * This is a lane-wise binary operation which applies the primitive multiplication
 548      * operation ({@code *}) to each lane.
 549      *
 550      * @param s the input scalar
 551      * @return the result of multiplying this vector with the broadcast of an
 552      * input scalar
 553      */
 554     public abstract ByteVector mul(byte s);
 555 
 556     /**
 557      * {@inheritDoc}
 558      */
 559     @Override
 560     public abstract ByteVector mul(Vector<Byte> v, VectorMask<Byte> m);
 561 
 562     /**
 563      * Multiplies this vector with the broadcast of an input scalar, selecting
 564      * lane elements controlled by a mask.
 565      * <p>
 566      * This is a lane-wise binary operation which applies the primitive multiplication
 567      * operation ({@code *}) to each lane.
 568      *
 569      * @param s the input scalar
 570      * @param m the mask controlling lane selection
 571      * @return the result of multiplying this vector with the broadcast of an
 572      * input scalar
 573      */
 574     public abstract ByteVector mul(byte s, VectorMask<Byte> m);
 575 
 576     /**
 577      * {@inheritDoc}
 578      */
 579     @Override
 580     public abstract ByteVector neg();
 581 
 582     /**
 583      * {@inheritDoc}
 584      */
 585     @Override
 586     public abstract ByteVector neg(VectorMask<Byte> m);
 587 
 588     /**
 589      * {@inheritDoc}
 590      */
 591     @Override
 592     public abstract ByteVector abs();
 593 
 594     /**
 595      * {@inheritDoc}
 596      */
 597     @Override
 598     public abstract ByteVector abs(VectorMask<Byte> m);
 599 
 600     /**
 601      * {@inheritDoc}
 602      */
 603     @Override
 604     public abstract ByteVector min(Vector<Byte> v);
 605 
 606     /**
 607      * {@inheritDoc}
 608      */
 609     @Override
 610     public abstract ByteVector min(Vector<Byte> v, VectorMask<Byte> m);
 611 
 612     /**
 613      * Returns the minimum of this vector and the broadcast of an input scalar.
 614      * <p>
 615      * This is a lane-wise binary operation which applies the operation
 616      * {@code (a, b) -> Math.min(a, b)} to each lane.
 617      *
 618      * @param s the input scalar
 619      * @return the minimum of this vector and the broadcast of an input scalar
 620      */
 621     public abstract ByteVector min(byte s);
 622 
 623     /**
 624      * {@inheritDoc}
 625      */
 626     @Override
 627     public abstract ByteVector max(Vector<Byte> v);
 628 
 629     /**
 630      * {@inheritDoc}
 631      */
 632     @Override
 633     public abstract ByteVector max(Vector<Byte> v, VectorMask<Byte> m);
 634 
 635     /**
 636      * Returns the maximum of this vector and the broadcast of an input scalar.
 637      * <p>
 638      * This is a lane-wise binary operation which applies the operation
 639      * {@code (a, b) -> Math.max(a, b)} to each lane.
 640      *
 641      * @param s the input scalar
 642      * @return the maximum of this vector and the broadcast of an input scalar
 643      */
 644     public abstract ByteVector max(byte s);
 645 
 646     /**
 647      * {@inheritDoc}
 648      */
 649     @Override
 650     public abstract VectorMask<Byte> equal(Vector<Byte> v);
 651 
 652     /**
 653      * Tests if this vector is equal to the broadcast of an input scalar.
 654      * <p>
 655      * This is a lane-wise binary test operation which applies the primitive equals
  656      * operation ({@code ==}) to each lane.
 657      *
 658      * @param s the input scalar
 659      * @return the result mask of testing if this vector is equal to the
 660      * broadcast of an input scalar
 661      */
 662     public abstract VectorMask<Byte> equal(byte s);
 663 
 664     /**
 665      * {@inheritDoc}
 666      */
 667     @Override
 668     public abstract VectorMask<Byte> notEqual(Vector<Byte> v);
 669 
 670     /**
 671      * Tests if this vector is not equal to the broadcast of an input scalar.
 672      * <p>
 673      * This is a lane-wise binary test operation which applies the primitive not equals
 674      * operation ({@code !=}) to each lane.
 675      *
 676      * @param s the input scalar
 677      * @return the result mask of testing if this vector is not equal to the
 678      * broadcast of an input scalar
 679      */
 680     public abstract VectorMask<Byte> notEqual(byte s);
 681 
 682     /**
 683      * {@inheritDoc}
 684      */
 685     @Override
 686     public abstract VectorMask<Byte> lessThan(Vector<Byte> v);
 687 
 688     /**
 689      * Tests if this vector is less than the broadcast of an input scalar.
 690      * <p>
 691      * This is a lane-wise binary test operation which applies the primitive less than
 692      * operation ({@code <}) to each lane.
 693      *
 694      * @param s the input scalar
 695      * @return the mask result of testing if this vector is less than the
 696      * broadcast of an input scalar
 697      */
 698     public abstract VectorMask<Byte> lessThan(byte s);
 699 
 700     /**
 701      * {@inheritDoc}
 702      */
 703     @Override
 704     public abstract VectorMask<Byte> lessThanEq(Vector<Byte> v);
 705 
 706     /**
  707      * Tests if this vector is less than or equal to the broadcast of an input scalar.
 708      * <p>
 709      * This is a lane-wise binary test operation which applies the primitive less than
 710      * or equal to operation ({@code <=}) to each lane.
 711      *
 712      * @param s the input scalar
 713      * @return the mask result of testing if this vector is less than or equal
 714      * to the broadcast of an input scalar
 715      */
 716     public abstract VectorMask<Byte> lessThanEq(byte s);
 717 
 718     /**
 719      * {@inheritDoc}
 720      */
 721     @Override
 722     public abstract VectorMask<Byte> greaterThan(Vector<Byte> v);
 723 
 724     /**
 725      * Tests if this vector is greater than the broadcast of an input scalar.
 726      * <p>
 727      * This is a lane-wise binary test operation which applies the primitive greater than
 728      * operation ({@code >}) to each lane.
 729      *
 730      * @param s the input scalar
 731      * @return the mask result of testing if this vector is greater than the
 732      * broadcast of an input scalar
 733      */
 734     public abstract VectorMask<Byte> greaterThan(byte s);
 735 
 736     /**
 737      * {@inheritDoc}
 738      */
 739     @Override
 740     public abstract VectorMask<Byte> greaterThanEq(Vector<Byte> v);
 741 
 742     /**
 743      * Tests if this vector is greater than or equal to the broadcast of an
 744      * input scalar.
 745      * <p>
 746      * This is a lane-wise binary test operation which applies the primitive greater than
 747      * or equal to operation ({@code >=}) to each lane.
 748      *
 749      * @param s the input scalar
 750      * @return the mask result of testing if this vector is greater than or
 751      * equal to the broadcast of an input scalar
 752      */
 753     public abstract VectorMask<Byte> greaterThanEq(byte s);
 754 
 755     /**
 756      * {@inheritDoc}
 757      */
 758     @Override
 759     public abstract ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);
 760 
 761     /**
 762      * Blends the lane elements of this vector with those of the broadcast of an
 763      * input scalar, selecting lanes controlled by a mask.
 764      * <p>
 765      * For each lane of the mask, at lane index {@code N}, if the mask lane
 766      * is set then the lane element at {@code N} from the broadcast of the input scalar is
 767      * selected and placed into the resulting vector at {@code N},
 768      * otherwise the lane element at {@code N} from this vector is
 769      * selected and placed into the resulting vector at {@code N}.
 770      *
 771      * @param s the input scalar
 772      * @param m the mask controlling lane selection
 773      * @return the result of blending the lane elements of this vector with
 774      * those of the broadcast of an input scalar
 775      */
 776     public abstract ByteVector blend(byte s, VectorMask<Byte> m);
 777 
 778     /**
 779      * {@inheritDoc}
 780      */
 781     @Override
 782     public abstract ByteVector rearrange(Vector<Byte> v,
 783                                                       VectorShuffle<Byte> s, VectorMask<Byte> m);
 784 
 785     /**
 786      * {@inheritDoc}
 787      */
 788     @Override
 789     public abstract ByteVector rearrange(VectorShuffle<Byte> m);
 790 
 791     /**
 792      * {@inheritDoc}
 793      */
 794     @Override
 795     public abstract ByteVector reshape(VectorSpecies<Byte> s);
 796 
 797     /**
 798      * {@inheritDoc}
 799      */
 800     @Override
 801     public abstract ByteVector rotateLanesLeft(int i);
 802 
 803     /**
 804      * {@inheritDoc}
 805      */
 806     @Override
 807     public abstract ByteVector rotateLanesRight(int i);
 808 
 809     /**
 810      * {@inheritDoc}
 811      */
 812     @Override
 813     public abstract ByteVector shiftLanesLeft(int i);
 814 
 815     /**
 816      * {@inheritDoc}
 817      */
 818     @Override
 819     public abstract ByteVector shiftLanesRight(int i);
 820 
 821 
 822 
 823     /**
 824      * Bitwise ANDs this vector with an input vector.
 825      * <p>
 826      * This is a lane-wise binary operation which applies the primitive bitwise AND
 827      * operation ({@code &}) to each lane.
 828      *
 829      * @param v the input vector
 830      * @return the bitwise AND of this vector with the input vector
 831      */
 832     public abstract ByteVector and(Vector<Byte> v);
 833 
 834     /**
 835      * Bitwise ANDs this vector with the broadcast of an input scalar.
 836      * <p>
 837      * This is a lane-wise binary operation which applies the primitive bitwise AND
 838      * operation ({@code &}) to each lane.
 839      *
 840      * @param s the input scalar
 841      * @return the bitwise AND of this vector with the broadcast of an input
 842      * scalar
 843      */
 844     public abstract ByteVector and(byte s);
 845 
 846     /**
 847      * Bitwise ANDs this vector with an input vector, selecting lane elements
 848      * controlled by a mask.
 849      * <p>
 850      * This is a lane-wise binary operation which applies the primitive bitwise AND
 851      * operation ({@code &}) to each lane.
 852      *
 853      * @param v the input vector
 854      * @param m the mask controlling lane selection
 855      * @return the bitwise AND of this vector with the input vector
 856      */
 857     public abstract ByteVector and(Vector<Byte> v, VectorMask<Byte> m);
 858 
 859     /**
 860      * Bitwise ANDs this vector with the broadcast of an input scalar, selecting
 861      * lane elements controlled by a mask.
 862      * <p>
 863      * This is a lane-wise binary operation which applies the primitive bitwise AND
 864      * operation ({@code &}) to each lane.
 865      *
 866      * @param s the input scalar
 867      * @param m the mask controlling lane selection
 868      * @return the bitwise AND of this vector with the broadcast of an input
 869      * scalar
 870      */
 871     public abstract ByteVector and(byte s, VectorMask<Byte> m);
 872 
 873     /**
 874      * Bitwise ORs this vector with an input vector.
 875      * <p>
 876      * This is a lane-wise binary operation which applies the primitive bitwise OR
 877      * operation ({@code |}) to each lane.
 878      *
 879      * @param v the input vector
 880      * @return the bitwise OR of this vector with the input vector
 881      */
 882     public abstract ByteVector or(Vector<Byte> v);
 883 
 884     /**
 885      * Bitwise ORs this vector with the broadcast of an input scalar.
 886      * <p>
 887      * This is a lane-wise binary operation which applies the primitive bitwise OR
 888      * operation ({@code |}) to each lane.
 889      *
 890      * @param s the input scalar
 891      * @return the bitwise OR of this vector with the broadcast of an input
 892      * scalar
 893      */
 894     public abstract ByteVector or(byte s);
 895 
 896     /**
 897      * Bitwise ORs this vector with an input vector, selecting lane elements
 898      * controlled by a mask.
 899      * <p>
 900      * This is a lane-wise binary operation which applies the primitive bitwise OR
 901      * operation ({@code |}) to each lane.
 902      *
 903      * @param v the input vector
 904      * @param m the mask controlling lane selection
 905      * @return the bitwise OR of this vector with the input vector
 906      */
 907     public abstract ByteVector or(Vector<Byte> v, VectorMask<Byte> m);
 908 
 909     /**
 910      * Bitwise ORs this vector with the broadcast of an input scalar, selecting
 911      * lane elements controlled by a mask.
 912      * <p>
 913      * This is a lane-wise binary operation which applies the primitive bitwise OR
 914      * operation ({@code |}) to each lane.
 915      *
 916      * @param s the input scalar
 917      * @param m the mask controlling lane selection
 918      * @return the bitwise OR of this vector with the broadcast of an input
 919      * scalar
 920      */
 921     public abstract ByteVector or(byte s, VectorMask<Byte> m);
 922 
 923     /**
 924      * Bitwise XORs this vector with an input vector.
 925      * <p>
 926      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 927      * operation ({@code ^}) to each lane.
 928      *
 929      * @param v the input vector
 930      * @return the bitwise XOR of this vector with the input vector
 931      */
 932     public abstract ByteVector xor(Vector<Byte> v);
 933 
 934     /**
 935      * Bitwise XORs this vector with the broadcast of an input scalar.
 936      * <p>
 937      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 938      * operation ({@code ^}) to each lane.
 939      *
 940      * @param s the input scalar
 941      * @return the bitwise XOR of this vector with the broadcast of an input
 942      * scalar
 943      */
 944     public abstract ByteVector xor(byte s);
 945 
 946     /**
 947      * Bitwise XORs this vector with an input vector, selecting lane elements
 948      * controlled by a mask.
 949      * <p>
 950      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 951      * operation ({@code ^}) to each lane.
 952      *
 953      * @param v the input vector
 954      * @param m the mask controlling lane selection
 955      * @return the bitwise XOR of this vector with the input vector
 956      */
 957     public abstract ByteVector xor(Vector<Byte> v, VectorMask<Byte> m);
 958 
 959     /**
 960      * Bitwise XORs this vector with the broadcast of an input scalar, selecting
 961      * lane elements controlled by a mask.
 962      * <p>
 963      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 964      * operation ({@code ^}) to each lane.
 965      *
 966      * @param s the input scalar
 967      * @param m the mask controlling lane selection
 968      * @return the bitwise XOR of this vector with the broadcast of an input
 969      * scalar
 970      */
 971     public abstract ByteVector xor(byte s, VectorMask<Byte> m);
 972 
 973     /**
 974      * Bitwise NOTs this vector.
 975      * <p>
 976      * This is a lane-wise unary operation which applies the primitive bitwise NOT
 977      * operation ({@code ~}) to each lane.
 978      *
 979      * @return the bitwise NOT of this vector
 980      */
 981     public abstract ByteVector not();
 982 
 983     /**
 984      * Bitwise NOTs this vector, selecting lane elements controlled by a mask.
 985      * <p>
 986      * This is a lane-wise unary operation which applies the primitive bitwise NOT
 987      * operation ({@code ~}) to each lane.
 988      *
 989      * @param m the mask controlling lane selection
 990      * @return the bitwise NOT of this vector
 991      */
 992     public abstract ByteVector not(VectorMask<Byte> m);
 993 
 994     /**
 995      * Logically left shifts this vector by the broadcast of an input scalar.
 996      * <p>
 997      * This is a lane-wise binary operation which applies the primitive logical left shift
 998      * operation ({@code <<}) to each lane to left shift the
 999      * element by shift value as specified by the input scalar.
1000      * Only the 3 lowest-order bits of shift value are used. It is as if the shift value
1001      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1002      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1003      *
1004      * @param s the input scalar; the number of the bits to left shift
1005      * @return the result of logically left shifting this vector by the
1006      * broadcast of an input scalar
1007      */
1008     public abstract ByteVector shiftLeft(int s);
1009 
1010     /**
1011      * Logically left shifts this vector by the broadcast of an input scalar,
1012      * selecting lane elements controlled by a mask.
1013      * <p>
1014      * This is a lane-wise binary operation which applies the primitive logical left shift
1015      * operation ({@code <<}) to each lane to left shift the
1016      * element by shift value as specified by the input scalar.
1017      * Only the 3 lowest-order bits of shift value are used. It is as if the shift value
1018      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1019      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1020      *
1021      * @param s the input scalar; the number of the bits to left shift
1022      * @param m the mask controlling lane selection
1023      * @return the result of logically left shifting this vector by the
1024      * broadcast of an input scalar
1025      */
1026     public abstract ByteVector shiftLeft(int s, VectorMask<Byte> m);
1027 
1028     /**
1029      * Logically left shifts this vector by an input vector.
1030      * <p>
1031      * This is a lane-wise binary operation which applies the primitive logical left shift
1032      * operation ({@code <<}) to each lane. For each lane of this vector, the
1033      * shift value is the corresponding lane of input vector.
1034      * Only the 3 lowest-order bits of shift value are used. It is as if the shift value
1035      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1036      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1037      *
1038      * @param v the input vector
1039      * @return the result of logically left shifting this vector by the input
1040      * vector
1041      */
1042     public abstract ByteVector shiftLeft(Vector<Byte> v);
1043 
1044     /**
1045      * Performs a masked logical left shift of this vector, taking the
1046      * per-lane shift values from an input vector.
1047      * <p>
1048      * This lane-wise binary operation applies the primitive logical left shift
1049      * operator ({@code <<}) to every lane, using the corresponding lane of the
1050      * input vector as that lane's shift value.
1051      * A shift value contributes only its 3 lowest-order bits, exactly as though it
1052      * had first been combined with the mask value 0x7 by a bitwise AND ({@code &}).
1053      * The effective shift distance therefore always lies in the range 0 to 7, inclusive.
1054      *
1055      * @param v the input vector supplying the shift values
1056      * @param m the mask controlling lane selection
1057      * @return the result of logically left shifting this vector by the input
1058      * vector, blended with this vector under the mask
1059      */
1060     public ByteVector shiftLeft(Vector<Byte> v, VectorMask<Byte> m) {
1061         return this.blend(this.shiftLeft(v), m);
1062     }
1063 
1064     // logical, or unsigned, shift right
1065 
1066      /**
1067      * Logically right shifts (or unsigned right shifts) this vector by the
1068      * broadcast of an input scalar.
1069      * <p>
1070      * This is a lane-wise binary operation which applies the primitive logical right shift
1071      * operation ({@code >>>}) to each lane to logically right shift the
1072      * element by shift value as specified by the input scalar.
1073      * Only the 3 lowest-order bits of shift value are used. It is as if the shift value
1074      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1075      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1076      *
1077      * @param s the input scalar; the number of the bits to right shift
1078      * @return the result of logically right shifting this vector by the
1079      * broadcast of an input scalar
1080      */
1081     public abstract ByteVector shiftRight(int s);
1082 
1083      /**
1084      * Logically right shifts (or unsigned right shifts) this vector by the
1085      * broadcast of an input scalar, selecting lane elements controlled by a
1086      * mask.
1087      * <p>
1088      * This is a lane-wise binary operation which applies the primitive logical right shift
1089      * operation ({@code >>>}) to each lane to logically right shift the
1090      * element by shift value as specified by the input scalar.
1091      * Only the 3 lowest-order bits of shift value are used. It is as if the shift value
1092      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1093      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1094      *
1095      * @param s the input scalar; the number of the bits to right shift
1096      * @param m the mask controlling lane selection
1097      * @return the result of logically right shifting this vector by the
1098      * broadcast of an input scalar
1099      */
1100     public abstract ByteVector shiftRight(int s, VectorMask<Byte> m);
1101 
1102     /**
1103      * Logically right shifts (or unsigned right shifts) this vector by an
1104      * input vector.
1105      * <p>
1106      * This is a lane-wise binary operation which applies the primitive logical right shift
1107      * operation ({@code >>>}) to each lane. For each lane of this vector, the
1108      * shift value is the corresponding lane of input vector.
1109      * Only the 3 lowest-order bits of shift value are used. It is as if the shift value
1110      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1111      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1112      *
1113      * @param v the input vector
1114      * @return the result of logically right shifting this vector by the
1115      * input vector
1116      */
1117     public abstract ByteVector shiftRight(Vector<Byte> v);
1118 
1119     /**
1120      * Performs a masked logical (unsigned) right shift of this vector, taking
1121      * the per-lane shift values from an input vector.
1122      * <p>
1123      * This lane-wise binary operation applies the primitive logical right shift
1124      * operator ({@code >>>}) to every lane, using the corresponding lane of the
1125      * input vector as that lane's shift value.
1126      * A shift value contributes only its 3 lowest-order bits, exactly as though it
1127      * had first been combined with the mask value 0x7 by a bitwise AND ({@code &}).
1128      * The effective shift distance therefore always lies in the range 0 to 7, inclusive.
1129      *
1130      * @param v the input vector supplying the shift values
1131      * @param m the mask controlling lane selection
1132      * @return the result of logically right shifting this vector by the
1133      * input vector, blended with this vector under the mask
1134      */
1135     public ByteVector shiftRight(Vector<Byte> v, VectorMask<Byte> m) {
1136         return this.blend(this.shiftRight(v), m);
1137     }
1138 
1139     /**
1140      * Arithmetically right shifts (or signed right shifts) this vector by the
1141      * broadcast of an input scalar.
1142      * <p>
1143      * This is a lane-wise binary operation which applies the primitive arithmetic right
1144      * shift operation ({@code >>}) to each lane to arithmetically
1145      * right shift the element by shift value as specified by the input scalar.
1146      * Only the 3 lowest-order bits of shift value are used. It is as if the shift
1147      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1148      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1149      *
1150      * @param s the input scalar; the number of the bits to right shift
1151      * @return the result of arithmetically right shifting this vector by the
1152      * broadcast of an input scalar
1153      */
1154     public abstract ByteVector shiftArithmeticRight(int s);
1155 
1156     /**
1157      * Arithmetically right shifts (or signed right shifts) this vector by the
1158      * broadcast of an input scalar, selecting lane elements controlled by a
1159      * mask.
1160      * <p>
1161      * This is a lane-wise binary operation which applies the primitive arithmetic right
1162      * shift operation ({@code >>}) to each lane to arithmetically
1163      * right shift the element by shift value as specified by the input scalar.
1164      * Only the 3 lowest-order bits of shift value are used. It is as if the shift
1165      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1166      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1167      *
1168      * @param s the input scalar; the number of the bits to right shift
1169      * @param m the mask controlling lane selection
1170      * @return the result of arithmetically right shifting this vector by the
1171      * broadcast of an input scalar
1172      */
1173     public abstract ByteVector shiftArithmeticRight(int s, VectorMask<Byte> m);
1174 
1175     /**
1176      * Arithmetically right shifts (or signed right shifts) this vector by an
1177      * input vector.
1178      * <p>
1179      * This is a lane-wise binary operation which applies the primitive arithmetic right
1180      * shift operation ({@code >>}) to each lane. For each lane of this vector, the
1181      * shift value is the corresponding lane of input vector.
1182      * Only the 3 lowest-order bits of shift value are used. It is as if the shift
1183      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1184      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1185      *
1186      * @param v the input vector
1187      * @return the result of arithmetically right shifting this vector by the
1188      * input vector
1189      */
1190     public abstract ByteVector shiftArithmeticRight(Vector<Byte> v);
1191 
1192     /**
1193      * Performs a masked arithmetic (signed) right shift of this vector, taking
1194      * the per-lane shift values from an input vector.
1195      * <p>
1196      * This lane-wise binary operation applies the primitive arithmetic right
1197      * shift operator ({@code >>}) to every lane, using the corresponding lane of
1198      * the input vector as that lane's shift value.
1199      * A shift value contributes only its 3 lowest-order bits, exactly as though it
1200      * had first been combined with the mask value 0x7 by a bitwise AND ({@code &}).
1201      * The effective shift distance therefore always lies in the range 0 to 7, inclusive.
1202      *
1203      * @param v the input vector supplying the shift values
1204      * @param m the mask controlling lane selection
1205      * @return the result of arithmetically right shifting this vector by the
1206      * input vector, blended with this vector under the mask
1207      */
1208     public ByteVector shiftArithmeticRight(Vector<Byte> v, VectorMask<Byte> m) {
1209         return this.blend(this.shiftArithmeticRight(v), m);
1210     }
1211 
1212     /**
1213      * Rotates the bits of each lane of this vector left by an input scalar.
1214      * <p>
1215      * This lane-wise binary operation rotates left the two's complement binary
1216      * representation of each lane of the first operand (this vector) by the input scalar.
1217      * Since rotation by any multiple of 8 is a no-op, only the 3 lowest-order bits of the input
1218      * value are used, as if that value had first been combined with the
1219      * mask value 0x7 by a bitwise logical AND operator ({@code &}).
1220      *
1221      * @param s the input scalar; the number of bits to rotate left
1222      * @return the result of rotating left this vector by the broadcast of an
1223      * input scalar
1224      */
1225     @ForceInline
1226     public final ByteVector rotateLeft(int s) {
1227         return this.shiftLeft(s).or(this.shiftRight(-s));
1228     }
1229 
1230     /**
1231      * Rotates the bits of each lane of this vector left by an input scalar,
1232      * selecting lane elements controlled by a mask.
1233      * <p>
1234      * This lane-wise binary operation rotates left the two's complement binary
1235      * representation of each lane of the first operand (this vector) by the input scalar.
1236      * Since rotation by any multiple of 8 is a no-op, only the 3 lowest-order bits of the input
1237      * value are used, as if that value had first been combined with the
1238      * mask value 0x7 by a bitwise logical AND operator ({@code &}).
1239      *
1240      * @param s the input scalar; the number of bits to rotate left
1241      * @param m the mask controlling lane selection
1242      * @return the result of rotating left this vector by the broadcast of an
1243      * input scalar
1244      */
1245     @ForceInline
1246     public final ByteVector rotateLeft(int s, VectorMask<Byte> m) {
1247         return this.shiftLeft(s, m).or(this.shiftRight(-s, m), m);
1248     }
1249 
1250     /**
1251      * Rotates the bits of each lane of this vector right by an input scalar.
1252      * <p>
1253      * This lane-wise binary operation rotates right the two's complement binary
1254      * representation of each lane of the first operand (this vector) by the input scalar.
1255      * Since rotation by any multiple of 8 is a no-op, only the 3 lowest-order bits of the input
1256      * value are used, as if that value had first been combined with the
1257      * mask value 0x7 by a bitwise logical AND operator ({@code &}).
1258      *
1259      * @param s the input scalar; the number of bits to rotate right
1260      * @return the result of rotating right this vector by the broadcast of an
1261      * input scalar
1262      */
1263     @ForceInline
1264     public final ByteVector rotateRight(int s) {
1265         return this.shiftRight(s).or(this.shiftLeft(-s));
1266     }
1267 
1268     /**
1269      * Rotates the bits of each lane of this vector right by an input scalar,
1270      * selecting lane elements controlled by a mask.
1271      * <p>
1272      * This lane-wise binary operation rotates right the two's complement binary
1273      * representation of each lane of the first operand (this vector) by the input scalar.
1274      * Since rotation by any multiple of 8 is a no-op, only the 3 lowest-order bits of the input
1275      * value are used, as if that value had first been combined with the
1276      * mask value 0x7 by a bitwise logical AND operator ({@code &}).
1277      *
1278      * @param s the input scalar; the number of bits to rotate right
1279      * @param m the mask controlling lane selection
1280      * @return the result of rotating right this vector by the broadcast of an
1281      * input scalar
1282      */
1283     @ForceInline
1284     public final ByteVector rotateRight(int s, VectorMask<Byte> m) {
1285         return this.shiftRight(s, m).or(this.shiftLeft(-s, m), m);
1286     }
1287 
1288     /**
1289      * {@inheritDoc}
1290      */
1291     @Override
1292     public abstract void intoByteArray(byte[] a, int ix);
1293 
1294     /**
1295      * {@inheritDoc}
1296      */
1297     @Override
1298     public abstract void intoByteArray(byte[] a, int ix, VectorMask<Byte> m);
1299 
1300     /**
1301      * {@inheritDoc}
1302      */
1303     @Override
1304     public abstract void intoByteBuffer(ByteBuffer bb, int ix);
1305 
1306     /**
1307      * {@inheritDoc}
1308      */
1309     @Override
1310     public abstract void intoByteBuffer(ByteBuffer bb, int ix, VectorMask<Byte> m);
1311 
1312 
1313     // Type specific horizontal reductions
1314     /**
1315      * Adds all lane elements of this vector.
1316      * <p>
1317      * This is an associative cross-lane reduction operation which applies the addition
1318      * operation ({@code +}) to lane elements,
1319      * and the identity value is {@code 0}.
1320      *
1321      * @return the addition of all the lane elements of this vector
1322      */
1323     public abstract byte addLanes();
1324 
1325     /**
1326      * Adds all lane elements of this vector, selecting lane elements
1327      * controlled by a mask.
1328      * <p>
1329      * This is an associative cross-lane reduction operation which applies the addition
1330      * operation ({@code +}) to lane elements,
1331      * and the identity value is {@code 0}.
1332      *
1333      * @param m the mask controlling lane selection
1334      * @return the addition of the selected lane elements of this vector
1335      */
1336     public abstract byte addLanes(VectorMask<Byte> m);
1337 
1338     /**
1339      * Multiplies all lane elements of this vector.
1340      * <p>
1341      * This is an associative cross-lane reduction operation which applies the
1342      * multiplication operation ({@code *}) to lane elements,
1343      * and the identity value is {@code 1}.
1344      *
1345      * @return the multiplication of all the lane elements of this vector
1346      */
1347     public abstract byte mulLanes();
1348 
1349     /**
1350      * Multiplies all lane elements of this vector, selecting lane elements
1351      * controlled by a mask.
1352      * <p>
1353      * This is an associative cross-lane reduction operation which applies the
1354      * multiplication operation ({@code *}) to lane elements,
1355      * and the identity value is {@code 1}.
1356      *
1357      * @param m the mask controlling lane selection
1358      * @return the multiplication of the selected lane elements of this vector
1359      */
1360     public abstract byte mulLanes(VectorMask<Byte> m);
1361 
1362     /**
1363      * Returns the minimum lane element of this vector.
1364      * <p>
1365      * This is an associative cross-lane reduction operation which applies the operation
1366      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1367      * and the identity value is
1368      * {@link Byte#MAX_VALUE}.
1369      *
1370      * @return the minimum lane element of this vector
1371      */
1372     public abstract byte minLanes();
1373 
1374     /**
1375      * Returns the minimum lane element of this vector, selecting lane elements
1376      * controlled by a mask.
1377      * <p>
1378      * This is an associative cross-lane reduction operation which applies the operation
1379      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1380      * and the identity value is
1381      * {@link Byte#MAX_VALUE}.
1382      *
1383      * @param m the mask controlling lane selection
1384      * @return the minimum lane element of this vector
1385      */
1386     public abstract byte minLanes(VectorMask<Byte> m);
1387 
1388     /**
1389      * Returns the maximum lane element of this vector.
1390      * <p>
1391      * This is an associative cross-lane reduction operation which applies the operation
1392      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1393      * and the identity value is
1394      * {@link Byte#MIN_VALUE}.
1395      *
1396      * @return the maximum lane element of this vector
1397      */
1398     public abstract byte maxLanes();
1399 
1400     /**
1401      * Returns the maximum lane element of this vector, selecting lane elements
1402      * controlled by a mask.
1403      * <p>
1404      * This is an associative cross-lane reduction operation which applies the operation
1405      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1406      * and the identity value is
1407      * {@link Byte#MIN_VALUE}.
1408      *
1409      * @param m the mask controlling lane selection
1410      * @return the maximum lane element of this vector
1411      */
1412     public abstract byte maxLanes(VectorMask<Byte> m);
1413 
1414     /**
1415      * Logically ORs all lane elements of this vector.
1416      * <p>
1417      * This is an associative cross-lane reduction operation which applies the logical OR
1418      * operation ({@code |}) to lane elements,
1419      * and the identity value is {@code 0}.
1420      *
1421      * @return the logical OR of all the lane elements of this vector
1422      */
1423     public abstract byte orLanes();
1424 
1425     /**
1426      * Logically ORs all lane elements of this vector, selecting lane elements
1427      * controlled by a mask.
1428      * <p>
1429      * This is an associative cross-lane reduction operation which applies the logical OR
1430      * operation ({@code |}) to lane elements,
1431      * and the identity value is {@code 0}.
1432      *
1433      * @param m the mask controlling lane selection
1434      * @return the logical OR of the selected lane elements of this vector
1435      */
1436     public abstract byte orLanes(VectorMask<Byte> m);
1437 
1438     /**
1439      * Logically ANDs all lane elements of this vector.
1440      * <p>
1441      * This is an associative cross-lane reduction operation which applies the logical AND
1442      * operation ({@code &}) to lane elements,
1443      * and the identity value is {@code -1}.
1444      *
1445      * @return the logical AND of all the lane elements of this vector
1446      */
1447     public abstract byte andLanes();
1448 
1449     /**
1450      * Logically ANDs all lane elements of this vector, selecting lane elements
1451      * controlled by a mask.
1452      * <p>
1453      * This is an associative cross-lane reduction operation which applies the logical AND
1454      * operation ({@code &}) to lane elements,
1455      * and the identity value is {@code -1}.
1456      *
1457      * @param m the mask controlling lane selection
1458      * @return the logical AND of the selected lane elements of this vector
1459      */
1460     public abstract byte andLanes(VectorMask<Byte> m);
1461 
1462     /**
1463      * Logically XORs all lane elements of this vector.
1464      * <p>
1465      * This is an associative cross-lane reduction operation which applies the logical XOR
1466      * operation ({@code ^}) to lane elements,
1467      * and the identity value is {@code 0}.
1468      *
1469      * @return the logical XOR of all the lane elements of this vector
1470      */
1471     public abstract byte xorLanes();
1472 
    /**
     * Logically XORs all lane elements of this vector, selecting lane elements
     * controlled by a mask.
     * <p>
     * This is an associative cross-lane reduction operation which applies the logical XOR
     * operation ({@code ^}) to lane elements,
     * and the identity value is {@code 0}.
     *
     * @param m the mask controlling lane selection
     * @return the logical XOR of all the selected lane elements of this vector
     */
    public abstract byte xorLanes(VectorMask<Byte> m);
1485 
1486     // Type specific accessors
1487 
    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract byte lane(int i);
1497 
    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     * <p>
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract ByteVector with(int i, byte e);
1515 
1516     // Type specific extractors
1517 
1518     /**
1519      * Returns an array containing the lane elements of this vector.
1520      * <p>
1521      * This method behaves as if it {@link #intoArray(byte[], int)} stores}
1522      * this vector into an allocated array and returns the array as follows:
1523      * <pre>{@code
1524      *   byte[] a = new byte[this.length()];
1525      *   this.intoArray(a, 0);
1526      *   return a;
1527      * }</pre>
1528      *
1529      * @return an array containing the the lane elements of this vector
1530      */
1531     @ForceInline
1532     public final byte[] toArray() {
1533         byte[] a = new byte[species().length()];
1534         intoArray(a, 0);
1535         return a;
1536     }
1537 
    /**
     * Stores this vector into an array starting at the given offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array at index
     * {@code offset + N}.
     *
     * @param a the array
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException if {@code offset < 0}, or
     * {@code offset > a.length - this.length()}
     */
    public abstract void intoArray(byte[] a, int offset);
1551 
    /**
     * Stores this vector into an array starting at the given offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then the lane element at
     * index {@code N} is stored into the array at index {@code offset + N}.
     *
     * @param a the array
     * @param offset the offset into the array
     * @param m the mask
     * @throws IndexOutOfBoundsException if {@code offset < 0}, or
     * for any vector lane index {@code N} where the mask at lane {@code N}
     * is set, {@code offset >= a.length - N}
     */
    public abstract void intoArray(byte[] a, int offset, VectorMask<Byte> m);
1567 
1568     /**
1569      * Stores this vector into an array using indexes obtained from an index
1570      * map.
1571      * <p>
1572      * For each vector lane, where {@code N} is the vector lane index, the
1573      * lane element at index {@code N} is stored into the array at index
1574      * {@code a_offset + indexMap[i_offset + N]}.
1575      *
1576      * @param a the array
1577      * @param a_offset the offset into the array, may be negative if relative
1578      * indexes in the index map compensate to produce a value within the
1579      * array bounds
1580      * @param indexMap the index map
1581      * @param i_offset the offset into the index map
1582      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
1583      * {@code i_offset > indexMap.length - this.length()},
1584      * or for any vector lane index {@code N} the result of
1585      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
1586      */
1587     public void intoArray(byte[] a, int a_offset, int[] indexMap, int i_offset) {
1588         forEach((n, e) -> a[a_offset + indexMap[i_offset + n]] = e);
1589     }
1590 
1591     /**
1592      * Stores this vector into an array using indexes obtained from an index
1593      * map and using a mask.
1594      * <p>
1595      * For each vector lane, where {@code N} is the vector lane index,
1596      * if the mask lane at index {@code N} is set then the lane element at
1597      * index {@code N} is stored into the array at index
1598      * {@code a_offset + indexMap[i_offset + N]}.
1599      *
1600      * @param a the array
1601      * @param a_offset the offset into the array, may be negative if relative
1602      * indexes in the index map compensate to produce a value within the
1603      * array bounds
1604      * @param m the mask
1605      * @param indexMap the index map
1606      * @param i_offset the offset into the index map
1607      * @throws IndexOutOfBoundsException if {@code j < 0}, or
1608      * {@code i_offset > indexMap.length - this.length()},
1609      * or for any vector lane index {@code N} where the mask at lane
1610      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
1611      * {@code < 0} or {@code >= a.length}
1612      */
1613     public void intoArray(byte[] a, int a_offset, VectorMask<Byte> m, int[] indexMap, int i_offset) {
1614         forEach(m, (n, e) -> a[a_offset + indexMap[i_offset + n]] = e);
1615     }
1616     // Species
1617 
    /**
     * {@inheritDoc}
     * <p>
     * Declared abstract here, so every concrete subclass must supply its own
     * implementation.
     */
    @Override
    public abstract VectorSpecies<Byte> species();
1623 
1624     /**
1625      * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
1626      */
1627     static final class ByteSpecies extends AbstractSpecies<Byte> {
1628         final Function<byte[], ByteVector> vectorFactory;
1629 
1630         private ByteSpecies(VectorShape shape,
1631                           Class<?> vectorType,
1632                           Class<?> maskType,
1633                           Function<byte[], ByteVector> vectorFactory,
1634                           Function<boolean[], VectorMask<Byte>> maskFactory,
1635                           Function<IntUnaryOperator, VectorShuffle<Byte>> shuffleFromArrayFactory,
1636                           fShuffleFromArray<Byte> shuffleFromOpFactory) {
1637             super(shape, byte.class, Byte.SIZE, vectorType, maskType, maskFactory,
1638                   shuffleFromArrayFactory, shuffleFromOpFactory);
1639             this.vectorFactory = vectorFactory;
1640         }
1641 
1642         interface FOp {
1643             byte apply(int i);
1644         }
1645 
1646         ByteVector op(FOp f) {
1647             byte[] res = new byte[length()];
1648             for (int i = 0; i < length(); i++) {
1649                 res[i] = f.apply(i);
1650             }
1651             return vectorFactory.apply(res);
1652         }
1653 
1654         ByteVector op(VectorMask<Byte> o, FOp f) {
1655             byte[] res = new byte[length()];
1656             boolean[] mbits = ((AbstractMask<Byte>)o).getBits();
1657             for (int i = 0; i < length(); i++) {
1658                 if (mbits[i]) {
1659                     res[i] = f.apply(i);
1660                 }
1661             }
1662             return vectorFactory.apply(res);
1663         }
1664     }
1665 
1666     /**
1667      * Finds the preferred species for an element type of {@code byte}.
1668      * <p>
1669      * A preferred species is a species chosen by the platform that has a
1670      * shape of maximal bit size.  A preferred species for different element
1671      * types will have the same shape, and therefore vectors, masks, and
1672      * shuffles created from such species will be shape compatible.
1673      *
1674      * @return the preferred species for an element type of {@code byte}
1675      */
1676     private static ByteSpecies preferredSpecies() {
1677         return (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
1678     }
1679 
1680     /**
1681      * Finds a species for an element type of {@code byte} and shape.
1682      *
1683      * @param s the shape
1684      * @return a species for an element type of {@code byte} and shape
1685      * @throws IllegalArgumentException if no such species exists for the shape
1686      */
1687     static ByteSpecies species(VectorShape s) {
1688         Objects.requireNonNull(s);
1689         switch (s) {
1690             case S_64_BIT: return (ByteSpecies) SPECIES_64;
1691             case S_128_BIT: return (ByteSpecies) SPECIES_128;
1692             case S_256_BIT: return (ByteSpecies) SPECIES_256;
1693             case S_512_BIT: return (ByteSpecies) SPECIES_512;
1694             case S_Max_BIT: return (ByteSpecies) SPECIES_MAX;
1695             default: throw new IllegalArgumentException("Bad shape: " + s);
1696         }
1697     }
1698 
    // Each constant below pairs a shape with its concrete vector and mask classes and with
    // constructor references that serve as the vector, mask and shuffle factories (see the
    // ByteSpecies constructor). The Shuffle::new reference is passed twice because the
    // constructor takes two shuffle factories of different functional-interface types.

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_64 = new ByteSpecies(VectorShape.S_64_BIT, Byte64Vector.class, Byte64Vector.Byte64Mask.class,
                                                                     Byte64Vector::new, Byte64Vector.Byte64Mask::new,
                                                                     Byte64Vector.Byte64Shuffle::new, Byte64Vector.Byte64Shuffle::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_128 = new ByteSpecies(VectorShape.S_128_BIT, Byte128Vector.class, Byte128Vector.Byte128Mask.class,
                                                                      Byte128Vector::new, Byte128Vector.Byte128Mask::new,
                                                                      Byte128Vector.Byte128Shuffle::new, Byte128Vector.Byte128Shuffle::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_256 = new ByteSpecies(VectorShape.S_256_BIT, Byte256Vector.class, Byte256Vector.Byte256Mask.class,
                                                                      Byte256Vector::new, Byte256Vector.Byte256Mask::new,
                                                                      Byte256Vector.Byte256Shuffle::new, Byte256Vector.Byte256Shuffle::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_512 = new ByteSpecies(VectorShape.S_512_BIT, Byte512Vector.class, Byte512Vector.Byte512Mask.class,
                                                                      Byte512Vector::new, Byte512Vector.Byte512Mask::new,
                                                                      Byte512Vector.Byte512Shuffle::new, Byte512Vector.Byte512Shuffle::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_MAX = new ByteSpecies(VectorShape.S_Max_BIT, ByteMaxVector.class, ByteMaxVector.ByteMaxMask.class,
                                                                      ByteMaxVector::new, ByteMaxVector.ByteMaxMask::new,
                                                                      ByteMaxVector.ByteMaxShuffle::new, ByteMaxVector.ByteMaxShuffle::new);
1723 
    // NOTE(review): initialized through preferredSpecies(), i.e. VectorSpecies.ofPreferred(byte.class).
    // Presumably that lookup reads the SPECIES_* constants, in which case this field must stay
    // declared after them - confirm before reordering.
    /**
     * Preferred species for {@link ByteVector}s.
     * A preferred species is a species of maximal bit size for the platform.
     */
    public static final VectorSpecies<Byte> SPECIES_PREFERRED = (VectorSpecies<Byte>) preferredSpecies();
1729 }