1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.util.Objects;
  30 import java.util.function.IntUnaryOperator;
  31 import java.util.function.Function;
  32 import java.util.concurrent.ThreadLocalRandom;
  33 
  34 import jdk.internal.misc.Unsafe;
  35 import jdk.internal.vm.annotation.ForceInline;
  36 import static jdk.incubator.vector.VectorIntrinsics.*;
  37 
  38 
  39 /**
  40  * A specialized {@link Vector} representing an ordered immutable sequence of
  41  * {@code byte} values.
  42  */
  43 @SuppressWarnings("cast")
  44 public abstract class ByteVector extends Vector<Byte> {
  45 
    // No access modifier, hence package-private: only classes in this package
    // can invoke this constructor, so ByteVector cannot be subclassed elsewhere.
    ByteVector() {}
  47 
    // floor(log2(Unsafe.ARRAY_BYTE_INDEX_SCALE)), computed as 31 minus the number
    // of leading zeros. fromArray shifts an element index left by this amount to
    // obtain a byte offset before adding Unsafe.ARRAY_BYTE_BASE_OFFSET.
    private static final int ARRAY_SHIFT = 31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
  49 
    // Unary operator

    /**
     * Scalar callback for unary operations: takes an {@code int} and a
     * {@code byte} and produces a {@code byte}.
     * NOTE(review): {@code i} is presumably the lane index and {@code a} the
     * lane element -- confirm against the {@code uOp} implementations.
     */
    interface FUnOp {
        byte apply(int i, byte a);
    }

    /**
     * Unary-operation hook that subclasses must implement.
     * NOTE(review): presumably applies {@code f} to every lane and returns the
     * results as a new vector; no implementation is visible here.
     */
    abstract ByteVector uOp(FUnOp f);

    /**
     * Mask-accepting variant of {@link #uOp(FUnOp)}.
     * NOTE(review): the treatment of lanes whose mask bit is unset is defined
     * by the implementations -- TODO confirm.
     */
    abstract ByteVector uOp(VectorMask<Byte> m, FUnOp f);
  59 
    // Binary operator

    /**
     * Scalar callback for binary operations: takes an {@code int} and two
     * {@code byte} operands and produces a {@code byte}. Also used as the
     * combining function of {@code rOp}.
     * NOTE(review): {@code i} is presumably the lane index, {@code a} the
     * element of this vector and {@code b} the element of the other operand
     * -- confirm against the {@code bOp} implementations.
     */
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }

    /**
     * Binary-operation hook that subclasses must implement.
     * NOTE(review): presumably combines this vector with {@code v} lane by
     * lane using {@code f}; no implementation is visible here.
     */
    abstract ByteVector bOp(Vector<Byte> v, FBinOp f);

    /**
     * Mask-accepting variant of {@link #bOp(Vector, FBinOp)}.
     * NOTE(review): the treatment of lanes whose mask bit is unset is defined
     * by the implementations -- TODO confirm.
     */
    abstract ByteVector bOp(Vector<Byte> v, VectorMask<Byte> m, FBinOp f);
  69 
    // Trinary operator

    /**
     * Scalar callback for three-operand operations: takes an {@code int} and
     * three {@code byte} operands and produces a {@code byte}.
     * NOTE(review): {@code i} is presumably the lane index and {@code a},
     * {@code b}, {@code c} the elements of this vector, {@code v1} and
     * {@code v2} respectively -- confirm against the {@code tOp} implementations.
     */
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }

    /**
     * Three-operand hook that subclasses must implement.
     * NOTE(review): presumably combines this vector with {@code v1} and
     * {@code v2} lane by lane using {@code f}; no implementation is visible here.
     */
    abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, FTriOp f);

    /**
     * Mask-accepting variant of {@link #tOp(Vector, Vector, FTriOp)}.
     * NOTE(review): the treatment of lanes whose mask bit is unset is defined
     * by the implementations -- TODO confirm.
     */
    abstract ByteVector tOp(Vector<Byte> v1, Vector<Byte> v2, VectorMask<Byte> m, FTriOp f);
  79 
    // Reduction operator

    /**
     * Reduction hook that subclasses must implement; it yields a single
     * {@code byte} rather than a vector.
     * NOTE(review): presumably folds the lanes with {@code f}, starting from
     * the initial value {@code v} -- confirm against the implementations.
     */
    abstract byte rOp(byte v, FBinOp f);
  83 
    // Binary test

    /**
     * Scalar callback for binary tests: takes an {@code int} and two
     * {@code byte} operands and produces a {@code boolean}.
     * NOTE(review): {@code i} is presumably the lane index -- confirm against
     * the {@code bTest} implementations.
     */
    interface FBinTest {
        boolean apply(int i, byte a, byte b);
    }

    /**
     * Binary-test hook that subclasses must implement; the outcome is
     * returned as a mask.
     * NOTE(review): presumably each mask lane holds the result of {@code f}
     * for the corresponding lanes of this vector and {@code v} -- TODO confirm.
     */
    abstract VectorMask<Byte> bTest(Vector<Byte> v, FBinTest f);
  91 
    // Foreach

    /**
     * Scalar consumer: takes an {@code int} and a {@code byte} and returns
     * nothing.
     * NOTE(review): {@code i} is presumably the lane index and {@code a} the
     * lane element -- confirm against the {@code forEach} implementations.
     */
    interface FUnCon {
        void apply(int i, byte a);
    }

    /**
     * Iteration hook that subclasses must implement.
     * NOTE(review): presumably invokes {@code f} once per lane; no
     * implementation is visible here.
     */
    abstract void forEach(FUnCon f);

    /**
     * Mask-accepting variant of {@link #forEach(FUnCon)}.
     * NOTE(review): presumably visits only lanes whose mask bit is set --
     * TODO confirm.
     */
    abstract void forEach(VectorMask<Byte> m, FUnCon f);
 101 
 102     // Static factories
 103 
 104     /**
 105      * Returns a vector where all lane elements are set to the default
 106      * primitive value.
 107      *
 108      * @param species species of desired vector
 109      * @return a zero vector of given species
 110      */
 111     @ForceInline
 112     @SuppressWarnings("unchecked")
 113     public static ByteVector zero(VectorSpecies<Byte> species) {
 114         return VectorIntrinsics.broadcastCoerced((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 115                                                  0, species,
 116                                                  ((bits, s) -> ((ByteSpecies)s).op(i -> (byte)bits)));
 117     }
 118 
 119     /**
 120      * Loads a vector from a byte array starting at an offset.
 121      * <p>
 122      * Bytes are composed into primitive lane elements according to the
 123      * native byte order of the underlying platform
 124      * <p>
 125      * This method behaves as if it returns the result of calling the
 126      * byte buffer, offset, and mask accepting
 127      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 128      * <pre>{@code
 129      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, VectorMask.allTrue());
 130      * }</pre>
 131      *
 132      * @param species species of desired vector
 133      * @param a the byte array
 134      * @param offset the offset into the array
 135      * @return a vector loaded from a byte array
 136      * @throws IndexOutOfBoundsException if {@code i < 0} or
 137      * {@code offset > a.length - (species.length() * species.elementSize() / Byte.SIZE)}
 138      */
 139     @ForceInline
 140     @SuppressWarnings("unchecked")
 141     public static ByteVector fromByteArray(VectorSpecies<Byte> species, byte[] a, int offset) {
 142         Objects.requireNonNull(a);
 143         offset = VectorIntrinsics.checkIndex(offset, a.length, species.bitSize() / Byte.SIZE);
 144         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 145                                      a, ((long) offset) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 146                                      a, offset, species,
 147                                      (c, idx, s) -> {
 148                                          ByteBuffer bbc = ByteBuffer.wrap(c, idx, a.length - idx).order(ByteOrder.nativeOrder());
 149                                          ByteBuffer tb = bbc;
 150                                          return ((ByteSpecies)s).op(i -> tb.get());
 151                                      });
 152     }
 153 
 154     /**
 155      * Loads a vector from a byte array starting at an offset and using a
 156      * mask.
 157      * <p>
 158      * Bytes are composed into primitive lane elements according to the
 159      * native byte order of the underlying platform.
 160      * <p>
 161      * This method behaves as if it returns the result of calling the
 162      * byte buffer, offset, and mask accepting
 163      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask) method} as follows:
 164      * <pre>{@code
 165      * return fromByteBuffer(species, ByteBuffer.wrap(a), offset, m);
 166      * }</pre>
 167      *
 168      * @param species species of desired vector
 169      * @param a the byte array
 170      * @param offset the offset into the array
 171      * @param m the mask
 172      * @return a vector loaded from a byte array
 173      * @throws IndexOutOfBoundsException if {@code offset < 0} or
 174      * for any vector lane index {@code N} where the mask at lane {@code N}
 175      * is set
 176      * {@code offset >= a.length - (N * species.elementSize() / Byte.SIZE)}
 177      */
 178     @ForceInline
 179     public static ByteVector fromByteArray(VectorSpecies<Byte> species, byte[] a, int offset, VectorMask<Byte> m) {
 180         return zero(species).blend(fromByteArray(species, a, offset), m);
 181     }
 182 
 183     /**
 184      * Loads a vector from an array starting at offset.
 185      * <p>
 186      * For each vector lane, where {@code N} is the vector lane index, the
 187      * array element at index {@code offset + N} is placed into the
 188      * resulting vector at lane index {@code N}.
 189      *
 190      * @param species species of desired vector
 191      * @param a the array
 192      * @param offset the offset into the array
 193      * @return the vector loaded from an array
 194      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 195      * {@code offset > a.length - species.length()}
 196      */
 197     @ForceInline
 198     @SuppressWarnings("unchecked")
 199     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int offset){
 200         Objects.requireNonNull(a);
 201         offset = VectorIntrinsics.checkIndex(offset, a.length, species.length());
 202         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 203                                      a, (((long) offset) << ARRAY_SHIFT) + Unsafe.ARRAY_BYTE_BASE_OFFSET,
 204                                      a, offset, species,
 205                                      (c, idx, s) -> ((ByteSpecies)s).op(n -> c[idx + n]));
 206     }
 207 
 208 
 209     /**
 210      * Loads a vector from an array starting at offset and using a mask.
 211      * <p>
 212      * For each vector lane, where {@code N} is the vector lane index,
 213      * if the mask lane at index {@code N} is set then the array element at
 214      * index {@code offset + N} is placed into the resulting vector at lane index
 215      * {@code N}, otherwise the default element value is placed into the
 216      * resulting vector at lane index {@code N}.
 217      *
 218      * @param species species of desired vector
 219      * @param a the array
 220      * @param offset the offset into the array
 221      * @param m the mask
 222      * @return the vector loaded from an array
 223      * @throws IndexOutOfBoundsException if {@code offset < 0}, or
 224      * for any vector lane index {@code N} where the mask at lane {@code N}
 225      * is set {@code offset > a.length - N}
 226      */
 227     @ForceInline
 228     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int offset, VectorMask<Byte> m) {
 229         return zero(species).blend(fromArray(species, a, offset), m);
 230     }
 231 
 232     /**
 233      * Loads a vector from an array using indexes obtained from an index
 234      * map.
 235      * <p>
 236      * For each vector lane, where {@code N} is the vector lane index, the
 237      * array element at index {@code a_offset + indexMap[i_offset + N]} is placed into the
 238      * resulting vector at lane index {@code N}.
 239      *
 240      * @param species species of desired vector
 241      * @param a the array
 242      * @param a_offset the offset into the array, may be negative if relative
 243      * indexes in the index map compensate to produce a value within the
 244      * array bounds
 245      * @param indexMap the index map
 246      * @param i_offset the offset into the index map
 247      * @return the vector loaded from an array
 248      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 249      * {@code i_offset > indexMap.length - species.length()},
 250      * or for any vector lane index {@code N} the result of
 251      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
 252      */
 253     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int a_offset, int[] indexMap, int i_offset) {
 254         return ((ByteSpecies)species).op(n -> a[a_offset + indexMap[i_offset + n]]);
 255     }
 256     /**
 257      * Loads a vector from an array using indexes obtained from an index
 258      * map and using a mask.
 259      * <p>
 260      * For each vector lane, where {@code N} is the vector lane index,
 261      * if the mask lane at index {@code N} is set then the array element at
 262      * index {@code a_offset + indexMap[i_offset + N]} is placed into the resulting vector
 263      * at lane index {@code N}.
 264      *
 265      * @param species species of desired vector
 266      * @param a the array
 267      * @param a_offset the offset into the array, may be negative if relative
 268      * indexes in the index map compensate to produce a value within the
 269      * array bounds
 270      * @param m the mask
 271      * @param indexMap the index map
 272      * @param i_offset the offset into the index map
 273      * @return the vector loaded from an array
 274      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
 275      * {@code i_offset > indexMap.length - species.length()},
 276      * or for any vector lane index {@code N} where the mask at lane
 277      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
 278      * {@code < 0} or {@code >= a.length}
 279      */
 280     public static ByteVector fromArray(VectorSpecies<Byte> species, byte[] a, int a_offset, VectorMask<Byte> m, int[] indexMap, int i_offset) {
 281         return ((ByteSpecies)species).op(m, n -> a[a_offset + indexMap[i_offset + n]]);
 282     }
 283 
 284     /**
 285      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 286      * offset into the byte buffer.
 287      * <p>
 288      * Bytes are composed into primitive lane elements according to the
 289      * native byte order of the underlying platform.
 290      * <p>
 291      * This method behaves as if it returns the result of calling the
 292      * byte buffer, offset, and mask accepting
 293      * {@link #fromByteBuffer(VectorSpecies, ByteBuffer, int, VectorMask)} method} as follows:
 294      * <pre>{@code
 295      *   return fromByteBuffer(b, offset, VectorMask.allTrue())
 296      * }</pre>
 297      *
 298      * @param species species of desired vector
 299      * @param bb the byte buffer
 300      * @param offset the offset into the byte buffer
 301      * @return a vector loaded from a byte buffer
 302      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 303      * or {@code > b.limit()},
 304      * or if there are fewer than
 305      * {@code species.length() * species.elementSize() / Byte.SIZE} bytes
 306      * remaining in the byte buffer from the given offset
 307      */
 308     @ForceInline
 309     @SuppressWarnings("unchecked")
 310     public static ByteVector fromByteBuffer(VectorSpecies<Byte> species, ByteBuffer bb, int offset) {
 311         if (bb.order() != ByteOrder.nativeOrder()) {
 312             throw new IllegalArgumentException();
 313         }
 314         offset = VectorIntrinsics.checkIndex(offset, bb.limit(), species.bitSize() / Byte.SIZE);
 315         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 316                                      U.getReference(bb, BYTE_BUFFER_HB), U.getLong(bb, BUFFER_ADDRESS) + offset,
 317                                      bb, offset, species,
 318                                      (c, idx, s) -> {
 319                                          ByteBuffer bbc = c.duplicate().position(idx).order(ByteOrder.nativeOrder());
 320                                          ByteBuffer tb = bbc;
 321                                          return ((ByteSpecies)s).op(i -> tb.get());
 322                                      });
 323     }
 324 
 325     /**
 326      * Loads a vector from a {@link ByteBuffer byte buffer} starting at an
 327      * offset into the byte buffer and using a mask.
 328      * <p>
 329      * This method behaves as if the byte buffer is viewed as a primitive
 330      * {@link java.nio.Buffer buffer} for the primitive element type,
 331      * according to the native byte order of the underlying platform, and
 332      * the returned vector is loaded with a mask from a primitive array
 333      * obtained from the primitive buffer.
 334      * The following pseudocode expresses the behaviour, where
 335      * {@code EBuffer} is the primitive buffer type, {@code e} is the
 336      * primitive element type, and {@code ESpecies} is the primitive
 337      * species for {@code e}:
 338      * <pre>{@code
 339      * EBuffer eb = b.duplicate().
 340      *     order(ByteOrder.nativeOrder()).position(offset).
 341      *     asEBuffer();
 342      * e[] es = new e[species.length()];
 343      * for (int n = 0; n < t.length; n++) {
 344      *     if (m.isSet(n))
 345      *         es[n] = eb.get(n);
 346      * }
 347      * EVector r = EVector.fromArray(es, 0, m);
 348      * }</pre>
 349      *
 350      * @param species species of desired vector
 351      * @param bb the byte buffer
 352      * @param offset the offset into the byte buffer
 353      * @param m the mask
 354      * @return a vector loaded from a byte buffer
 355      * @throws IndexOutOfBoundsException if the offset is {@code < 0},
 356      * or {@code > b.limit()},
 357      * for any vector lane index {@code N} where the mask at lane {@code N}
 358      * is set
 359      * {@code offset >= b.limit() - (N * species.elementSize() / Byte.SIZE)}
 360      */
 361     @ForceInline
 362     public static ByteVector fromByteBuffer(VectorSpecies<Byte> species, ByteBuffer bb, int offset, VectorMask<Byte> m) {
 363         return zero(species).blend(fromByteBuffer(species, bb, offset), m);
 364     }
 365 
 366     /**
 367      * Returns a vector where all lane elements are set to the primitive
 368      * value {@code e}.
 369      *
 370      * @param species species of the desired vector
 371      * @param e the value
 372      * @return a vector of vector where all lane elements are set to
 373      * the primitive value {@code e}
 374      */
 375     @ForceInline
 376     @SuppressWarnings("unchecked")
 377     public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
 378         return VectorIntrinsics.broadcastCoerced(
 379             (Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 380             e, species,
 381             ((bits, sp) -> ((ByteSpecies)sp).op(i -> (byte)bits)));
 382     }
 383 
 384     /**
 385      * Returns a vector where each lane element is set to given
 386      * primitive values.
 387      * <p>
 388      * For each vector lane, where {@code N} is the vector lane index, the
 389      * the primitive value at index {@code N} is placed into the resulting
 390      * vector at lane index {@code N}.
 391      *
 392      * @param species species of the desired vector
 393      * @param es the given primitive values
 394      * @return a vector where each lane element is set to given primitive
 395      * values
 396      * @throws IndexOutOfBoundsException if {@code es.length < species.length()}
 397      */
 398     @ForceInline
 399     @SuppressWarnings("unchecked")
 400     public static ByteVector scalars(VectorSpecies<Byte> species, byte... es) {
 401         Objects.requireNonNull(es);
 402         int ix = VectorIntrinsics.checkIndex(0, es.length, species.length());
 403         return VectorIntrinsics.load((Class<ByteVector>) species.vectorType(), byte.class, species.length(),
 404                                      es, Unsafe.ARRAY_BYTE_BASE_OFFSET,
 405                                      es, ix, species,
 406                                      (c, idx, sp) -> ((ByteSpecies)sp).op(n -> c[idx + n]));
 407     }
 408 
 409     /**
 410      * Returns a vector where the first lane element is set to the primtive
 411      * value {@code e}, all other lane elements are set to the default
 412      * value.
 413      *
 414      * @param species species of the desired vector
 415      * @param e the value
 416      * @return a vector where the first lane element is set to the primitive
 417      * value {@code e}
 418      */
 419     @ForceInline
 420     public static final ByteVector single(VectorSpecies<Byte> species, byte e) {
 421         return zero(species).with(0, e);
 422     }
 423 
 424     /**
 425      * Returns a vector where each lane element is set to a randomly
 426      * generated primitive value.
 427      *
 428      * The semantics are equivalent to calling
 429      * (byte){@link ThreadLocalRandom#nextInt()}
 430      *
 431      * @param species species of the desired vector
 432      * @return a vector where each lane elements is set to a randomly
 433      * generated primitive value
 434      */
 435     public static ByteVector random(VectorSpecies<Byte> species) {
 436         ThreadLocalRandom r = ThreadLocalRandom.current();
 437         return ((ByteSpecies)species).op(i -> (byte) r.nextInt());
 438     }
 439 
 440     // Ops
 441 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector add(Vector<Byte> v);

    /**
     * Adds this vector to the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary operation which applies the primitive addition operation
     * ({@code +}) to each lane.
     *
     * @param s the input scalar
     * @return the result of adding this vector to the broadcast of an input
     * scalar
     * @see #add(Vector)
     */
    public abstract ByteVector add(byte s);

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector add(Vector<Byte> v, VectorMask<Byte> m);

    /**
     * Adds this vector to the broadcast of an input scalar,
     * selecting lane elements controlled by a mask.
     * <p>
     * This is a lane-wise binary operation which applies the primitive addition operation
     * ({@code +}) to each lane.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of adding this vector to the broadcast of an input
     * scalar
     * @see #add(Vector, VectorMask)
     */
    public abstract ByteVector add(byte s, VectorMask<Byte> m);
 479 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector sub(Vector<Byte> v);

    /**
     * Subtracts the broadcast of an input scalar from this vector.
     * <p>
     * This is a lane-wise binary operation which applies the primitive subtraction
     * operation ({@code -}) to each lane.
     *
     * @param s the input scalar
     * @return the result of subtracting the broadcast of an input
     * scalar from this vector
     * @see #sub(Vector)
     */
    public abstract ByteVector sub(byte s);

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector sub(Vector<Byte> v, VectorMask<Byte> m);

    /**
     * Subtracts the broadcast of an input scalar from this vector, selecting
     * lane elements controlled by a mask.
     * <p>
     * This is a lane-wise binary operation which applies the primitive subtraction
     * operation ({@code -}) to each lane.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of subtracting the broadcast of an input
     * scalar from this vector
     * @see #sub(Vector, VectorMask)
     */
    public abstract ByteVector sub(byte s, VectorMask<Byte> m);
 517 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector mul(Vector<Byte> v);

    /**
     * Multiplies this vector with the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary operation which applies the primitive multiplication
     * operation ({@code *}) to each lane.
     *
     * @param s the input scalar
     * @return the result of multiplying this vector with the broadcast of an
     * input scalar
     * @see #mul(Vector)
     */
    public abstract ByteVector mul(byte s);

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector mul(Vector<Byte> v, VectorMask<Byte> m);

    /**
     * Multiplies this vector with the broadcast of an input scalar, selecting
     * lane elements controlled by a mask.
     * <p>
     * This is a lane-wise binary operation which applies the primitive multiplication
     * operation ({@code *}) to each lane.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of multiplying this vector with the broadcast of an
     * input scalar
     * @see #mul(Vector, VectorMask)
     */
    public abstract ByteVector mul(byte s, VectorMask<Byte> m);
 555 
    // Both overloads are redeclared here so that the result is typed as
    // ByteVector; calls on a ByteVector can then be chained without a cast.
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector neg();

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector neg(VectorMask<Byte> m);
 567 
    // Both overloads are redeclared here so that the result is typed as
    // ByteVector; calls on a ByteVector can then be chained without a cast.
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector abs();

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector abs(VectorMask<Byte> m);
 579 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector min(Vector<Byte> v);

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector min(Vector<Byte> v, VectorMask<Byte> m);

    /**
     * Returns the minimum of this vector and the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary operation which applies the operation
     * {@code (a, b) -> Math.min(a, b)} to each lane.
     *
     * @param s the input scalar
     * @return the minimum of this vector and the broadcast of an input scalar
     * @see #min(Vector)
     */
    public abstract ByteVector min(byte s);
 602 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector max(Vector<Byte> v);

    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector max(Vector<Byte> v, VectorMask<Byte> m);

    /**
     * Returns the maximum of this vector and the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary operation which applies the operation
     * {@code (a, b) -> Math.max(a, b)} to each lane.
     *
     * @param s the input scalar
     * @return the maximum of this vector and the broadcast of an input scalar
     * @see #max(Vector)
     */
    public abstract ByteVector max(byte s);
 625 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract VectorMask<Byte> equal(Vector<Byte> v);

    /**
     * Tests if this vector is equal to the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary test operation which applies the primitive equals
     * operation ({@code ==}) to each lane.
     *
     * @param s the input scalar
     * @return the result mask of testing if this vector is equal to the
     * broadcast of an input scalar
     * @see #equal(Vector)
     */
    public abstract VectorMask<Byte> equal(byte s);
 643 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract VectorMask<Byte> notEqual(Vector<Byte> v);

    /**
     * Tests if this vector is not equal to the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary test operation which applies the primitive not equals
     * operation ({@code !=}) to each lane.
     *
     * @param s the input scalar
     * @return the result mask of testing if this vector is not equal to the
     * broadcast of an input scalar
     * @see #notEqual(Vector)
     */
    public abstract VectorMask<Byte> notEqual(byte s);
 661 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract VectorMask<Byte> lessThan(Vector<Byte> v);

    /**
     * Tests if this vector is less than the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary test operation which applies the primitive less than
     * operation ({@code <}) to each lane.
     *
     * @param s the input scalar
     * @return the result mask of testing if this vector is less than the
     * broadcast of an input scalar
     * @see #lessThan(Vector)
     */
    public abstract VectorMask<Byte> lessThan(byte s);
 679 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract VectorMask<Byte> lessThanEq(Vector<Byte> v);

    /**
     * Tests if this vector is less than or equal to the broadcast of an input
     * scalar.
     * <p>
     * This is a lane-wise binary test operation which applies the primitive less than
     * or equal to operation ({@code <=}) to each lane.
     *
     * @param s the input scalar
     * @return the result mask of testing if this vector is less than or equal
     * to the broadcast of an input scalar
     * @see #lessThanEq(Vector)
     */
    public abstract VectorMask<Byte> lessThanEq(byte s);
 697 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract VectorMask<Byte> greaterThan(Vector<Byte> v);

    /**
     * Tests if this vector is greater than the broadcast of an input scalar.
     * <p>
     * This is a lane-wise binary test operation which applies the primitive greater than
     * operation ({@code >}) to each lane.
     *
     * @param s the input scalar
     * @return the result mask of testing if this vector is greater than the
     * broadcast of an input scalar
     * @see #greaterThan(Vector)
     */
    public abstract VectorMask<Byte> greaterThan(byte s);
 715 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract VectorMask<Byte> greaterThanEq(Vector<Byte> v);

    /**
     * Tests if this vector is greater than or equal to the broadcast of an
     * input scalar.
     * <p>
     * This is a lane-wise binary test operation which applies the primitive greater than
     * or equal to operation ({@code >=}) to each lane.
     *
     * @param s the input scalar
     * @return the result mask of testing if this vector is greater than or
     * equal to the broadcast of an input scalar
     * @see #greaterThanEq(Vector)
     */
    public abstract VectorMask<Byte> greaterThanEq(byte s);
 734 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);

    /**
     * Blends the lane elements of this vector with those of the broadcast of an
     * input scalar, selecting lanes controlled by a mask.
     * <p>
     * For each lane of the mask, at lane index {@code N}, if the mask lane
     * is set then the lane element at {@code N} from the broadcast of the
     * input scalar is selected and placed into the resulting vector at
     * {@code N}, otherwise the lane element at {@code N} from this vector is
     * selected and placed into the resulting vector at {@code N}.
     *
     * @param s the input scalar
     * @param m the mask controlling lane selection
     * @return the result of blending the lane elements of this vector with
     * those of the broadcast of an input scalar
     * @see #blend(Vector, VectorMask)
     */
    public abstract ByteVector blend(byte s, VectorMask<Byte> m);
 757 
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector rearrange(Vector<Byte> v,
                                                      VectorShuffle<Byte> s, VectorMask<Byte> m);

    // NOTE(review): the shuffle parameter is named m here, while the overload
    // above names its shuffle s and uses m for the mask -- consider aligning
    // the names when the signatures are next touched.
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector rearrange(VectorShuffle<Byte> m);
 770 
    // Redeclared here so that the result is typed as ByteVector; calls on a
    // ByteVector can then be chained without a cast.
    /**
     * {@inheritDoc}
     */
    @Override
    public abstract ByteVector reshape(VectorSpecies<Byte> s);
 776 
 777     /**
 778      * {@inheritDoc}
 779      */
 780     @Override
 781     public abstract ByteVector rotateLanesLeft(int i);
 782 
 783     /**
 784      * {@inheritDoc}
 785      */
 786     @Override
 787     public abstract ByteVector rotateLanesRight(int i);
 788 
 789     /**
 790      * {@inheritDoc}
 791      */
 792     @Override
 793     public abstract ByteVector shiftLanesLeft(int i);
 794 
 795     /**
 796      * {@inheritDoc}
 797      */
 798     @Override
 799     public abstract ByteVector shiftLanesRight(int i);
 800 
 801 
 802 
 803     /**
 804      * Bitwise ANDs this vector with an input vector.
 805      * <p>
 806      * This is a lane-wise binary operation which applies the primitive bitwise AND
 807      * operation ({@code &}) to each lane.
 808      *
 809      * @param v the input vector
 810      * @return the bitwise AND of this vector with the input vector
 811      */
 812     public abstract ByteVector and(Vector<Byte> v);
 813 
 814     /**
 815      * Bitwise ANDs this vector with the broadcast of an input scalar.
 816      * <p>
 817      * This is a lane-wise binary operation which applies the primitive bitwise AND
 818      * operation ({@code &}) to each lane.
 819      *
 820      * @param s the input scalar
 821      * @return the bitwise AND of this vector with the broadcast of an input
 822      * scalar
 823      */
 824     public abstract ByteVector and(byte s);
 825 
 826     /**
 827      * Bitwise ANDs this vector with an input vector, selecting lane elements
 828      * controlled by a mask.
 829      * <p>
 830      * This is a lane-wise binary operation which applies the primitive bitwise AND
 831      * operation ({@code &}) to each lane.
 832      *
 833      * @param v the input vector
 834      * @param m the mask controlling lane selection
 835      * @return the bitwise AND of this vector with the input vector, under mask {@code m}
 836      */
 837     public abstract ByteVector and(Vector<Byte> v, VectorMask<Byte> m);
 838 
 839     /**
 840      * Bitwise ANDs this vector with the broadcast of an input scalar, selecting
 841      * lane elements controlled by a mask.
 842      * <p>
 843      * This is a lane-wise binary operation which applies the primitive bitwise AND
 844      * operation ({@code &}) to each lane.
 845      *
 846      * @param s the input scalar
 847      * @param m the mask controlling lane selection
 848      * @return the bitwise AND of this vector with the broadcast of the input
 849      * scalar, under mask {@code m}
 850      */
 851     public abstract ByteVector and(byte s, VectorMask<Byte> m);
 852 
 853     /**
 854      * Bitwise ORs this vector with an input vector.
 855      * <p>
 856      * This is a lane-wise binary operation which applies the primitive bitwise OR
 857      * operation ({@code |}) to each lane.
 858      *
 859      * @param v the input vector
 860      * @return the bitwise OR of this vector with the input vector
 861      */
 862     public abstract ByteVector or(Vector<Byte> v);
 863 
 864     /**
 865      * Bitwise ORs this vector with the broadcast of an input scalar.
 866      * <p>
 867      * This is a lane-wise binary operation which applies the primitive bitwise OR
 868      * operation ({@code |}) to each lane.
 869      *
 870      * @param s the input scalar
 871      * @return the bitwise OR of this vector with the broadcast of an input
 872      * scalar
 873      */
 874     public abstract ByteVector or(byte s);
 875 
 876     /**
 877      * Bitwise ORs this vector with an input vector, selecting lane elements
 878      * controlled by a mask.
 879      * <p>
 880      * This is a lane-wise binary operation which applies the primitive bitwise OR
 881      * operation ({@code |}) to each lane.
 882      *
 883      * @param v the input vector
 884      * @param m the mask controlling lane selection
 885      * @return the bitwise OR of this vector with the input vector, under mask {@code m}
 886      */
 887     public abstract ByteVector or(Vector<Byte> v, VectorMask<Byte> m);
 888 
 889     /**
 890      * Bitwise ORs this vector with the broadcast of an input scalar, selecting
 891      * lane elements controlled by a mask.
 892      * <p>
 893      * This is a lane-wise binary operation which applies the primitive bitwise OR
 894      * operation ({@code |}) to each lane.
 895      *
 896      * @param s the input scalar
 897      * @param m the mask controlling lane selection
 898      * @return the bitwise OR of this vector with the broadcast of the input
 899      * scalar, under mask {@code m}
 900      */
 901     public abstract ByteVector or(byte s, VectorMask<Byte> m);
 902 
 903     /**
 904      * Bitwise XORs this vector with an input vector.
 905      * <p>
 906      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 907      * operation ({@code ^}) to each lane.
 908      *
 909      * @param v the input vector
 910      * @return the bitwise XOR of this vector with the input vector
 911      */
 912     public abstract ByteVector xor(Vector<Byte> v);
 913 
 914     /**
 915      * Bitwise XORs this vector with the broadcast of an input scalar.
 916      * <p>
 917      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 918      * operation ({@code ^}) to each lane.
 919      *
 920      * @param s the input scalar
 921      * @return the bitwise XOR of this vector with the broadcast of an input
 922      * scalar
 923      */
 924     public abstract ByteVector xor(byte s);
 925 
 926     /**
 927      * Bitwise XORs this vector with an input vector, selecting lane elements
 928      * controlled by a mask.
 929      * <p>
 930      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 931      * operation ({@code ^}) to each lane.
 932      *
 933      * @param v the input vector
 934      * @param m the mask controlling lane selection
 935      * @return the bitwise XOR of this vector with the input vector, under mask {@code m}
 936      */
 937     public abstract ByteVector xor(Vector<Byte> v, VectorMask<Byte> m);
 938 
 939     /**
 940      * Bitwise XORs this vector with the broadcast of an input scalar, selecting
 941      * lane elements controlled by a mask.
 942      * <p>
 943      * This is a lane-wise binary operation which applies the primitive bitwise XOR
 944      * operation ({@code ^}) to each lane.
 945      *
 946      * @param s the input scalar
 947      * @param m the mask controlling lane selection
 948      * @return the bitwise XOR of this vector with the broadcast of the input
 949      * scalar, under mask {@code m}
 950      */
 951     public abstract ByteVector xor(byte s, VectorMask<Byte> m);
 952 
 953     /**
 954      * Bitwise NOTs this vector.
 955      * <p>
 956      * This is a lane-wise unary operation which applies the primitive bitwise NOT
 957      * operation ({@code ~}) to each lane.
 958      *
 959      * @return the bitwise NOT of this vector
 960      */
 961     public abstract ByteVector not();
 962 
 963     /**
 964      * Bitwise NOTs this vector, selecting lane elements controlled by a mask.
 965      * <p>
 966      * This is a lane-wise unary operation which applies the primitive bitwise NOT
 967      * operation ({@code ~}) to each lane.
 968      *
 969      * @param m the mask controlling lane selection
 970      * @return the bitwise NOT of this vector, under mask {@code m}
 971      */
 972     public abstract ByteVector not(VectorMask<Byte> m);
 973 
 974     /**
 975      * Logically left shifts this vector by the broadcast of an input scalar.
 976      * <p>
 977      * This is a lane-wise binary operation which applies the primitive logical left shift
 978      * operation ({@code <<}) to each lane to left shift the
 979      * element by the shift value specified by the input scalar.
 980      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift value
 981      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
 982      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
 983      *
 984      * @param s the input scalar; the number of bits to left shift
 985      * @return the result of logically left shifting this vector by the
 986      * broadcast of the input scalar
 987      */
 988     public abstract ByteVector shiftLeft(int s);
 989 
 990     /**
 991      * Logically left shifts this vector by the broadcast of an input scalar,
 992      * selecting lane elements controlled by a mask.
 993      * <p>
 994      * This is a lane-wise binary operation which applies the primitive logical left shift
 995      * operation ({@code <<}) to each lane to left shift the
 996      * element by the shift value specified by the input scalar.
 997      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift value
 998      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
 999      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1000      *
1001      * @param s the input scalar; the number of bits to left shift
1002      * @param m the mask controlling lane selection
1003      * @return the result of logically left shifting this vector by the
1004      * broadcast of the input scalar, under mask {@code m}
1005      */
1006     public abstract ByteVector shiftLeft(int s, VectorMask<Byte> m);
1007 
1008     /**
1009      * Logically left shifts this vector by an input vector.
1010      * <p>
1011      * This is a lane-wise binary operation which applies the primitive logical left shift
1012      * operation ({@code <<}) to each lane. For each lane of this vector, the
1013      * shift value is the corresponding lane of the input vector.
1014      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift value
1015      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1016      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1017      *
1018      * @param v the input vector
1019      * @return the result of logically left shifting this vector by the input
1020      * vector
1021      */
1022     public abstract ByteVector shiftLeft(Vector<Byte> v);
1023 
1024     /**
1025      * Performs a masked logical left shift of this vector, taking the shift
1026      * count for each lane from the corresponding lane of an input vector.
1027      * <p>
1028      * The unmasked shift {@link #shiftLeft(Vector)} is computed first; as there, only
1029      * the 3 lowest-order bits of each shift value are used (as if ANDed with
1030      * {@code 0x7}), so the effective shift distance lies in the range 0 to 7.
1031      * That result is then blended into this vector under the mask, i.e. the
1032      * method behaves as {@code this.blend(this.shiftLeft(v), m)}.
1033      *
1034      * @param v the input vector supplying the per-lane shift values
1035      * @param m the mask controlling lane selection
1036      * @return the result of logically left shifting this vector by the input
1037      * vector, blended with this vector under the mask
1038      */
1039     public ByteVector shiftLeft(Vector<Byte> v, VectorMask<Byte> m) {
1040         final ByteVector shifted = shiftLeft(v);
1041         return blend(shifted, m);
1042     }
1043 
1044     // logical, or unsigned, shift right
1045 
1046      /**
1047      * Logically right shifts (or unsigned right shifts) this vector by the
1048      * broadcast of an input scalar.
1049      * <p>
1050      * This is a lane-wise binary operation which applies the primitive logical right shift
1051      * operation ({@code >>>}) to each lane to logically right shift the
1052      * element by the shift value specified by the input scalar.
1053      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift value
1054      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1055      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1056      *
1057      * @param s the input scalar; the number of bits to right shift
1058      * @return the result of logically right shifting this vector by the
1059      * broadcast of the input scalar
1060      */
1061     public abstract ByteVector shiftRight(int s);
1062 
1063      /**
1064      * Logically right shifts (or unsigned right shifts) this vector by the
1065      * broadcast of an input scalar, selecting lane elements controlled by a
1066      * mask.
1067      * <p>
1068      * This is a lane-wise binary operation which applies the primitive logical right shift
1069      * operation ({@code >>>}) to each lane to logically right shift the
1070      * element by the shift value specified by the input scalar.
1071      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift value
1072      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1073      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1074      *
1075      * @param s the input scalar; the number of bits to right shift
1076      * @param m the mask controlling lane selection
1077      * @return the result of logically right shifting this vector by the
1078      * broadcast of the input scalar, under mask {@code m}
1079      */
1080     public abstract ByteVector shiftRight(int s, VectorMask<Byte> m);
1081 
1082     /**
1083      * Logically right shifts (or unsigned right shifts) this vector by an
1084      * input vector.
1085      * <p>
1086      * This is a lane-wise binary operation which applies the primitive logical right shift
1087      * operation ({@code >>>}) to each lane. For each lane of this vector, the
1088      * shift value is the corresponding lane of the input vector.
1089      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift value
1090      * were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1091      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1092      *
1093      * @param v the input vector
1094      * @return the result of logically right shifting this vector by the
1095      * input vector
1096      */
1097     public abstract ByteVector shiftRight(Vector<Byte> v);
1098 
1099     /**
1100      * Performs a masked logical (unsigned) right shift of this vector, taking the
1101      * shift count for each lane from the corresponding lane of an input vector.
1102      * <p>
1103      * The unmasked shift {@link #shiftRight(Vector)} is computed first; as there, only
1104      * the 3 lowest-order bits of each shift value are used (as if ANDed with
1105      * {@code 0x7}), so the effective shift distance lies in the range 0 to 7.
1106      * That result is then blended into this vector under the mask, i.e. the
1107      * method behaves as {@code this.blend(this.shiftRight(v), m)}.
1108      *
1109      * @param v the input vector supplying the per-lane shift values
1110      * @param m the mask controlling lane selection
1111      * @return the result of logically right shifting this vector by the
1112      * input vector, blended with this vector under the mask
1113      */
1114     public ByteVector shiftRight(Vector<Byte> v, VectorMask<Byte> m) {
1115         final ByteVector shifted = shiftRight(v);
1116         return blend(shifted, m);
1117     }
1118 
1119     /**
1120      * Arithmetically right shifts (or signed right shifts) this vector by the
1121      * broadcast of an input scalar.
1122      * <p>
1123      * This is a lane-wise binary operation which applies the primitive arithmetic right
1124      * shift operation ({@code >>}) to each lane to arithmetically
1125      * right shift the element by the shift value specified by the input scalar.
1126      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1127      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1128      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1129      *
1130      * @param s the input scalar; the number of bits to right shift
1131      * @return the result of arithmetically right shifting this vector by the
1132      * broadcast of the input scalar
1133      */
1134     public abstract ByteVector shiftArithmeticRight(int s);
1135 
1136     /**
1137      * Arithmetically right shifts (or signed right shifts) this vector by the
1138      * broadcast of an input scalar, selecting lane elements controlled by a
1139      * mask.
1140      * <p>
1141      * This is a lane-wise binary operation which applies the primitive arithmetic right
1142      * shift operation ({@code >>}) to each lane to arithmetically
1143      * right shift the element by the shift value specified by the input scalar.
1144      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1145      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1146      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1147      *
1148      * @param s the input scalar; the number of bits to right shift
1149      * @param m the mask controlling lane selection
1150      * @return the result of arithmetically right shifting this vector by the
1151      * broadcast of the input scalar, under mask {@code m}
1152      */
1153     public abstract ByteVector shiftArithmeticRight(int s, VectorMask<Byte> m);
1154 
1155     /**
1156      * Arithmetically right shifts (or signed right shifts) this vector by an
1157      * input vector.
1158      * <p>
1159      * This is a lane-wise binary operation which applies the primitive arithmetic right
1160      * shift operation ({@code >>}) to each lane. For each lane of this vector, the
1161      * shift value is the corresponding lane of the input vector.
1162      * Only the 3 lowest-order bits of the shift value are used. It is as if the shift
1163      * value were subjected to a bitwise logical AND operator ({@code &}) with the mask value 0x7.
1164      * The shift distance actually used is therefore always in the range 0 to 7, inclusive.
1165      *
1166      * @param v the input vector
1167      * @return the result of arithmetically right shifting this vector by the
1168      * input vector
1169      */
1170     public abstract ByteVector shiftArithmeticRight(Vector<Byte> v);
1171 
1172     /**
1173      * Performs a masked arithmetic (signed) right shift of this vector, taking the
1174      * shift count for each lane from the corresponding lane of an input vector.
1175      * <p>
1176      * The unmasked shift {@link #shiftArithmeticRight(Vector)} is computed first; as
1177      * there, only the 3 lowest-order bits of each shift value are used (as if ANDed
1178      * with {@code 0x7}), so the effective shift distance lies in the range 0 to 7.
1179      * That result is then blended into this vector under the mask, i.e. the
1180      * method behaves as {@code this.blend(this.shiftArithmeticRight(v), m)}.
1181      *
1182      * @param v the input vector supplying the per-lane shift values
1183      * @param m the mask controlling lane selection
1184      * @return the result of arithmetically right shifting this vector by the
1185      * input vector, blended with this vector under the mask
1186      */
1187     public ByteVector shiftArithmeticRight(Vector<Byte> v, VectorMask<Byte> m) {
1188         final ByteVector shifted = shiftArithmeticRight(v);
1189         return blend(shifted, m);
1190     }
1191 
1192     /**
1193      * Rotates each lane of this vector left by the broadcast of an input scalar.
1194      * <p>
1195      * This is a lane-wise binary operation: the two's complement binary representation
1196      * of each lane is rotated left by {@code s} bits, computed as the bitwise OR of
1197      * {@code shiftLeft(s)} and {@code shiftRight(-s)}. Rotation by any multiple of 8
1198      * is a no-op, so only the 3 lowest-order bits of {@code s} are used, as if
1199      * {@code s} were ANDed ({@code &}) with the mask value {@code 0x7}.
1200      *
1201      * @param s the input scalar; the number of bits to rotate left
1202      * @return this vector rotated left by the broadcast of the input scalar
1203      */
1204     @ForceInline
1205     public final ByteVector rotateLeft(int s) {
1206         final ByteVector leftPart = shiftLeft(s);
1207         return leftPart.or(shiftRight(-s));
1208     }
1209 
1210     /**
1211      * Rotates each lane of this vector left by the broadcast of an input scalar,
1212      * selecting lane elements controlled by a mask.
1213      * <p>
1214      * This is a lane-wise binary operation: the two's complement binary representation
1215      * of each selected lane is rotated left by {@code s} bits, computed as the masked
1216      * bitwise OR of {@code shiftLeft(s, m)} and {@code shiftRight(-s, m)}. Rotation by
1217      * any multiple of 8 is a no-op, so only the 3 lowest-order bits of {@code s} are
1218      * used, as if {@code s} were ANDed ({@code &}) with the mask value {@code 0x7}.
1219      *
1220      * @param s the input scalar; the number of bits to rotate left
1221      * @param m the mask controlling lane selection
1222      * @return this vector rotated left by the broadcast of the input scalar, under mask {@code m}
1223      */
1224     @ForceInline
1225     public final ByteVector rotateLeft(int s, VectorMask<Byte> m) {
1226         final ByteVector leftPart = shiftLeft(s, m);
1227         return leftPart.or(shiftRight(-s, m), m);
1228     }
1229 
1230     /**
1231      * Rotates each lane of this vector right by the broadcast of an input scalar.
1232      * <p>
1233      * This is a lane-wise binary operation: the two's complement binary representation
1234      * of each lane is rotated right by {@code s} bits, computed as the bitwise OR of
1235      * {@code shiftRight(s)} and {@code shiftLeft(-s)}. Rotation by any multiple of 8
1236      * is a no-op, so only the 3 lowest-order bits of {@code s} are used, as if
1237      * {@code s} were ANDed ({@code &}) with the mask value {@code 0x7}.
1238      *
1239      * @param s the input scalar; the number of bits to rotate right
1240      * @return this vector rotated right by the broadcast of the input scalar
1241      */
1242     @ForceInline
1243     public final ByteVector rotateRight(int s) {
1244         final ByteVector rightPart = shiftRight(s);
1245         return rightPart.or(shiftLeft(-s));
1246     }
1247 
1248     /**
1249      * Rotates each lane of this vector right by the broadcast of an input scalar,
1250      * selecting lane elements controlled by a mask.
1251      * <p>
1252      * This is a lane-wise binary operation: the two's complement binary representation
1253      * of each selected lane is rotated right by {@code s} bits, computed as the masked
1254      * bitwise OR of {@code shiftRight(s, m)} and {@code shiftLeft(-s, m)}. Rotation by
1255      * any multiple of 8 is a no-op, so only the 3 lowest-order bits of {@code s} are
1256      * used, as if {@code s} were ANDed ({@code &}) with the mask value {@code 0x7}.
1257      *
1258      * @param s the input scalar; the number of bits to rotate right
1259      * @param m the mask controlling lane selection
1260      * @return this vector rotated right by the broadcast of the input scalar, under mask {@code m}
1261      */
1262     @ForceInline
1263     public final ByteVector rotateRight(int s, VectorMask<Byte> m) {
1264         final ByteVector rightPart = shiftRight(s, m);
1265         return rightPart.or(shiftLeft(-s, m), m);
1266     }
1267 
1268     /**
1269      * {@inheritDoc}
1270      */
1271     @Override
1272     public abstract void intoByteArray(byte[] a, int ix);
1273 
1274     /**
1275      * {@inheritDoc}
1276      */
1277     @Override
1278     public abstract void intoByteArray(byte[] a, int ix, VectorMask<Byte> m);
1279 
1280     /**
1281      * {@inheritDoc}
1282      */
1283     @Override
1284     public abstract void intoByteBuffer(ByteBuffer bb, int ix);
1285 
1286     /**
1287      * {@inheritDoc}
1288      */
1289     @Override
1290     public abstract void intoByteBuffer(ByteBuffer bb, int ix, VectorMask<Byte> m);
1291 
1292 
1293     // Type specific horizontal reductions
1294     /**
1295      * Adds all lane elements of this vector.
1296      * <p>
1297      * This is an associative cross-lane reduction operation which applies the addition
1298      * operation ({@code +}) to lane elements,
1299      * and the identity value is {@code 0}.
1300      *
1301      * @return the addition of all the lane elements of this vector
1302      */
1303     public abstract byte addAll();
1304 
1305     /**
1306      * Adds all lane elements of this vector, selecting lane elements
1307      * controlled by a mask.
1308      * <p>
1309      * This is an associative cross-lane reduction operation which applies the addition
1310      * operation ({@code +}) to lane elements,
1311      * and the identity value is {@code 0}.
1312      *
1313      * @param m the mask controlling lane selection
1314      * @return the addition of the selected lane elements of this vector
1315      */
1316     public abstract byte addAll(VectorMask<Byte> m);
1317 
1318     /**
1319      * Multiplies all lane elements of this vector.
1320      * <p>
1321      * This is an associative cross-lane reduction operation which applies the
1322      * multiplication operation ({@code *}) to lane elements,
1323      * and the identity value is {@code 1}.
1324      *
1325      * @return the multiplication of all the lane elements of this vector
1326      */
1327     public abstract byte mulAll();
1328 
1329     /**
1330      * Multiplies all lane elements of this vector, selecting lane elements
1331      * controlled by a mask.
1332      * <p>
1333      * This is an associative cross-lane reduction operation which applies the
1334      * multiplication operation ({@code *}) to lane elements,
1335      * and the identity value is {@code 1}.
1336      *
1337      * @param m the mask controlling lane selection
1338      * @return the multiplication of the selected lane elements of this vector
1339      */
1340     public abstract byte mulAll(VectorMask<Byte> m);
1341 
1342     /**
1343      * Returns the minimum lane element of this vector.
1344      * <p>
1345      * This is an associative cross-lane reduction operation which applies the operation
1346      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1347      * and the identity value is
1348      * {@link Byte#MAX_VALUE}.
1349      *
1350      * @return the minimum lane element of this vector
1351      */
1352     public abstract byte minAll();
1353 
1354     /**
1355      * Returns the minimum lane element of this vector, selecting lane elements
1356      * controlled by a mask.
1357      * <p>
1358      * This is an associative cross-lane reduction operation which applies the operation
1359      * {@code (a, b) -> Math.min(a, b)} to lane elements,
1360      * and the identity value is
1361      * {@link Byte#MAX_VALUE}.
1362      *
1363      * @param m the mask controlling lane selection
1364      * @return the minimum of the selected lane elements of this vector
1365      */
1366     public abstract byte minAll(VectorMask<Byte> m);
1367 
1368     /**
1369      * Returns the maximum lane element of this vector.
1370      * <p>
1371      * This is an associative cross-lane reduction operation which applies the operation
1372      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1373      * and the identity value is
1374      * {@link Byte#MIN_VALUE}.
1375      *
1376      * @return the maximum lane element of this vector
1377      */
1378     public abstract byte maxAll();
1379 
1380     /**
1381      * Returns the maximum lane element of this vector, selecting lane elements
1382      * controlled by a mask.
1383      * <p>
1384      * This is an associative cross-lane reduction operation which applies the operation
1385      * {@code (a, b) -> Math.max(a, b)} to lane elements,
1386      * and the identity value is
1387      * {@link Byte#MIN_VALUE}.
1388      *
1389      * @param m the mask controlling lane selection
1390      * @return the maximum of the selected lane elements of this vector
1391      */
1392     public abstract byte maxAll(VectorMask<Byte> m);
1393 
1394     /**
1395      * Logically ORs all lane elements of this vector.
1396      * <p>
1397      * This is an associative cross-lane reduction operation which applies the logical OR
1398      * operation ({@code |}) to lane elements,
1399      * and the identity value is {@code 0}.
1400      *
1401      * @return the logical OR of all the lane elements of this vector
1402      */
1403     public abstract byte orAll();
1404 
1405     /**
1406      * Logically ORs all lane elements of this vector, selecting lane elements
1407      * controlled by a mask.
1408      * <p>
1409      * This is an associative cross-lane reduction operation which applies the logical OR
1410      * operation ({@code |}) to lane elements,
1411      * and the identity value is {@code 0}.
1412      *
1413      * @param m the mask controlling lane selection
1414      * @return the logical OR of the selected lane elements of this vector
1415      */
1416     public abstract byte orAll(VectorMask<Byte> m);
1417 
1418     /**
1419      * Logically ANDs all lane elements of this vector.
1420      * <p>
1421      * This is an associative cross-lane reduction operation which applies the logical AND
1422      * operation ({@code &}) to lane elements,
1423      * and the identity value is {@code -1}.
1424      *
1425      * @return the logical AND of all the lane elements of this vector
1426      */
1427     public abstract byte andAll();
1428 
1429     /**
1430      * Logically ANDs all lane elements of this vector, selecting lane elements
1431      * controlled by a mask.
1432      * <p>
1433      * This is an associative cross-lane reduction operation which applies the logical AND
1434      * operation ({@code &}) to lane elements,
1435      * and the identity value is {@code -1}.
1436      *
1437      * @param m the mask controlling lane selection
1438      * @return the logical AND of the selected lane elements of this vector
1439      */
1440     public abstract byte andAll(VectorMask<Byte> m);
1441 
1442     /**
1443      * Logically XORs all lane elements of this vector.
1444      * <p>
1445      * This is an associative cross-lane reduction operation which applies the logical XOR
1446      * operation ({@code ^}) to lane elements,
1447      * and the identity value is {@code 0}.
1448      *
1449      * @return the logical XOR of all the lane elements of this vector
1450      */
1451     public abstract byte xorAll();
1452 
1453     /**
1454      * Logically XORs all lane elements of this vector, selecting lane elements
1455      * controlled by a mask.
1456      * <p>
1457      * This is an associative cross-lane reduction operation which applies the logical XOR
1458      * operation ({@code ^}) to lane elements,
1459      * and the identity value is {@code 0}.
1460      *
1461      * @param m the mask controlling lane selection
1462      * @return the logical XOR of the selected lane elements of this vector
1463      */
1464     public abstract byte xorAll(VectorMask<Byte> m);
1465 
1466     // Type specific accessors
1467 
1468     /**
1469      * Gets the lane element at lane index {@code i}.
1470      *
1471      * @param i the lane index
1472      * @return the lane element at lane index {@code i}
1473      * @throws IllegalArgumentException if the index is out of range
1474      * ({@code < 0 || >= length()})
1475      */
1476     public abstract byte lane(int i);
1477 
1478     /**
1479      * Replaces the lane element of this vector at lane index {@code i} with
1480      * value {@code e}.
1481      * <p>
1482      * This is a cross-lane operation and behaves as if it returns the result
1483      * of blending this vector with an input vector that is the result of
1484      * broadcasting {@code e} and a mask that has only one lane set at lane
1485      * index {@code i}.
1486      *
1487      * @param i the lane index of the lane element to be replaced
1488      * @param e the value to be placed
1489      * @return the result of replacing the lane element of this vector at lane
1490      * index {@code i} with value {@code e}.
1491      * @throws IllegalArgumentException if the index is out of range
1492      * ({@code < 0 || >= length()})
1493      */
1494     public abstract ByteVector with(int i, byte e);
1495 
1496     // Type specific extractors
1497 
1498     /**
1499      * Returns an array containing the lane elements of this vector.
1500      * <p>
1501      * This method behaves as if it {@link #intoArray(byte[], int)} stores}
1502      * this vector into an allocated array and returns the array as follows:
1503      * <pre>{@code
1504      *   byte[] a = new byte[this.length()];
1505      *   this.intoArray(a, 0);
1506      *   return a;
1507      * }</pre>
1508      *
1509      * @return an array containing the the lane elements of this vector
1510      */
1511     @ForceInline
1512     public final byte[] toArray() {
1513         byte[] a = new byte[species().length()];
1514         intoArray(a, 0);
1515         return a;
1516     }
1517 
    /**
     * Stores this vector into an array starting at the given offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array at index
     * {@code offset + N}.
     *
     * @param a the array
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException if {@code offset < 0}, or
     * {@code offset > a.length - this.length()}
     */
    public abstract void intoArray(byte[] a, int offset);
1531 
    /**
     * Stores this vector into an array starting at the given offset and
     * using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then the lane element at
     * index {@code N} is stored into the array at index {@code offset + N}.
     *
     * @param a the array
     * @param offset the offset into the array
     * @param m the mask
     * @throws IndexOutOfBoundsException if {@code offset < 0}, or
     * for any vector lane index {@code N} where the mask at lane {@code N}
     * is set, {@code offset >= a.length - N}
     */
    public abstract void intoArray(byte[] a, int offset, VectorMask<Byte> m);
1547 
1548     /**
1549      * Stores this vector into an array using indexes obtained from an index
1550      * map.
1551      * <p>
1552      * For each vector lane, where {@code N} is the vector lane index, the
1553      * lane element at index {@code N} is stored into the array at index
1554      * {@code a_offset + indexMap[i_offset + N]}.
1555      *
1556      * @param a the array
1557      * @param a_offset the offset into the array, may be negative if relative
1558      * indexes in the index map compensate to produce a value within the
1559      * array bounds
1560      * @param indexMap the index map
1561      * @param i_offset the offset into the index map
1562      * @throws IndexOutOfBoundsException if {@code i_offset < 0}, or
1563      * {@code i_offset > indexMap.length - this.length()},
1564      * or for any vector lane index {@code N} the result of
1565      * {@code a_offset + indexMap[i_offset + N]} is {@code < 0} or {@code >= a.length}
1566      */
1567     public void intoArray(byte[] a, int a_offset, int[] indexMap, int i_offset) {
1568         forEach((n, e) -> a[a_offset + indexMap[i_offset + n]] = e);
1569     }
1570 
1571     /**
1572      * Stores this vector into an array using indexes obtained from an index
1573      * map and using a mask.
1574      * <p>
1575      * For each vector lane, where {@code N} is the vector lane index,
1576      * if the mask lane at index {@code N} is set then the lane element at
1577      * index {@code N} is stored into the array at index
1578      * {@code a_offset + indexMap[i_offset + N]}.
1579      *
1580      * @param a the array
1581      * @param a_offset the offset into the array, may be negative if relative
1582      * indexes in the index map compensate to produce a value within the
1583      * array bounds
1584      * @param m the mask
1585      * @param indexMap the index map
1586      * @param i_offset the offset into the index map
1587      * @throws IndexOutOfBoundsException if {@code j < 0}, or
1588      * {@code i_offset > indexMap.length - this.length()},
1589      * or for any vector lane index {@code N} where the mask at lane
1590      * {@code N} is set the result of {@code a_offset + indexMap[i_offset + N]} is
1591      * {@code < 0} or {@code >= a.length}
1592      */
1593     public void intoArray(byte[] a, int a_offset, VectorMask<Byte> m, int[] indexMap, int i_offset) {
1594         forEach(m, (n, e) -> a[a_offset + indexMap[i_offset + n]] = e);
1595     }
1596     // Species
1597 
    /**
     * {@inheritDoc}
     * <p>
     * For a {@code ByteVector} the species is typed as
     * {@code VectorSpecies<Byte>}.
     */
    @Override
    public abstract VectorSpecies<Byte> species();
1603 
1604     /**
1605      * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
1606      */
1607     static final class ByteSpecies extends AbstractSpecies<Byte> {
1608         final Function<byte[], ByteVector> vectorFactory;
1609 
1610         private ByteSpecies(VectorShape shape,
1611                           Class<?> vectorType,
1612                           Class<?> maskType,
1613                           Function<byte[], ByteVector> vectorFactory,
1614                           Function<boolean[], VectorMask<Byte>> maskFactory,
1615                           Function<IntUnaryOperator, VectorShuffle<Byte>> shuffleFromArrayFactory,
1616                           fShuffleFromArray<Byte> shuffleFromOpFactory) {
1617             super(shape, byte.class, Byte.SIZE, vectorType, maskType, maskFactory,
1618                   shuffleFromArrayFactory, shuffleFromOpFactory);
1619             this.vectorFactory = vectorFactory;
1620         }
1621 
1622         interface FOp {
1623             byte apply(int i);
1624         }
1625 
1626         ByteVector op(FOp f) {
1627             byte[] res = new byte[length()];
1628             for (int i = 0; i < length(); i++) {
1629                 res[i] = f.apply(i);
1630             }
1631             return vectorFactory.apply(res);
1632         }
1633 
1634         ByteVector op(VectorMask<Byte> o, FOp f) {
1635             byte[] res = new byte[length()];
1636             boolean[] mbits = ((AbstractMask<Byte>)o).getBits();
1637             for (int i = 0; i < length(); i++) {
1638                 if (mbits[i]) {
1639                     res[i] = f.apply(i);
1640                 }
1641             }
1642             return vectorFactory.apply(res);
1643         }
1644     }
1645 
1646     /**
1647      * Finds the preferred species for an element type of {@code byte}.
1648      * <p>
1649      * A preferred species is a species chosen by the platform that has a
1650      * shape of maximal bit size.  A preferred species for different element
1651      * types will have the same shape, and therefore vectors, masks, and
1652      * shuffles created from such species will be shape compatible.
1653      *
1654      * @return the preferred species for an element type of {@code byte}
1655      */
1656     private static ByteSpecies preferredSpecies() {
1657         return (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
1658     }
1659 
1660     /**
1661      * Finds a species for an element type of {@code byte} and shape.
1662      *
1663      * @param s the shape
1664      * @return a species for an element type of {@code byte} and shape
1665      * @throws IllegalArgumentException if no such species exists for the shape
1666      */
1667     static ByteSpecies species(VectorShape s) {
1668         Objects.requireNonNull(s);
1669         switch (s) {
1670             case S_64_BIT: return (ByteSpecies) SPECIES_64;
1671             case S_128_BIT: return (ByteSpecies) SPECIES_128;
1672             case S_256_BIT: return (ByteSpecies) SPECIES_256;
1673             case S_512_BIT: return (ByteSpecies) SPECIES_512;
1674             case S_Max_BIT: return (ByteSpecies) SPECIES_MAX;
1675             default: throw new IllegalArgumentException("Bad shape: " + s);
1676         }
1677     }
1678 
1679     /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
1680     public static final VectorSpecies<Byte> SPECIES_64 = new ByteSpecies(VectorShape.S_64_BIT, Byte64Vector.class, Byte64Vector.Byte64Mask.class,
1681                                                                      Byte64Vector::new, Byte64Vector.Byte64Mask::new,
1682                                                                      Byte64Vector.Byte64Shuffle::new, Byte64Vector.Byte64Shuffle::new);
1683 
1684     /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
1685     public static final VectorSpecies<Byte> SPECIES_128 = new ByteSpecies(VectorShape.S_128_BIT, Byte128Vector.class, Byte128Vector.Byte128Mask.class,
1686                                                                       Byte128Vector::new, Byte128Vector.Byte128Mask::new,
1687                                                                       Byte128Vector.Byte128Shuffle::new, Byte128Vector.Byte128Shuffle::new);
1688 
1689     /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
1690     public static final VectorSpecies<Byte> SPECIES_256 = new ByteSpecies(VectorShape.S_256_BIT, Byte256Vector.class, Byte256Vector.Byte256Mask.class,
1691                                                                       Byte256Vector::new, Byte256Vector.Byte256Mask::new,
1692                                                                       Byte256Vector.Byte256Shuffle::new, Byte256Vector.Byte256Shuffle::new);
1693 
1694     /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
1695     public static final VectorSpecies<Byte> SPECIES_512 = new ByteSpecies(VectorShape.S_512_BIT, Byte512Vector.class, Byte512Vector.Byte512Mask.class,
1696                                                                       Byte512Vector::new, Byte512Vector.Byte512Mask::new,
1697                                                                       Byte512Vector.Byte512Shuffle::new, Byte512Vector.Byte512Shuffle::new);
1698 
1699     /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
1700     public static final VectorSpecies<Byte> SPECIES_MAX = new ByteSpecies(VectorShape.S_Max_BIT, ByteMaxVector.class, ByteMaxVector.ByteMaxMask.class,
1701                                                                       ByteMaxVector::new, ByteMaxVector.ByteMaxMask::new,
1702                                                                       ByteMaxVector.ByteMaxShuffle::new, ByteMaxVector.ByteMaxShuffle::new);
1703 
1704     /**
1705      * Preferred species for {@link ByteVector}s.
1706      * A preferred species is a species of maximal bit size for the platform.
1707      */
1708     public static final VectorSpecies<Byte> SPECIES_PREFERRED = (VectorSpecies<Byte>) preferredSpecies();
1709 }