1 /*
   2  * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ByteOrder;
  29 import java.util.Arrays;
  30 import java.util.Objects;
  31 import java.util.function.BinaryOperator;
  32 import java.util.function.IntUnaryOperator;
  33 import java.util.function.Function;
  34 import java.util.function.UnaryOperator;
  35 import java.util.concurrent.ThreadLocalRandom;
  36 
  37 import jdk.internal.misc.Unsafe;
  38 import jdk.internal.vm.annotation.ForceInline;
  39 
  40 import static jdk.incubator.vector.VectorIntrinsics.*;
  41 import static jdk.incubator.vector.VectorOperators.*;
  42 
  43 // -- This file was mechanically generated: Do not edit! -- //
  44 
  45 /**
  46  * A specialized {@link Vector} representing an ordered immutable sequence of
  47  * {@code byte} values.
  48  */
  49 @SuppressWarnings("cast")  // warning: redundant cast
  50 public abstract class ByteVector extends AbstractVector<Byte> {
  51 
  52     ByteVector() {}
  53 
  54     static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  55 
  56     @ForceInline
  57     static int opCode(Operator op) {
  58         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  59     }
  60     @ForceInline
  61     static int opCode(Operator op, int requireKind) {
  62         requireKind |= VO_OPCODE_VALID;
  63         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  64     }
  65     @ForceInline
  66     static boolean opKind(Operator op, int bit) {
  67         return VectorOperators.opKind(op, bit);
  68     }
  69 
  70     // Virtualized factories and operators,
  71     // coded with portable definitions.
  72     // These are all @ForceInline in case
  73     // they need to be used performantly.
  74     // The various shape-specific subclasses
  75     // also specialize them by wrapping
  76     // them in a call like this:
  77     //    return (Byte128Vector)
  78     //       super.bOp((Byte128Vector) o);
  79     // The purpose of that is to forcibly inline
  80     // the generic definition from this file
  81     // into a sharply type- and size-specific
  82     // wrapper in the subclass file, so that
  83     // the JIT can specialize the code.
  84     // The code is only inlined and expanded
  85     // if it gets hot.  Think of it as a cheap
  86     // and lazy version of C++ templates.
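    // As a concrete (hypothetical) sketch of that wrapping pattern, a
    // shape-specific subclass would override a template method roughly
    // like this, so the JIT sees only the sharply typed version:
    //
    //    @ForceInline
    //    @Override
    //    Byte128Vector uOp(FUnOp f) {
    //        return (Byte128Vector) super.uOpTemplate(f);
    //    }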
  87 
  88     // Virtualized getter
  89 
  90     /*package-private*/
  91     abstract byte[] getElements();
  92 
  93     // Virtualized constructors
  94 
  95     /**
  96      * Build a vector directly using my own constructor.
  97      * It is an error if the array is aliased elsewhere.
  98      */
  99     /*package-private*/
 100     abstract ByteVector vectorFactory(byte[] vec);
 101 
 102     /**
 103      * Build a mask directly using my species.
 104      * It is an error if the array is aliased elsewhere.
 105      */
 106     /*package-private*/
 107     @ForceInline
 108     final
 109     AbstractMask<Byte> maskFactory(boolean[] bits) {
 110         return vspecies().maskFactory(bits);
 111     }
 112 
 113     // Constant loader (takes dummy as vector arg)
 114     interface FVOp {
 115         byte apply(int i);
 116     }
 117 
 118     /*package-private*/
 119     @ForceInline
 120     final
 121     ByteVector vOp(FVOp f) {
 122         byte[] res = new byte[length()];
 123         for (int i = 0; i < res.length; i++) {
 124             res[i] = f.apply(i);
 125         }
 126         return vectorFactory(res);
 127     }
 128 
 129     @ForceInline
 130     final
 131     ByteVector vOp(VectorMask<Byte> m, FVOp f) {
 132         byte[] res = new byte[length()];
 133         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 134         for (int i = 0; i < res.length; i++) {
 135             if (mbits[i]) {
 136                 res[i] = f.apply(i);
 137             }
 138         }
 139         return vectorFactory(res);
 140     }
 141 
 142     // Unary operator
 143 
 144     /*package-private*/
 145     interface FUnOp {
 146         byte apply(int i, byte a);
 147     }
 148 
 149     /*package-private*/
 150     abstract
 151     ByteVector uOp(FUnOp f);
 152     @ForceInline
 153     final
 154     ByteVector uOpTemplate(FUnOp f) {
 155         byte[] vec = getElements();
 156         byte[] res = new byte[length()];
 157         for (int i = 0; i < res.length; i++) {
 158             res[i] = f.apply(i, vec[i]);
 159         }
 160         return vectorFactory(res);
 161     }
 162 
 163     /*package-private*/
 164     abstract
 165     ByteVector uOp(VectorMask<Byte> m,
 166                              FUnOp f);
 167     @ForceInline
 168     final
 169     ByteVector uOpTemplate(VectorMask<Byte> m,
 170                                      FUnOp f) {
 171         byte[] vec = getElements();
 172         byte[] res = new byte[length()];
 173         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 174         for (int i = 0; i < res.length; i++) {
 175             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 176         }
 177         return vectorFactory(res);
 178     }
 179 
 180     // Binary operator
 181 
 182     /*package-private*/
 183     interface FBinOp {
 184         byte apply(int i, byte a, byte b);
 185     }
 186 
 187     /*package-private*/
 188     abstract
 189     ByteVector bOp(Vector<Byte> o,
 190                              FBinOp f);
 191     @ForceInline
 192     final
 193     ByteVector bOpTemplate(Vector<Byte> o,
 194                                      FBinOp f) {
 195         byte[] res = new byte[length()];
 196         byte[] vec1 = this.getElements();
 197         byte[] vec2 = ((ByteVector)o).getElements();
 198         for (int i = 0; i < res.length; i++) {
 199             res[i] = f.apply(i, vec1[i], vec2[i]);
 200         }
 201         return vectorFactory(res);
 202     }
 203 
 204     /*package-private*/
 205     abstract
 206     ByteVector bOp(Vector<Byte> o,
 207                              VectorMask<Byte> m,
 208                              FBinOp f);
 209     @ForceInline
 210     final
 211     ByteVector bOpTemplate(Vector<Byte> o,
 212                                      VectorMask<Byte> m,
 213                                      FBinOp f) {
 214         byte[] res = new byte[length()];
 215         byte[] vec1 = this.getElements();
 216         byte[] vec2 = ((ByteVector)o).getElements();
 217         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 218         for (int i = 0; i < res.length; i++) {
 219             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 220         }
 221         return vectorFactory(res);
 222     }
 223 
 224     // Ternary operator
 225 
 226     /*package-private*/
 227     interface FTriOp {
 228         byte apply(int i, byte a, byte b, byte c);
 229     }
 230 
 231     /*package-private*/
 232     abstract
 233     ByteVector tOp(Vector<Byte> o1,
 234                              Vector<Byte> o2,
 235                              FTriOp f);
 236     @ForceInline
 237     final
 238     ByteVector tOpTemplate(Vector<Byte> o1,
 239                                      Vector<Byte> o2,
 240                                      FTriOp f) {
 241         byte[] res = new byte[length()];
 242         byte[] vec1 = this.getElements();
 243         byte[] vec2 = ((ByteVector)o1).getElements();
 244         byte[] vec3 = ((ByteVector)o2).getElements();
 245         for (int i = 0; i < res.length; i++) {
 246             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 247         }
 248         return vectorFactory(res);
 249     }
 250 
 251     /*package-private*/
 252     abstract
 253     ByteVector tOp(Vector<Byte> o1,
 254                              Vector<Byte> o2,
 255                              VectorMask<Byte> m,
 256                              FTriOp f);
 257     @ForceInline
 258     final
 259     ByteVector tOpTemplate(Vector<Byte> o1,
 260                                      Vector<Byte> o2,
 261                                      VectorMask<Byte> m,
 262                                      FTriOp f) {
 263         byte[] res = new byte[length()];
 264         byte[] vec1 = this.getElements();
 265         byte[] vec2 = ((ByteVector)o1).getElements();
 266         byte[] vec3 = ((ByteVector)o2).getElements();
 267         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 268         for (int i = 0; i < res.length; i++) {
 269             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 270         }
 271         return vectorFactory(res);
 272     }
 273 
 274     // Reduction operator
 275 
 276     /*package-private*/
 277     abstract
 278     byte rOp(byte v, FBinOp f);
 279     @ForceInline
 280     final
 281     byte rOpTemplate(byte v, FBinOp f) {
 282         byte[] vec = getElements();
 283         for (int i = 0; i < vec.length; i++) {
 284             v = f.apply(i, v, vec[i]);
 285         }
 286         return v;
 287     }
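
    // Sketch of how a reduction reaches this fallback: a lane-wise sum such
    // as reduceLanes(ADD) (defined later in this class) can, when not
    // intrinsified, fold the lanes with a call like
    //    rOp((byte) 0, (i, a, b) -> (byte) (a + b));
    // starting from the operation's identity value.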
 288 
 289     // Memory reference
 290 
 291     /*package-private*/
 292     interface FLdOp<M> {
 293         byte apply(M memory, int offset, int i);
 294     }
 295 
 296     /*package-private*/
 297     @ForceInline
 298     final
 299     <M> ByteVector ldOp(M memory, int offset,
 300                                   FLdOp<M> f) {
 301         //dummy; no vec = getElements();
 302         byte[] res = new byte[length()];
 303         for (int i = 0; i < res.length; i++) {
 304             res[i] = f.apply(memory, offset, i);
 305         }
 306         return vectorFactory(res);
 307     }
 308 
 309     /*package-private*/
 310     @ForceInline
 311     final
 312     <M> ByteVector ldOp(M memory, int offset,
 313                                   VectorMask<Byte> m,
 314                                   FLdOp<M> f) {
 315         //byte[] vec = getElements();
 316         byte[] res = new byte[length()];
 317         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 318         for (int i = 0; i < res.length; i++) {
 319             if (mbits[i]) {
 320                 res[i] = f.apply(memory, offset, i);
 321             }
 322         }
 323         return vectorFactory(res);
 324     }
 325 
 326     interface FStOp<M> {
 327         void apply(M memory, int offset, int i, byte a);
 328     }
 329 
 330     /*package-private*/
 331     @ForceInline
 332     final
 333     <M> void stOp(M memory, int offset,
 334                   FStOp<M> f) {
 335         byte[] vec = getElements();
 336         for (int i = 0; i < vec.length; i++) {
 337             f.apply(memory, offset, i, vec[i]);
 338         }
 339     }
 340 
 341     /*package-private*/
 342     @ForceInline
 343     final
 344     <M> void stOp(M memory, int offset,
 345                   VectorMask<Byte> m,
 346                   FStOp<M> f) {
 347         byte[] vec = getElements();
 348         boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
 349         for (int i = 0; i < vec.length; i++) {
 350             if (mbits[i]) {
 351                 f.apply(memory, offset, i, vec[i]);
 352             }
 353         }
 354     }
 355 
 356     // Binary test
 357 
 358     /*package-private*/
 359     interface FBinTest {
 360         boolean apply(int cond, int i, byte a, byte b);
 361     }
 362 
 363     /*package-private*/
 364     @ForceInline
 365     final
 366     AbstractMask<Byte> bTest(int cond,
 367                                   Vector<Byte> o,
 368                                   FBinTest f) {
 369         byte[] vec1 = getElements();
 370         byte[] vec2 = ((ByteVector)o).getElements();
 371         boolean[] bits = new boolean[length()];
 372         for (int i = 0; i < length(); i++){
 373             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 374         }
 375         return maskFactory(bits);
 376     }
 377 
 378     /*package-private*/
 379     @ForceInline
 380     static boolean doBinTest(int cond, byte a, byte b) {
 381         switch (cond) {
 382         case BT_eq:  return a == b;
 383         case BT_ne:  return a != b;
 384         case BT_lt:  return a < b;
 385         case BT_le:  return a <= b;
 386         case BT_gt:  return a > b;
 387         case BT_ge:  return a >= b;
 388         }
 389         throw new AssertionError(Integer.toHexString(cond));
 390     }
 391 
 392     /*package-private*/
 393     @Override
 394     abstract ByteSpecies vspecies();
 395 
 396     /*package-private*/
 397     @ForceInline
 398     static long toBits(byte e) {
 399         return  e;
 400     }
 401 
 402     /*package-private*/
 403     @ForceInline
 404     static byte fromBits(long bits) {
 405         return ((byte)bits);
 406     }
 407 
 408     // Static factories (other than memory operations)
 409 
 410     // Note: A surprising behavior in javadoc
 411     // sometimes makes a lone /** {@inheritDoc} */
 412     // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<Byte>
 415     // instead of Vector<E> as originally specified.
 416     // Adding an empty HTML fragment appears to
 417     // nudge javadoc into providing the desired
 418     // inherited documentation.  We use the HTML
 419     // comment <!--workaround--> for this.
 420 
 421     /**
 422      * {@inheritDoc} <!--workaround-->
 423      */
 424     @ForceInline
 425     public static ByteVector zero(VectorSpecies<Byte> species) {
 426         ByteSpecies vsp = (ByteSpecies) species;
 427         return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), byte.class, species.length(),
 428                                 0, vsp,
 429                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 430     }
 431 
 432     /**
 433      * Returns a vector of the same species as this one
 434      * where all lane elements are set to
 435      * the primitive value {@code e}.
 436      *
 437      * The contents of the current vector are discarded;
 438      * only the species is relevant to this operation.
 439      *
 440      * <p> This method returns the value of this expression:
 441      * {@code ByteVector.broadcast(this.species(), e)}.
 442      *
 443      * @apiNote
 444      * Unlike the similar method named {@code broadcast()}
 445      * in the supertype {@code Vector}, this method does not
 446      * need to validate its argument, and cannot throw
 447      * {@code IllegalArgumentException}.  This method is
 448      * therefore preferable to the supertype method.
 449      *
 450      * @param e the value to broadcast
 451      * @return a vector where all lane elements are set to
 452      *         the primitive value {@code e}
 453      * @see #broadcast(VectorSpecies,long)
 454      * @see Vector#broadcast(long)
 455      * @see VectorSpecies#broadcast(long)
 456      */
 457     public abstract ByteVector broadcast(byte e);
 458 
 459     /**
 460      * Returns a vector of the given species
 461      * where all lane elements are set to
 462      * the primitive value {@code e}.
 463      *
 464      * @param species species of the desired vector
 465      * @param e the value to broadcast
 466      * @return a vector where all lane elements are set to
 467      *         the primitive value {@code e}
 468      * @see #broadcast(long)
 469      * @see Vector#broadcast(long)
 470      * @see VectorSpecies#broadcast(long)
 471      */
 472     public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) {
 473         ByteSpecies vsp = (ByteSpecies) species;
 474         return vsp.broadcast(e);
 475     }
 476 
 477     /*package-private*/
 478     @ForceInline
 479     final ByteVector broadcastTemplate(byte e) {
 480         ByteSpecies vsp = vspecies();
 481         return vsp.broadcast(e);
 482     }
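
    // Usage sketch for the broadcast factories above, given some
    // VectorSpecies<Byte> species:
    //    ByteVector zeros  = ByteVector.zero(species);                 // all lanes 0
    //    ByteVector sevens = ByteVector.broadcast(species, (byte) 7);  // all lanes 7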
 483 
 484     /**
 485      * {@inheritDoc} <!--workaround-->
 486      * @apiNote
 487      * When working with vector subtypes like {@code ByteVector},
 488      * {@linkplain #broadcast(byte) the more strongly typed method}
 489      * is typically selected.  It can be explicitly selected
 490      * using a cast: {@code v.broadcast((byte)e)}.
 491      * The two expressions will produce numerically identical results.
 492      */
 493     @Override
 494     public abstract ByteVector broadcast(long e);
 495 
 496     /**
 497      * Returns a vector of the given species
 498      * where all lane elements are set to
 499      * the primitive value {@code e}.
 500      *
 501      * The {@code long} value must be accurately representable
 502      * by the {@code ETYPE} of the vector species, so that
 503      * {@code e==(long)(ETYPE)e}.
 504      *
 505      * @param species species of the desired vector
 506      * @param e the value to broadcast
 507      * @return a vector where all lane elements are set to
 508      *         the primitive value {@code e}
 509      * @throws IllegalArgumentException
 510      *         if the given {@code long} value cannot
 511      *         be represented by the vector's {@code ETYPE}
 512      * @see #broadcast(VectorSpecies,byte)
 513      * @see VectorSpecies#checkValue(long)
 514      */
 515     public static ByteVector broadcast(VectorSpecies<Byte> species, long e) {
 516         ByteSpecies vsp = (ByteSpecies) species;
 517         return vsp.broadcast(e);
 518     }
 519 
 520     /*package-private*/
 521     @ForceInline
 522     final ByteVector broadcastTemplate(long e) {
 523         return vspecies().broadcast(e);
 524     }
 525 
 526     /**
 527      * Returns a vector where each lane element is set to given
 528      * primitive values.
 529      * <p>
     * For each vector lane, where {@code N} is the vector lane index, the
     * primitive value at index {@code N} is placed into the resulting
 532      * vector at lane index {@code N}.
 533      *
 534      * @param species species of the desired vector
 535      * @param es the given primitive values
     * @return a vector where each lane element is set to the given primitive
 537      * values
 538      * @throws IllegalArgumentException
 539      *         if {@code es.length != species.length()}
 540      */
 541     @ForceInline
 542     @SuppressWarnings("unchecked")
 543     public static ByteVector fromValues(VectorSpecies<Byte> species, byte... es) {
 544         ByteSpecies vsp = (ByteSpecies) species;
 545         int vlength = vsp.laneCount();
 546         VectorIntrinsics.requireLength(es.length, vlength);
 547         // Get an unaliased copy and use it directly:
 548         return vsp.vectorFactory(Arrays.copyOf(es, vlength));
 549     }
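
    // Usage sketch, assuming an eight-lane species such as SPECIES_64
    // (defined elsewhere in this class); es.length must equal
    // species.length() or an IllegalArgumentException is thrown:
    //    ByteVector v = ByteVector.fromValues(ByteVector.SPECIES_64,
    //        (byte) 1, (byte) 2, (byte) 3, (byte) 4,
    //        (byte) 5, (byte) 6, (byte) 7, (byte) 8);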
 550 
 551     /**
     * Returns a vector where the first lane element is set to the primitive
     * value {@code e}, and all other lane elements are set to the default
     * value (zero).
 555      *
 556      * @param species species of the desired vector
 557      * @param e the value
 558      * @return a vector where the first lane element is set to the primitive
 559      * value {@code e}
 560      */
 561     // FIXME: Does this carry its weight?
 562     @ForceInline
 563     public static ByteVector single(VectorSpecies<Byte> species, byte e) {
 564         return zero(species).withLane(0, e);
 565     }
 566 
 567     /**
 568      * Returns a vector where each lane element is set to a randomly
 569      * generated primitive value.
 570      *
 571      * The semantics are equivalent to calling
 572      * {@code (byte)}{@link ThreadLocalRandom#nextInt()}
 573      * for each lane, from first to last.
 574      *
 575      * @param species species of the desired vector
     * @return a vector where each lane element is set to a randomly
 577      * generated primitive value
 578      */
 579     public static ByteVector random(VectorSpecies<Byte> species) {
 580         ByteSpecies vsp = (ByteSpecies) species;
 581         ThreadLocalRandom r = ThreadLocalRandom.current();
 582         return vsp.vOp(i -> nextRandom(r));
 583     }
 584     private static byte nextRandom(ThreadLocalRandom r) {
 585         return (byte) r.nextInt();
 586     }
 587 
 588     // Unary lanewise support
 589 
 590     /**
 591      * {@inheritDoc} <!--workaround-->
 592      */
 593     public abstract
 594     ByteVector lanewise(VectorOperators.Unary op);
 595 
 596     @ForceInline
 597     final
 598     ByteVector lanewiseTemplate(VectorOperators.Unary op) {
 599         if (opKind(op, VO_SPECIAL)) {
 600             if (op == ZOMO) {
 601                 return blend(broadcast(-1), compare(NE, 0));
 602             }
 603             if (op == NEG) {
 604                 // FIXME: Support this in the JIT.
 605                 return broadcast(0).lanewiseTemplate(SUB, this);
 606             }
 607         }
 608         int opc = opCode(op);
 609         return VectorIntrinsics.unaryOp(
 610             opc, getClass(), byte.class, length(),
 611             this,
 612             UN_IMPL.find(op, opc, (opc_) -> {
 613               switch (opc_) {
 614                 case VECTOR_OP_NEG: return v0 ->
 615                         v0.uOp((i, a) -> (byte) -a);
 616                 case VECTOR_OP_ABS: return v0 ->
 617                         v0.uOp((i, a) -> (byte) Math.abs(a));
 618                 case VECTOR_OP_NOT: return v0 ->
 619                         v0.uOp((i, a) -> (byte) ~a);
 620                 default: return null;
 621               }}));
 622     }
 623     private static final
 624     ImplCache<Unary,UnaryOperator<ByteVector>> UN_IMPL
 625         = new ImplCache<>(Unary.class, ByteVector.class);
 626 
 627     /**
 628      * {@inheritDoc} <!--workaround-->
 629      */
 630     @ForceInline
 631     public final
 632     ByteVector lanewise(VectorOperators.Unary op,
 633                                   VectorMask<Byte> m) {
 634         return blend(lanewise(op), m);
 635     }
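
    // Usage sketch for the unary forms above: negate every lane, or only
    // the lanes selected by a mask m:
    //    ByteVector neg     = v.lanewise(VectorOperators.NEG);
    //    ByteVector negSome = v.lanewise(VectorOperators.NEG, m);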
 636 
 637     // Binary lanewise support
 638 
 639     /**
 640      * {@inheritDoc} <!--workaround-->
 641      * @see #lanewise(VectorOperators.Binary,byte)
 642      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 643      */
 644     @Override
 645     public abstract
 646     ByteVector lanewise(VectorOperators.Binary op,
 647                                   Vector<Byte> v);
 648     @ForceInline
 649     final
 650     ByteVector lanewiseTemplate(VectorOperators.Binary op,
 651                                           Vector<Byte> v) {
 652         ByteVector that = (ByteVector) v;
 653         that.check(this);
 654         if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
 655             if (op == FIRST_NONZERO) {
 656                 // FIXME: Support this in the JIT.
 657                 VectorMask<Byte> thisNZ
 658                     = this.viewAsIntegralLanes().compare(NE, (byte) 0);
 659                 that = that.blend((byte) 0, thisNZ.cast(vspecies()));
 660                 op = OR_UNCHECKED;
 661             }
 662             if (opKind(op, VO_SHIFT)) {
 663                 // As per shift specification for Java, mask the shift count.
 664                 // This allows the JIT to ignore some ISA details.
 665                 that = that.lanewise(AND, SHIFT_MASK);
 666             }
 667             if (op == ROR || op == ROL) {  // FIXME: JIT should do this
 668                 ByteVector neg = that.lanewise(NEG);
 669                 ByteVector hi = this.lanewise(LSHL, (op == ROR) ? neg : that);
 670                 ByteVector lo = this.lanewise(LSHR, (op == ROR) ? that : neg);
 671                 return hi.lanewise(OR, lo);
 672             } else if (op == AND_NOT) {
 673                 // FIXME: Support this in the JIT.
 674                 that = that.lanewise(NOT);
 675                 op = AND;
 676             } else if (op == DIV) {
 677                 VectorMask<Byte> eqz = that.eq((byte)0);
 678                 if (eqz.anyTrue()) {
 679                     throw that.divZeroException();
 680                 }
 681             }
 682         }
 683         int opc = opCode(op);
 684         return VectorIntrinsics.binaryOp(
 685             opc, getClass(), byte.class, length(),
 686             this, that,
 687             BIN_IMPL.find(op, opc, (opc_) -> {
 688               switch (opc_) {
 689                 case VECTOR_OP_ADD: return (v0, v1) ->
 690                         v0.bOp(v1, (i, a, b) -> (byte)(a + b));
 691                 case VECTOR_OP_SUB: return (v0, v1) ->
 692                         v0.bOp(v1, (i, a, b) -> (byte)(a - b));
 693                 case VECTOR_OP_MUL: return (v0, v1) ->
 694                         v0.bOp(v1, (i, a, b) -> (byte)(a * b));
 695                 case VECTOR_OP_DIV: return (v0, v1) ->
 696                         v0.bOp(v1, (i, a, b) -> (byte)(a / b));
 697                 case VECTOR_OP_MAX: return (v0, v1) ->
 698                         v0.bOp(v1, (i, a, b) -> (byte)Math.max(a, b));
 699                 case VECTOR_OP_MIN: return (v0, v1) ->
 700                         v0.bOp(v1, (i, a, b) -> (byte)Math.min(a, b));
 701                 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) ->
 702                         v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? a : b);
 703                 case VECTOR_OP_AND: return (v0, v1) ->
 704                         v0.bOp(v1, (i, a, b) -> (byte)(a & b));
 705                 case VECTOR_OP_OR: return (v0, v1) ->
 706                         v0.bOp(v1, (i, a, b) -> (byte)(a | b));
 707                 case VECTOR_OP_AND_NOT: return (v0, v1) ->
 708                         v0.bOp(v1, (i, a, b) -> (byte)(a & ~b));
 709                 case VECTOR_OP_XOR: return (v0, v1) ->
 710                         v0.bOp(v1, (i, a, b) -> (byte)(a ^ b));
 711                 case VECTOR_OP_LSHIFT: return (v0, v1) ->
 712                         v0.bOp(v1, (i, a, n) -> (byte)(a << n));
 713                 case VECTOR_OP_RSHIFT: return (v0, v1) ->
 714                         v0.bOp(v1, (i, a, n) -> (byte)(a >> n));
 715                 case VECTOR_OP_URSHIFT: return (v0, v1) ->
 716                         v0.bOp(v1, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
 717                 case VECTOR_OP_LROTATE: return (v0, v1) ->
 718                         v0.bOp(v1, (i, a, n) -> (byte)((a << n)|(a >> -n)));
 719                 case VECTOR_OP_RROTATE: return (v0, v1) ->
 720                         v0.bOp(v1, (i, a, n) -> (byte)((a >> n)|(a << -n)));
 721                 default: return null;
 722                 }}));
 723     }
 724     private static final
 725     ImplCache<Binary,BinaryOperator<ByteVector>> BIN_IMPL
 726         = new ImplCache<>(Binary.class, ByteVector.class);
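
    // Scalar sketch of the ROL/ROR emulation above for byte lanes.  The
    // shift counts are first masked to 0..7, and the negated count works
    // because (-n & 7) == 8 - n for n in 1..7:
    //    static byte rol8(byte a, int n) {
    //        n &= 7;
    //        return (byte) ((a << n) | ((a & 0xFF) >>> (8 - n)));
    //    }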
 727 
 728     /**
 729      * {@inheritDoc} <!--workaround-->
 730      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 731      */
 732     @ForceInline
 733     public final
 734     ByteVector lanewise(VectorOperators.Binary op,
 735                                   Vector<Byte> v,
 736                                   VectorMask<Byte> m) {
 737         ByteVector that = (ByteVector) v;
 738         if (op == DIV) {
 739             // suppress div/0 exceptions in unset lanes
 740             that = that.lanewise(NOT, that.eq((byte)0));
 741             return blend(lanewise(DIV, that), m);
 742         }
 743         return blend(lanewise(op, v), m);
 744     }
 745     // FIXME: Maybe all of the public final methods in this file (the
 746     // simple ones that just call lanewise) should be pushed down to
 747     // the X-VectorBits template.  They can't optimize properly at
 748     // this level, and must rely on inlining.  Does it work?
 749     // (If it works, of course keep the code here.)
 750 
 751     /**
 752      * Combines the lane values of this vector
 753      * with the value of a broadcast scalar.
 754      *
 755      * This is a lane-wise binary operation which applies
 756      * the selected operation to each lane.
 757      * The return value will be equal to this expression:
 758      * {@code this.lanewise(op, this.broadcast(e))}.
 759      *
 760      * @param op the operation used to process lane values
 761      * @param e the input scalar
 762      * @return the result of applying the operation lane-wise
     *         to the input vector and the scalar
 764      * @throws UnsupportedOperationException if this vector does
 765      *         not support the requested operation
 766      * @see #lanewise(VectorOperators.Binary,Vector)
 767      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
 768      */
 769     @ForceInline
 770     public final
 771     ByteVector lanewise(VectorOperators.Binary op,
 772                                   byte e) {
 773         int opc = opCode(op);
 774         if (opKind(op, VO_SHIFT) && (byte)(int)e == e) {
 775             return lanewiseShift(op, (int) e);
 776         }
 777         if (op == AND_NOT) {
 778             op = AND; e = (byte) ~e;
 779         }
 780         return lanewise(op, broadcast(e));
 781     }
 782 
 783     /**
 784      * Combines the lane values of this vector
 785      * with the value of a broadcast scalar,
 786      * with selection of lane elements controlled by a mask.
 787      *
 788      * This is a masked lane-wise binary operation which applies
 789      * the selected operation to each lane.
 790      * The return value will be equal to this expression:
 791      * {@code this.lanewise(op, this.broadcast(e), m)}.
 792      *
 793      * @param op the operation used to process lane values
 794      * @param e the input scalar
 795      * @param m the mask controlling lane selection
 796      * @return the result of applying the operation lane-wise
 797      *         to the input vector and the scalar
 798      * @throws UnsupportedOperationException if this vector does
 799      *         not support the requested operation
 800      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 801      * @see #lanewise(VectorOperators.Binary,byte)
 802      */
 803     @ForceInline
 804     public final
 805     ByteVector lanewise(VectorOperators.Binary op,
 806                                   byte e,
 807                                   VectorMask<Byte> m) {
 808         return blend(lanewise(op, e), m);
 809     }
 810 
 811     /**
 812      * {@inheritDoc} <!--workaround-->
 813      * @apiNote
 814      * When working with vector subtypes like {@code ByteVector},
 815      * {@linkplain #lanewise(VectorOperators.Binary,byte)
 816      * the more strongly typed method}
 817      * is typically selected.  It can be explicitly selected
 818      * using a cast: {@code v.lanewise(op,(byte)e)}.
 819      * The two expressions will produce numerically identical results.
 820      */
 821     @ForceInline
 822     public final
 823     ByteVector lanewise(VectorOperators.Binary op,
 824                                   long e) {
 825         byte e1 = (byte) e;
 826         if ((long)e1 != e
 827             // allow shift ops to clip down their int parameters
 828             && !(opKind(op, VO_SHIFT) && (int)e1 == e)
 829             ) {
 830             vspecies().checkValue(e);  // for exception
 831         }
 832         return lanewise(op, e1);
 833     }
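
    // Sketch of the range check above: the long value must round-trip
    // through byte, so for example
    //    v.lanewise(VectorOperators.ADD, 127L);   // ok, fits in a byte
    //    v.lanewise(VectorOperators.ADD, 128L);   // throws IllegalArgumentException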
 834 
 835     /**
 836      * {@inheritDoc} <!--workaround-->
 837      * @apiNote
 838      * When working with vector subtypes like {@code ByteVector},
 839      * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask)
 840      * the more strongly typed method}
 841      * is typically selected.  It can be explicitly selected
 842      * using a cast: {@code v.lanewise(op,(byte)e,m)}.
 843      * The two expressions will produce numerically identical results.
 844      */
 845     @ForceInline
 846     public final
 847     ByteVector lanewise(VectorOperators.Binary op,
 848                                   long e, VectorMask<Byte> m) {
 849         return blend(lanewise(op, e), m);
 850     }
 851 
 852     /*package-private*/
 853     abstract ByteVector
 854     lanewiseShift(VectorOperators.Binary op, int e);
 855 
 856     /*package-private*/
 857     @ForceInline
 858     final ByteVector
 859     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 860         // Special handling for these.  FIXME: Refactor?
 861         int opc = opCode(op);
 862         assert(opKind(op, VO_SHIFT));
 863         // As per shift specification for Java, mask the shift count.
 864         e &= SHIFT_MASK;
 865         if (op == ROR || op == ROL) {  // FIXME: JIT should do this
 866             ByteVector hi = this.lanewise(LSHL, (op == ROR) ? -e : e);
 867             ByteVector lo = this.lanewise(LSHR, (op == ROR) ? e : -e);
 868             return hi.lanewise(OR, lo);
 869         }
 870         return VectorIntrinsics.broadcastInt(
 871             opc, getClass(), byte.class, length(),
 872             this, e,
 873             BIN_INT_IMPL.find(op, opc, (opc_) -> {
 874               switch (opc_) {
 875                 case VECTOR_OP_LSHIFT: return (v, n) ->
 876                         v.uOp((i, a) -> (byte)(a << n));
 877                 case VECTOR_OP_RSHIFT: return (v, n) ->
 878                         v.uOp((i, a) -> (byte)(a >> n));
 879                 case VECTOR_OP_URSHIFT: return (v, n) ->
 880                         v.uOp((i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n));
 881                 case VECTOR_OP_LROTATE: return (v, n) ->
 882                         v.uOp((i, a) -> (byte)((a << n)|(a >> -n)));
 883                 case VECTOR_OP_RROTATE: return (v, n) ->
 884                         v.uOp((i, a) -> (byte)((a >> n)|(a << -n)));
 885                 default: return null;
 886                 }}));
 887     }
 888     private static final
 889     ImplCache<Binary,VectorBroadcastIntOp<ByteVector>> BIN_INT_IMPL
 890         = new ImplCache<>(Binary.class, ByteVector.class);
 891 
 892     // As per shift specification for Java, mask the shift count.
    // We mask 0x3F (long), 0x1F (int), 0x0F (short), 0x7 (byte).
 894     // The latter two maskings go beyond the JLS, but seem reasonable
 895     // since our lane types are first-class types, not just dressed
 896     // up ints.
 897     private static final int SHIFT_MASK = (Byte.SIZE - 1);
 898     // Also simulate >>> on sub-word variables with a mask.
 899     private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);
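
    // Scalar sketch of why LSHR_SETUP_MASK is needed for byte lanes:
    //    (byte) ((a & 0xFF) >>> (n & 7))   // zero-extend, then shift
    // Without the 0xFF mask, a negative byte would be sign-extended to int
    // before the shift, and >>> would pull copies of the sign bit into the
    // high bits of the 8-bit result.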
 900 
 901     // Ternary lanewise support
 902 
 903     // Ternary operators come in eight variations:
 904     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 905     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 906 
 907     // It is annoying to support all of these variations of masking
 908     // and broadcast, but it would be more surprising not to continue
 909     // the obvious pattern started by unary and binary.
 910 
 911    /**
 912      * {@inheritDoc} <!--workaround-->
 913      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 914      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 915      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 916      * @see #lanewise(VectorOperators.Ternary,byte,byte)
 917      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
 918      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
 919      */
 920     @Override
 921     public abstract
 922     ByteVector lanewise(VectorOperators.Ternary op,
 923                                                   Vector<Byte> v1,
 924                                                   Vector<Byte> v2);
 925     @ForceInline
 926     final
 927     ByteVector lanewiseTemplate(VectorOperators.Ternary op,
 928                                           Vector<Byte> v1,
 929                                           Vector<Byte> v2) {
 930         ByteVector that = (ByteVector) v1;
 931         ByteVector tother = (ByteVector) v2;
 932         // It's a word: https://www.dictionary.com/browse/tother
 933         // See also Chapter 11 of Dickens, Our Mutual Friend:
 934         // "Totherest Governor," replied Mr Riderhood...
 935         that.check(this);
 936         tother.check(this);
 937         if (op == BITWISE_BLEND) {
 938             // FIXME: Support this in the JIT.
 939             that = this.lanewise(XOR, that).lanewise(AND, tother);
 940             return this.lanewise(XOR, that);
 941         }
 942         int opc = opCode(op);
 943         return VectorIntrinsics.ternaryOp(
 944             opc, getClass(), byte.class, length(),
 945             this, that, tother,
 946             TERN_IMPL.find(op, opc, (opc_) -> {
 947               switch (opc_) {
 948                 case VECTOR_OP_BITWISE_BLEND: return (v0, v1_, v2_) ->
 949                         v0.tOp(v1_, v2_, (i, a, b, c) -> (byte)(a^((a^b)&c)));
 950                 default: return null;
 951                 }}));
 952     }
 953     private static final
 954     ImplCache<Ternary,TernaryOperation<ByteVector>> TERN_IMPL
 955         = new ImplCache<>(Ternary.class, ByteVector.class);
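
    // Scalar sketch of the BITWISE_BLEND identity used above:
    //    a ^ ((a ^ b) & c)  ==  (a & ~c) | (b & c)
    // that is, take bits of b where c has 1-bits and bits of a elsewhere.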
 956 
 957     /**
 958      * {@inheritDoc} <!--workaround-->
 959      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 960      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
 961      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
 962      */
 963     @ForceInline
 964     public final
 965     ByteVector lanewise(VectorOperators.Ternary op,
 966                                   Vector<Byte> v1,
 967                                   Vector<Byte> v2,
 968                                   VectorMask<Byte> m) {
 969         return blend(lanewise(op, v1, v2), m);
 970     }
 971 
 972     /**
 973      * Combines the lane values of this vector
 974      * with the values of two broadcast scalars.
 975      *
 976      * This is a lane-wise ternary operation which applies
 977      * the selected operation to each lane.
 978      * The return value will be equal to this expression:
 979      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 980      *
 981      * @param op the operation used to combine lane values
 982      * @param e1 the first input scalar
 983      * @param e2 the second input scalar
 984      * @return the result of applying the operation lane-wise
 985      *         to the input vector and the scalars
 986      * @throws UnsupportedOperationException if this vector does
 987      *         not support the requested operation
 988      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 989      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
 990      */
 991     @ForceInline
 992     public final
 993     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 994                                   byte e1,
 995                                   byte e2) {
 996         return lanewise(op, broadcast(e1), broadcast(e2));
 997     }
 998 
 999     /**
1000      * Combines the lane values of this vector
1001      * with the values of two broadcast scalars,
1002      * with selection of lane elements controlled by a mask.
1003      *
1004      * This is a masked lane-wise ternary operation which applies
1005      * the selected operation to each lane.
1006      * The return value will be equal to this expression:
1007      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1008      *
1009      * @param op the operation used to combine lane values
1010      * @param e1 the first input scalar
1011      * @param e2 the second input scalar
1012      * @param m the mask controlling lane selection
1013      * @return the result of applying the operation lane-wise
1014      *         to the input vector and the scalars
1015      * @throws UnsupportedOperationException if this vector does
1016      *         not support the requested operation
1017      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1018      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1019      */
1020     @ForceInline
1021     public final
1022     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1023                                   byte e1,
1024                                   byte e2,
1025                                   VectorMask<Byte> m) {
1026         return blend(lanewise(op, e1, e2), m);
1027     }
1028 
1029     /**
1030      * Combines the lane values of this vector
1031      * with the values of another vector and a broadcast scalar.
1032      *
1033      * This is a lane-wise ternary operation which applies
1034      * the selected operation to each lane.
1035      * The return value will be equal to this expression:
1036      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1037      *
1038      * @param op the operation used to combine lane values
1039      * @param v1 the other input vector
1040      * @param e2 the input scalar
1041      * @return the result of applying the operation lane-wise
1042      *         to the input vectors and the scalar
1043      * @throws UnsupportedOperationException if this vector does
1044      *         not support the requested operation
1045      * @see #lanewise(VectorOperators.Ternary,byte,byte)
1046      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
1047      */
1048     @ForceInline
1049     public final
1050     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1051                                   Vector<Byte> v1,
1052                                   byte e2) {
1053         return lanewise(op, v1, broadcast(e2));
1054     }
1055 
1056     /**
1057      * Combines the lane values of this vector
1058      * with the values of another vector and a broadcast scalar,
1059      * with selection of lane elements controlled by a mask.
1060      *
1061      * This is a masked lane-wise ternary operation which applies
1062      * the selected operation to each lane.
1063      * The return value will be equal to this expression:
1064      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1065      *
1066      * @param op the operation used to combine lane values
1067      * @param v1 the other input vector
1068      * @param e2 the input scalar
1069      * @param m the mask controlling lane selection
1070      * @return the result of applying the operation lane-wise
1071      *         to the input vectors and the scalar
1072      * @throws UnsupportedOperationException if this vector does
1073      *         not support the requested operation
1074      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1075      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
1076      * @see #lanewise(VectorOperators.Ternary,Vector,byte)
1077      */
1078     @ForceInline
1079     public final
1080     ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1081                                   Vector<Byte> v1,
1082                                   byte e2,
1083                                   VectorMask<Byte> m) {
1084         return blend(lanewise(op, v1, e2), m);
1085     }
1086 
1087     /**
1088      * Combines the lane values of this vector
1089      * with the values of another vector and a broadcast scalar.
1090      *
1091      * This is a lane-wise ternary operation which applies
1092      * the selected operation to each lane.
1093      * The return value will be equal to this expression:
1094      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1095      *
1096      * @param op the operation used to combine lane values
1097      * @param e1 the input scalar
1098      * @param v2 the other input vector
1099      * @return the result of applying the operation lane-wise
1100      *         to the input vectors and the scalar
1101      * @throws UnsupportedOperationException if this vector does
1102      *         not support the requested operation
1103      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1104      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
1105      */
1106     @ForceInline
1107     public final
1108     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1109                                   byte e1,
1110                                   Vector<Byte> v2) {
1111         return lanewise(op, broadcast(e1), v2);
1112     }
1113 
1114     /**
1115      * Combines the lane values of this vector
1116      * with the values of another vector and a broadcast scalar,
1117      * with selection of lane elements controlled by a mask.
1118      *
1119      * This is a masked lane-wise ternary operation which applies
1120      * the selected operation to each lane.
1121      * The return value will be equal to this expression:
1122      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1123      *
1124      * @param op the operation used to combine lane values
1125      * @param e1 the input scalar
1126      * @param v2 the other input vector
1127      * @param m the mask controlling lane selection
1128      * @return the result of applying the operation lane-wise
1129      *         to the input vectors and the scalar
1130      * @throws UnsupportedOperationException if this vector does
1131      *         not support the requested operation
1132      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1133      * @see #lanewise(VectorOperators.Ternary,byte,Vector)
1134      */
1135     @ForceInline
1136     public final
1137     ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1138                                   byte e1,
1139                                   Vector<Byte> v2,
1140                                   VectorMask<Byte> m) {
1141         return blend(lanewise(op, e1, v2), m);
1142     }
1143 
1144     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1145     // https://en.wikipedia.org/wiki/Ogdoad
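    // Usage sketch for the ternary forms: merge the bits of two vectors
    // under a third "selector" vector (1-bits pick from b, 0-bits from a):
    //    ByteVector merged = a.lanewise(VectorOperators.BITWISE_BLEND, b, selector);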
1146 
1147     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1148     //
1149     // These include masked and non-masked versions.
1150     // This subclass adds broadcast (masked or not).
1151 
1152     /**
1153      * {@inheritDoc} <!--workaround-->
1154      * @see #add(byte)
1155      */
1156     @Override
1157     @ForceInline
1158     public final ByteVector add(Vector<Byte> v) {
1159         return lanewise(ADD, v);
1160     }
1161 
1162     /**
1163      * Adds this vector to the broadcast of an input scalar.
1164      *
1165      * This is a lane-wise binary operation which applies
1166      * the primitive addition operation ({@code +}) to each lane.
1167      *
1168      * This method is also equivalent to the expression
1169      * {@link #lanewise(VectorOperators.Binary,byte)
1170      *    lanewise}{@code (}{@link VectorOperators#ADD
1171      *    ADD}{@code , e)}.
1172      *
1173      * @param e the input scalar
1174      * @return the result of adding each lane of this vector to the scalar
1175      * @see #add(Vector)
1176      * @see #broadcast(byte)
1177      * @see #add(byte,VectorMask)
1178      * @see VectorOperators#ADD
1179      * @see #lanewise(VectorOperators.Binary,Vector)
1180      * @see #lanewise(VectorOperators.Binary,byte)
1181      */
1182     @ForceInline
1183     public final
1184     ByteVector add(byte e) {
1185         return lanewise(ADD, e);
1186     }
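
    // Usage sketch, given some VectorSpecies<Byte> species:
    //    ByteVector v = ByteVector.broadcast(species, (byte) 7);
    //    ByteVector w = v.add((byte) 3);                  // every lane is 10
    //    ByteVector x = v.add((byte) 3, v.eq((byte) 7));  // only lanes equal to 7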
1187 
1188     /**
1189      * {@inheritDoc} <!--workaround-->
1190      * @see #add(byte,VectorMask)
1191      */
1192     @Override
1193     @ForceInline
1194     public final ByteVector add(Vector<Byte> v,
1195                                           VectorMask<Byte> m) {
1196         return lanewise(ADD, v, m);
1197     }
1198 
1199     /**
1200      * Adds this vector to the broadcast of an input scalar,
1201      * selecting lane elements controlled by a mask.
1202      *
1203      * This is a masked lane-wise binary operation which applies
1204      * the primitive addition operation ({@code +}) to each lane.
1205      *
1206      * This method is also equivalent to the expression
1207      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1208      *    lanewise}{@code (}{@link VectorOperators#ADD
     *    ADD}{@code , e, m)}.
1210      *
1211      * @param e the input scalar
1212      * @param m the mask controlling lane selection
1213      * @return the result of adding each lane of this vector to the scalar
1214      * @see #add(Vector,VectorMask)
1215      * @see #broadcast(byte)
1216      * @see #add(byte)
1217      * @see VectorOperators#ADD
1218      * @see #lanewise(VectorOperators.Binary,Vector)
1219      * @see #lanewise(VectorOperators.Binary,byte)
1220      */
1221     @ForceInline
1222     public final ByteVector add(byte e,
1223                                           VectorMask<Byte> m) {
1224         return lanewise(ADD, e, m);
1225     }
1226 
1227     /**
1228      * {@inheritDoc} <!--workaround-->
1229      * @see #sub(byte)
1230      */
1231     @Override
1232     @ForceInline
1233     public final ByteVector sub(Vector<Byte> v) {
1234         return lanewise(SUB, v);
1235     }
1236 
1237     /**
1238      * Subtracts an input scalar from this vector.
1239      *
     * This is a lane-wise binary operation which applies
1241      * the primitive subtraction operation ({@code -}) to each lane.
1242      *
1243      * This method is also equivalent to the expression
1244      * {@link #lanewise(VectorOperators.Binary,byte)
1245      *    lanewise}{@code (}{@link VectorOperators#SUB
1246      *    SUB}{@code , e)}.
1247      *
1248      * @param e the input scalar
1249      * @return the result of subtracting the scalar from each lane of this vector
1250      * @see #sub(Vector)
1251      * @see #broadcast(byte)
1252      * @see #sub(byte,VectorMask)
1253      * @see VectorOperators#SUB
1254      * @see #lanewise(VectorOperators.Binary,Vector)
1255      * @see #lanewise(VectorOperators.Binary,byte)
1256      */
1257     @ForceInline
1258     public final ByteVector sub(byte e) {
1259         return lanewise(SUB, e);
1260     }
1261 
1262     /**
1263      * {@inheritDoc} <!--workaround-->
1264      * @see #sub(byte,VectorMask)
1265      */
1266     @Override
1267     @ForceInline
1268     public final ByteVector sub(Vector<Byte> v,
1269                                           VectorMask<Byte> m) {
1270         return lanewise(SUB, v, m);
1271     }
1272 
1273     /**
1274      * Subtracts an input scalar from this vector
1275      * under the control of a mask.
1276      *
1277      * This is a masked lane-wise binary operation which applies
1278      * the primitive subtraction operation ({@code -}) to each lane.
1279      *
1280      * This method is also equivalent to the expression
1281      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1282      *    lanewise}{@code (}{@link VectorOperators#SUB
     *    SUB}{@code , e, m)}.
1284      *
1285      * @param e the input scalar
1286      * @param m the mask controlling lane selection
1287      * @return the result of subtracting the scalar from each lane of this vector
1288      * @see #sub(Vector,VectorMask)
1289      * @see #broadcast(byte)
1290      * @see #sub(byte)
1291      * @see VectorOperators#SUB
1292      * @see #lanewise(VectorOperators.Binary,Vector)
1293      * @see #lanewise(VectorOperators.Binary,byte)
1294      */
1295     @ForceInline
1296     public final ByteVector sub(byte e,
1297                                           VectorMask<Byte> m) {
1298         return lanewise(SUB, e, m);
1299     }
1300 
1301     /**
1302      * {@inheritDoc} <!--workaround-->
1303      * @see #mul(byte)
1304      */
1305     @Override
1306     @ForceInline
1307     public final ByteVector mul(Vector<Byte> v) {
1308         return lanewise(MUL, v);
1309     }
1310 
1311     /**
1312      * Multiplies this vector by the broadcast of an input scalar.
1313      *
1314      * This is a lane-wise binary operation which applies
1315      * the primitive multiplication operation ({@code *}) to each lane.
1316      *
1317      * This method is also equivalent to the expression
1318      * {@link #lanewise(VectorOperators.Binary,byte)
1319      *    lanewise}{@code (}{@link VectorOperators#MUL
1320      *    MUL}{@code , e)}.
1321      *
1322      * @param e the input scalar
1323      * @return the result of multiplying this vector by the given scalar
1324      * @see #mul(Vector)
1325      * @see #broadcast(byte)
1326      * @see #mul(byte,VectorMask)
1327      * @see VectorOperators#MUL
1328      * @see #lanewise(VectorOperators.Binary,Vector)
1329      * @see #lanewise(VectorOperators.Binary,byte)
1330      */
1331     @ForceInline
1332     public final ByteVector mul(byte e) {
1333         return lanewise(MUL, e);
1334     }
1335 
1336     /**
1337      * {@inheritDoc} <!--workaround-->
1338      * @see #mul(byte,VectorMask)
1339      */
1340     @Override
1341     @ForceInline
1342     public final ByteVector mul(Vector<Byte> v,
1343                                           VectorMask<Byte> m) {
1344         return lanewise(MUL, v, m);
1345     }
1346 
1347     /**
1348      * Multiplies this vector by the broadcast of an input scalar,
1349      * selecting lane elements controlled by a mask.
1350      *
1351      * This is a masked lane-wise binary operation which applies
1352      * the primitive multiplication operation ({@code *}) to each lane.
1353      *
1354      * This method is also equivalent to the expression
1355      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1356      *    lanewise}{@code (}{@link VectorOperators#MUL
     *    MUL}{@code , e, m)}.
1358      *
1359      * @param e the input scalar
1360      * @param m the mask controlling lane selection
     * @return the result of multiplying each lane of this vector by the scalar
1362      * @see #mul(Vector,VectorMask)
1363      * @see #broadcast(byte)
1364      * @see #mul(byte)
1365      * @see VectorOperators#MUL
1366      * @see #lanewise(VectorOperators.Binary,Vector)
1367      * @see #lanewise(VectorOperators.Binary,byte)
1368      */
1369     @ForceInline
1370     public final ByteVector mul(byte e,
1371                                           VectorMask<Byte> m) {
1372         return lanewise(MUL, e, m);
1373     }
1374 
1375     /**
1376      * {@inheritDoc} <!--workaround-->
1377      * @apiNote If there is a zero divisor, {@code
1378      * ArithmeticException} will be thrown.
1379      * @see #div(byte)
1380      */
1381     @Override
1382     @ForceInline
1383     public final ByteVector div(Vector<Byte> v) {
1384         return lanewise(DIV, v);
1385     }
1386 
1387     /**
1388      * Divides this vector by the broadcast of an input scalar.
1389      *
1390      * This is a lane-wise binary operation which applies
1391      * the primitive division operation ({@code /}) to each lane.
1392      *
1393      * This method is also equivalent to the expression
1394      * {@link #lanewise(VectorOperators.Binary,byte)
1395      *    lanewise}{@code (}{@link VectorOperators#DIV
1396      *    DIV}{@code , e)}.
1397      *
     * @apiNote If there is a zero divisor, {@code
     * ArithmeticException} will be thrown.
     *
1403      * @param e the input scalar
1404      * @return the result of dividing each lane of this vector by the scalar
1405      * @see #div(Vector)
1406      * @see #broadcast(byte)
1407      * @see #div(byte,VectorMask)
1408      * @see VectorOperators#DIV
1409      * @see #lanewise(VectorOperators.Binary,Vector)
1410      * @see #lanewise(VectorOperators.Binary,byte)
1411      */
1412     @ForceInline
1413     public final ByteVector div(byte e) {
1414         return lanewise(DIV, e);
1415     }
1416 
1417     /**
1418      * {@inheritDoc} <!--workaround-->
1419      * @see #div(byte,VectorMask)
1420      * @apiNote If there is a zero divisor, {@code
1421      * ArithmeticException} will be thrown.
1422      */
1423     @Override
1424     @ForceInline
1425     public final ByteVector div(Vector<Byte> v,
1426                                           VectorMask<Byte> m) {
1427         return lanewise(DIV, v, m);
1428     }
1429 
1430     /**
1431      * Divides this vector by the broadcast of an input scalar,
1432      * selecting lane elements controlled by a mask.
1433      *
1434      * This is a masked lane-wise binary operation which applies
1435      * the primitive division operation ({@code /}) to each lane.
1436      *
1437      * This method is also equivalent to the expression
1438      * {@link #lanewise(VectorOperators.Binary,byte,VectorMask)
1439      *    lanewise}{@code (}{@link VectorOperators#DIV
1440      *    DIV}{@code , e, m)}.
1441      *
1442      * @apiNote If there is a zero divisor, {@code
1443      * ArithmeticException} will be thrown.
1444      *
1445      * @param e the input scalar
1446      * @param m the mask controlling lane selection
1447      * @return the result of dividing each lane of this vector by the scalar
1448      * @see #div(Vector,VectorMask)
1449      * @see #broadcast(byte)
1450      * @see #div(byte)
1451      * @see VectorOperators#DIV
1452      * @see #lanewise(VectorOperators.Binary,Vector)
1453      * @see #lanewise(VectorOperators.Binary,byte)
1454      */
1455     @ForceInline
1456     public final ByteVector div(byte e,
1457                                           VectorMask<Byte> m) {
1458         return lanewise(DIV, e, m);
1459     }
1460 
1461     /// END OF FULL-SERVICE BINARY METHODS
1462 
1463     /// SECOND-TIER BINARY METHODS
1464     //
1465     // There are no masked versions.
1466 
1467     /**
1468      * {@inheritDoc} <!--workaround-->
1469      */
1470     @Override
1471     @ForceInline
1472     public final ByteVector min(Vector<Byte> v) {
1473         return lanewise(MIN, v);
1474     }
1475 
1476     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1477     /**
1478      * Computes the smaller of this vector and the broadcast of an input scalar.
1479      *
1480      * This is a lane-wise binary operation which applies the
1481      * operation {@code Math.min()} to each pair of
1482      * corresponding lane values.
1483      *
1484      * This method is also equivalent to the expression
1485      * {@link #lanewise(VectorOperators.Binary,byte)
1486      *    lanewise}{@code (}{@link VectorOperators#MIN
1487      *    MIN}{@code , e)}.
1488      *
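     * <p>
     * For instance, a sketch of clamping lanes to an upper bound
     * (assuming a hypothetical species constant {@code SPECIES} and an
     * array {@code a}):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * ByteVector clamped = v.min((byte) 100);  // no lane exceeds 100
     * }</pre>
     *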
1489      * @param e the input scalar
1490      * @return the result of taking the smaller of this vector and the given scalar
1491      * @see #min(Vector)
1492      * @see #broadcast(byte)
1493      * @see VectorOperators#MIN
1494      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1495      */
1496     @ForceInline
1497     public final ByteVector min(byte e) {
1498         return lanewise(MIN, e);
1499     }
1500 
1501     /**
1502      * {@inheritDoc} <!--workaround-->
1503      */
1504     @Override
1505     @ForceInline
1506     public final ByteVector max(Vector<Byte> v) {
1507         return lanewise(MAX, v);
1508     }
1509 
1510     /**
1511      * Computes the larger of this vector and the broadcast of an input scalar.
1512      *
1513      * This is a lane-wise binary operation which applies the
1514      * operation {@code Math.max()} to each pair of
1515      * corresponding lane values.
1516      *
1517      * This method is also equivalent to the expression
1518      * {@link #lanewise(VectorOperators.Binary,byte)
1519      *    lanewise}{@code (}{@link VectorOperators#MAX
1520      *    MAX}{@code , e)}.
1521      *
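     * <p>
     * Combined with {@link #min(byte) min}, this can clamp lanes into a
     * range, as in this sketch (species constant and array are assumed):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * ByteVector clamped = v.max((byte) 0).min((byte) 100);  // lanes forced into [0, 100]
     * }</pre>
     *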
1522      * @param e the input scalar
1523      * @return the result of taking the larger of this vector and the given scalar
1524      * @see #max(Vector)
1525      * @see #broadcast(byte)
1526      * @see VectorOperators#MAX
1527      * @see #lanewise(VectorOperators.Binary,byte,VectorMask)
1528      */
1529     @ForceInline
1530     public final ByteVector max(byte e) {
1531         return lanewise(MAX, e);
1532     }
1533 
1534     // common bitwise operators: and, or, not (with scalar versions)
1535     /**
1536      * Computes the bitwise logical conjunction ({@code &})
1537      * of this vector and a second input vector.
1538      *
1539      * This is a lane-wise binary operation which applies
1540      * the primitive bitwise "and" operation ({@code &})
1541      * to each pair of corresponding lane values.
1542      *
1543      * This method is also equivalent to the expression
1544      * {@link #lanewise(VectorOperators.Binary,Vector)
1545      *    lanewise}{@code (}{@link VectorOperators#AND
1546      *    AND}{@code , v)}.
1547      *
1548      * <p>
1549      * This is not a full-service named operation like
1550      * {@link #add(Vector) add}.  A masked version of
1551      * this operation is not directly available
1552      * but may be obtained via the masked version of
1553      * {@code lanewise}.
1554      *
1555      * @param v a second input vector
1556      * @return the bitwise {@code &} of this vector and the second input vector
1557      * @see #and(byte)
1558      * @see #or(Vector)
1559      * @see #not()
1560      * @see VectorOperators#AND
1561      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1562      */
1563     @ForceInline
1564     public final ByteVector and(Vector<Byte> v) {
1565         return lanewise(AND, v);
1566     }
1567 
1568     /**
1569      * Computes the bitwise logical conjunction ({@code &})
1570      * of this vector and a scalar.
1571      *
1572      * This is a lane-wise binary operation which applies
1573      * the primitive bitwise "and" operation ({@code &})
1574      * to each pair of corresponding lane values.
1575      *
1576      * This method is also equivalent to the expression
1577      * {@link #lanewise(VectorOperators.Binary,Vector)
1578      *    lanewise}{@code (}{@link VectorOperators#AND
1579      *    AND}{@code , e)}.
1580      *
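     * <p>
     * A sketch of masking out all but the low four bits of each lane
     * (assuming a hypothetical species constant {@code SPECIES} and an
     * array {@code a}):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * ByteVector lowNibbles = v.and((byte) 0x0F);
     * }</pre>
     *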
1581      * @param e an input scalar
1582      * @return the bitwise {@code &} of this vector and scalar
1583      * @see #and(Vector)
1584      * @see VectorOperators#AND
1585      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1586      */
1587     @ForceInline
1588     public final ByteVector and(byte e) {
1589         return lanewise(AND, e);
1590     }
1591 
1592     /**
1593      * Computes the bitwise logical disjunction ({@code |})
1594      * of this vector and a second input vector.
1595      *
1596      * This is a lane-wise binary operation which applies
1597      * the primitive bitwise "or" operation ({@code |})
1598      * to each pair of corresponding lane values.
1599      *
1600      * This method is also equivalent to the expression
1601      * {@link #lanewise(VectorOperators.Binary,Vector)
1602      *    lanewise}{@code (}{@link VectorOperators#OR
1603      *    OR}{@code , v)}.
1604      *
1605      * <p>
1606      * This is not a full-service named operation like
1607      * {@link #add(Vector) add}.  A masked version of
1608      * this operation is not directly available
1609      * but may be obtained via the masked version of
1610      * {@code lanewise}.
1611      *
1612      * @param v a second input vector
1613      * @return the bitwise {@code |} of this vector and the second input vector
1614      * @see #or(byte)
1615      * @see #and(Vector)
1616      * @see #not()
1617      * @see VectorOperators#OR
1618      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1619      */
1620     @ForceInline
1621     public final ByteVector or(Vector<Byte> v) {
1622         return lanewise(OR, v);
1623     }
1624 
1625     /**
1626      * Computes the bitwise logical disjunction ({@code |})
1627      * of this vector and a scalar.
1628      *
1629      * This is a lane-wise binary operation which applies
1630      * the primitive bitwise "or" operation ({@code |})
1631      * to each pair of corresponding lane values.
1632      *
1633      * This method is also equivalent to the expression
1634      * {@link #lanewise(VectorOperators.Binary,Vector)
1635      *    lanewise}{@code (}{@link VectorOperators#OR
1636      *    OR}{@code , e)}.
1637      *
1638      * @param e an input scalar
1639      * @return the bitwise {@code |} of this vector and scalar
1640      * @see #or(Vector)
1641      * @see VectorOperators#OR
1642      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1643      */
1644     @ForceInline
1645     public final ByteVector or(byte e) {
1646         return lanewise(OR, e);
1647     }
1648 
1649 
1650 
1651     /// UNARY METHODS
1652 
1653     /**
1654      * {@inheritDoc} <!--workaround-->
1655      */
1656     @Override
1657     @ForceInline
1658     public final
1659     ByteVector neg() {
1660         return lanewise(NEG);
1661     }
1662 
1663     /**
1664      * {@inheritDoc} <!--workaround-->
1665      */
1666     @Override
1667     @ForceInline
1668     public final
1669     ByteVector abs() {
1670         return lanewise(ABS);
1671     }
1672 
1673     // not (~)
1674     /**
1675      * Computes the bitwise logical complement ({@code ~})
1676      * of this vector.
1677      *
1678      * This is a lane-wise unary operation which applies
1679      * the primitive bitwise "not" operation ({@code ~})
1680      * to each lane value.
1681      *
1682      * This method is also equivalent to the expression
1683      * {@link #lanewise(VectorOperators.Unary)
1684      *    lanewise}{@code (}{@link VectorOperators#NOT
1685      *    NOT}{@code )}.
1686      *
1687      * <p>
1688      * This is not a full-service named operation like
1689      * {@link #add(Vector) add}.  A masked version of
1690      * this operation is not directly available
1691      * but may be obtained via the masked version of
1692      * {@code lanewise}.
1693      *
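     * <p>
     * For example (a sketch, with a species constant {@code SPECIES} and
     * an array {@code a} assumed):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * ByteVector flipped = v.not();  // same as v.lanewise(VectorOperators.NOT)
     * }</pre>
     *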
1694      * @return the bitwise complement {@code ~} of this vector
1695      * @see #and(Vector)
1696      * @see VectorOperators#NOT
1697      * @see #lanewise(VectorOperators.Unary,VectorMask)
1698      */
1699     @ForceInline
1700     public final ByteVector not() {
1701         return lanewise(NOT);
1702     }
1703 
1704 
1705     /// COMPARISONS
1706 
1707     /**
1708      * {@inheritDoc} <!--workaround-->
1709      */
1710     @Override
1711     @ForceInline
1712     public final
1713     VectorMask<Byte> eq(Vector<Byte> v) {
1714         return compare(EQ, v);
1715     }
1716 
1717     /**
1718      * Tests if this vector is equal to an input scalar.
1719      *
1720      * This is a lane-wise binary test operation which applies
1721      * the primitive equals operation ({@code ==}) to each lane.
1722      * The result is the same as {@code compare(VectorOperators.EQ, e)}.
1723      *
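     * <p>
     * The resulting mask is typically fed into a masked operation, as in
     * this sketch (species constant and array assumed):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * VectorMask<Byte> zeros = v.eq((byte) 0);
     * ByteVector filled = v.blend((byte) -1, zeros);  // replace zero lanes with -1
     * }</pre>
     *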
1724      * @param e the input scalar
1725      * @return the result mask of testing if this vector
1726      *         is equal to {@code e}
1727      * @see #compare(VectorOperators.Comparison,byte)
1728      */
1729     @ForceInline
1730     public final
1731     VectorMask<Byte> eq(byte e) {
1732         return compare(EQ, e);
1733     }
1734 
1735     /**
1736      * {@inheritDoc} <!--workaround-->
1737      */
1738     @Override
1739     @ForceInline
1740     public final
1741     VectorMask<Byte> lt(Vector<Byte> v) {
1742         return compare(LT, v);
1743     }
1744 
1745     /**
1746      * Tests if this vector is less than an input scalar.
1747      *
1748      * This is a lane-wise binary test operation which applies
1749      * the primitive less than operation ({@code <}) to each lane.
1750      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1751      *
1752      * @param e the input scalar
1753      * @return the mask result of testing if this vector
1754      *         is less than the input scalar
1755      * @see #compare(VectorOperators.Comparison,byte)
1756      */
1757     @ForceInline
1758     public final
1759     VectorMask<Byte> lt(byte e) {
1760         return compare(LT, e);
1761     }
1762 
1763     /**
1764      * {@inheritDoc} <!--workaround-->
1765      */
1766     @Override
1767     public abstract
1768     VectorMask<Byte> test(VectorOperators.Test op);
1769 
1770     /*package-private*/
1771     @ForceInline
1772     final
1773     <M extends VectorMask<Byte>>
1774     M testTemplate(Class<M> maskType, Test op) {
1775         ByteSpecies vsp = vspecies();
1776         if (opKind(op, VO_SPECIAL)) {
1777             ByteVector bits = this.viewAsIntegralLanes();
1778             VectorMask<Byte> m;
1779             if (op == IS_DEFAULT) {
1780                 m = bits.compare(EQ, (byte) 0);
1781             } else if (op == IS_NEGATIVE) {
1782                 m = bits.compare(LT, (byte) 0);
1783             }
1784             else {
1785                 throw new AssertionError(op);
1786             }
1787             return maskType.cast(m);
1788         }
1789         int opc = opCode(op);
1790         throw new AssertionError(op);
1791     }
1792 
1793     /**
1794      * {@inheritDoc} <!--workaround-->
1795      */
1796     @Override
1797     @ForceInline
1798     public final
1799     VectorMask<Byte> test(VectorOperators.Test op,
1800                                   VectorMask<Byte> m) {
1801         return test(op).and(m);
1802     }
1803 
1804     /**
1805      * {@inheritDoc} <!--workaround-->
1806      */
1807     @Override
1808     public abstract
1809     VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);
1810 
1811     /*package-private*/
1812     @ForceInline
1813     final
1814     <M extends VectorMask<Byte>>
1815     M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
1816         Objects.requireNonNull(v);
1817         ByteSpecies vsp = vspecies();
1818         ByteVector that = (ByteVector) v;
1819         that.check(this);
1820         int opc = opCode(op);
1821         return VectorIntrinsics.compare(
1822             opc, getClass(), maskType, byte.class, length(),
1823             this, that,
1824             (cond, v0, v1) -> {
1825                 AbstractMask<Byte> m
1826                     = v0.bTest(cond, v1, (cond_, i, a, b)
1827                                -> compareWithOp(cond, a, b));
1828                 @SuppressWarnings("unchecked")
1829                 M m2 = (M) m;
1830                 return m2;
1831             });
1832     }
1833 
1834     @ForceInline
1835     private static
1836     boolean compareWithOp(int cond, byte a, byte b) {
1837         switch (cond) {
1838         case VectorIntrinsics.BT_eq:  return a == b;
1839         case VectorIntrinsics.BT_ne:  return a != b;
1840         case VectorIntrinsics.BT_lt:  return a <  b;
1841         case VectorIntrinsics.BT_le:  return a <= b;
1842         case VectorIntrinsics.BT_gt:  return a >  b;
1843         case VectorIntrinsics.BT_ge:  return a >= b;
1844         }
1845         throw new AssertionError();
1846     }
1847 
1848     /**
1849      * {@inheritDoc} <!--workaround-->
1850      */
1851     @Override
1852     @ForceInline
1853     public final
1854     VectorMask<Byte> compare(VectorOperators.Comparison op,
1855                                   Vector<Byte> v,
1856                                   VectorMask<Byte> m) {
1857         return compare(op, v).and(m);
1858     }
1859 
1860     /**
1861      * Tests this vector by comparing it with an input scalar,
1862      * according to the given comparison operation.
1863      *
1864      * This is a lane-wise binary test operation which applies
1865      * the comparison operation to each lane.
1866      * <p>
1867      * The result is the same as
1868      * {@code compare(op, broadcast(species(), e))}.
1869      * That is, the scalar may be regarded as broadcast to
1870      * a vector of the same species, and then compared
1871      * against the original vector, using the selected
1872      * comparison operation.
1873      *
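     * <p>
     * A short sketch (assuming {@code SPECIES} and {@code a}); the
     * comparison token comes from {@link VectorOperators}:
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * VectorMask<Byte> negative = v.compare(VectorOperators.LT, (byte) 0);
     * }</pre>
     *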
1874      * @param op the operation used to compare lane values
1875      * @param e the input scalar
1876      * @return the mask result of testing lane-wise if this vector
1877      *         compares to the input, according to the selected
1878      *         comparison operator
1879      * @see ByteVector#compare(VectorOperators.Comparison,Vector)
1880      * @see #eq(byte)
1881      * @see #lt(byte)
1882      */
1883     public abstract
1884     VectorMask<Byte> compare(Comparison op, byte e);
1885 
1886     /*package-private*/
1887     @ForceInline
1888     final
1889     <M extends VectorMask<Byte>>
1890     M compareTemplate(Class<M> maskType, Comparison op, byte e) {
1891         return compareTemplate(maskType, op, broadcast(e));
1892     }
1893 
1894     /**
1895      * Tests this vector by comparing it with an input scalar,
1896      * according to the given comparison operation,
1897      * in lanes selected by a mask.
1898      *
1899      * This is a masked lane-wise binary test operation which applies
1900      * the comparison operation to each pair of corresponding lane values.
1901      *
1902      * The returned result is equal to the expression
1903      * {@code compare(op,e).and(m)}.
1904      *
1905      * @param op the operation used to compare lane values
1906      * @param e the input scalar
1907      * @param m the mask controlling lane selection
1908      * @return the mask result of testing lane-wise if this vector
1909      *         compares to the input, according to the selected
1910      *         comparison operator,
1911      *         and only in the lanes selected by the mask
1912      * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1913      */
1914     @ForceInline
1915     public final VectorMask<Byte> compare(VectorOperators.Comparison op,
1916                                                byte e,
1917                                                VectorMask<Byte> m) {
1918         return compare(op, e).and(m);
1919     }
1920 
1921     /**
1922      * {@inheritDoc} <!--workaround-->
1923      */
1924     @Override
1925     public abstract
1926     VectorMask<Byte> compare(Comparison op, long e);
1927 
1928     /*package-private*/
1929     @ForceInline
1930     final
1931     <M extends VectorMask<Byte>>
1932     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1933         return compareTemplate(maskType, op, broadcast(e));
1934     }
1935 
1936     /**
1937      * {@inheritDoc} <!--workaround-->
1938      */
1939     @Override
1940     @ForceInline
1941     public final
1942     VectorMask<Byte> compare(Comparison op, long e, VectorMask<Byte> m) {
1943         return compare(op, broadcast(e), m);
1944     }
1945 
1946 
1947 
1948     /**
1949      * {@inheritDoc} <!--workaround-->
1950      */
1951     @Override public abstract
1952     ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);
1953 
1954     /*package-private*/
1955     @ForceInline
1956     final
1957     <M extends VectorMask<Byte>>
1958     ByteVector
1959     blendTemplate(Class<M> maskType, ByteVector v, M m) {
1960         v.check(this);
1961         return VectorIntrinsics.blend(
1962             getClass(), maskType, byte.class, length(),
1963             this, v, m,
1964             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
1965     }
1966 
1967     /**
1968      * {@inheritDoc} <!--workaround-->
1969      */
1970     @Override public abstract ByteVector addIndex(int scale);
1971 
1972     /*package-private*/
1973     @ForceInline
1974     final ByteVector addIndexTemplate(int scale) {
1975         ByteSpecies vsp = vspecies();
1976         // make sure VLENGTH*scale doesn't overflow:
1977         vsp.checkScale(scale);
1978         return VectorIntrinsics.indexVector(
1979             getClass(), byte.class, length(),
1980             this, scale, vsp,
1981             (v, scale_, s)
1982             -> {
1983                 // If the platform doesn't support an INDEX
1984                 // instruction directly, load IOTA from memory
1985                 // and multiply.
1986                 ByteVector iota = s.iota();
1987                 byte sc = (byte) scale_;
1988                 return v.add(sc == 1 ? iota : iota.mul(sc));
1989             });
1990     }
1991 
1992     /**
1993      * Replaces selected lanes of this vector with
1994      * a scalar value
1995      * under the control of a mask.
1996      *
1997      * This is a masked lane-wise binary operation which
1998      * selects each lane value from one or the other input.
1999      *
2000      * The returned result is equal to the expression
2001      * {@code blend(broadcast(e),m)}.
2002      *
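     * <p>
     * A sketch of replacing negative lanes with zero (species constant
     * and source array assumed):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * ByteVector clipped = v.blend((byte) 0, v.lt((byte) 0));
     * }</pre>
     *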
2003      * @param e the input scalar, containing the replacement lane value
2004      * @param m the mask controlling lane selection of the scalar
2005      * @return the result of blending the lane elements of this vector with
2006      *         the scalar value
2007      */
2008     @ForceInline
2009     public final ByteVector blend(byte e,
2010                                             VectorMask<Byte> m) {
2011         return blend(broadcast(e), m);
2012     }
2013 
2014     /**
2015      * Replaces selected lanes of this vector with
2016      * a scalar value
2017      * under the control of a mask.
2018      *
2019      * This is a masked lane-wise binary operation which
2020      * selects each lane value from one or the other input.
2021      *
2022      * The returned result is equal to the expression
2023      * {@code blend(broadcast(e),m)}.
2024      *
2025      * @param e the input scalar, containing the replacement lane value
2026      * @param m the mask controlling lane selection of the scalar
2027      * @return the result of blending the lane elements of this vector with
2028      *         the scalar value
2029      */
2030     @ForceInline
2031     public final ByteVector blend(long e,
2032                                             VectorMask<Byte> m) {
2033         return blend(broadcast(e), m);
2034     }
2035 
2036     /**
2037      * {@inheritDoc} <!--workaround-->
2038      */
2039     @Override
2040     public abstract
2041     ByteVector slice(int origin, Vector<Byte> v1);
2042 
2043     /*package-private*/
2044     final
2045     @ForceInline
2046     ByteVector sliceTemplate(int origin, Vector<Byte> v1) {
2047         ByteVector that = (ByteVector) v1;
2048         that.check(this);
2049         byte[] a0 = this.getElements();
2050         byte[] a1 = that.getElements();
2051         byte[] res = new byte[a0.length];
2052         int vlen = res.length;
2053         int firstPart = vlen - origin;
2054         System.arraycopy(a0, origin, res, 0, firstPart);
2055         System.arraycopy(a1, 0, res, firstPart, origin);
2056         return vectorFactory(res);
2057     }
2058 
2059     /**
2060      * {@inheritDoc} <!--workaround-->
2061      */
2062     @Override
2063     @ForceInline
2064     public final
2065     ByteVector slice(int origin,
2066                                Vector<Byte> w,
2067                                VectorMask<Byte> m) {
2068         return broadcast(0).blend(slice(origin, w), m);
2069     }
2070 
2071     /**
2072      * {@inheritDoc} <!--workaround-->
2073      */
2074     @Override
2075     public abstract
2076     ByteVector slice(int origin);
2077 
2078     /**
2079      * {@inheritDoc} <!--workaround-->
2080      */
2081     @Override
2082     public abstract
2083     ByteVector unslice(int origin, Vector<Byte> w, int part);
2084 
2085     /*package-private*/
2086     final
2087     @ForceInline
2088     ByteVector
2089     unsliceTemplate(int origin, Vector<Byte> w, int part) {
2090         ByteVector that = (ByteVector) w;
2091         that.check(this);
2092         byte[] slice = this.getElements();
2093         byte[] res = that.getElements();
2094         int vlen = res.length;
2095         int firstPart = vlen - origin;
2096         switch (part) {
2097         case 0:
2098             System.arraycopy(slice, 0, res, origin, firstPart);
2099             break;
2100         case 1:
2101             System.arraycopy(slice, firstPart, res, 0, origin);
2102             break;
2103         default:
2104             throw wrongPartForSlice(part);
2105         }
2106         return vectorFactory(res);
2107     }
2108 
2109     /*package-private*/
2110     final
2111     @ForceInline
2112     <M extends VectorMask<Byte>>
2113     ByteVector
2114     unsliceTemplate(Class<M> maskType, int origin, Vector<Byte> w, int part, M m) {
2115         ByteVector that = (ByteVector) w;
2116         that.check(this);
2117         ByteVector slice = that.sliceTemplate(origin, that);
2118         slice = slice.blendTemplate(maskType, this, m);
2119         return slice.unsliceTemplate(origin, w, part);
2120     }
2121 
2122     /**
2123      * {@inheritDoc} <!--workaround-->
2124      */
2125     @Override
2126     public abstract
2127     ByteVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m);
2128 
2129     /**
2130      * {@inheritDoc} <!--workaround-->
2131      */
2132     @Override
2133     public abstract
2134     ByteVector unslice(int origin); 
2135 
2136     private ArrayIndexOutOfBoundsException
2137     wrongPartForSlice(int part) {
2138         String msg = String.format("bad part number %d for slice operation",
2139                                    part);
2140         return new ArrayIndexOutOfBoundsException(msg);
2141     }
2142 
2143     /**
2144      * {@inheritDoc} <!--workaround-->
2145      */
2146     @Override
2147     public abstract
2148     ByteVector rearrange(VectorShuffle<Byte> m);
2149 
2150     /*package-private*/
2151     @ForceInline
2152     final
2153     <S extends VectorShuffle<Byte>>
2154     ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2155         shuffle.checkIndexes();
2156         return VectorIntrinsics.rearrangeOp(
2157             getClass(), shuffletype, byte.class, length(),
2158             this, shuffle,
2159             (v1, s_) -> v1.uOp((i, a) -> {
2160                 int ei = s_.laneSource(i);
2161                 return v1.lane(ei);
2162             }));
2163     }
2164 
2165     /**
2166      * {@inheritDoc} <!--workaround-->
2167      */
2168     @Override
2169     public abstract
2170     ByteVector rearrange(VectorShuffle<Byte> s,
2171                                    VectorMask<Byte> m);
2172 
2173     /*package-private*/
2174     @ForceInline
2175     final
2176     <S extends VectorShuffle<Byte>>
2177     ByteVector rearrangeTemplate(Class<S> shuffletype,
2178                                            S shuffle,
2179                                            VectorMask<Byte> m) {
2180         ByteVector unmasked =
2181             VectorIntrinsics.rearrangeOp(
2182                 getClass(), shuffletype, byte.class, length(),
2183                 this, shuffle,
2184                 (v1, s_) -> v1.uOp((i, a) -> {
2185                     int ei = s_.laneSource(i);
2186                     return ei < 0 ? 0 : v1.lane(ei);
2187                 }));
2188         VectorMask<Byte> valid = shuffle.laneIsValid();
2189         if (m.andNot(valid).anyTrue()) {
2190             shuffle.checkIndexes();
2191             throw new AssertionError();
2192         }
2193         return broadcast((byte)0).blend(unmasked, valid);
2194     }
2195 
2196     /**
2197      * {@inheritDoc} <!--workaround-->
2198      */
2199     @Override
2200     public abstract
2201     ByteVector rearrange(VectorShuffle<Byte> s,
2202                                    Vector<Byte> v);
2203 
2204     /*package-private*/
2205     @ForceInline
2206     final
2207     <S extends VectorShuffle<Byte>>
2208     ByteVector rearrangeTemplate(Class<S> shuffletype,
2209                                            S shuffle,
2210                                            ByteVector v) {
2211         VectorMask<Byte> valid = shuffle.laneIsValid();
2212         S ws = shuffletype.cast(shuffle.wrapIndexes());
2213         ByteVector r0 =
2214             VectorIntrinsics.rearrangeOp(
2215                 getClass(), shuffletype, byte.class, length(),
2216                 this, ws,
2217                 (v0, s_) -> v0.uOp((i, a) -> {
2218                     int ei = s_.laneSource(i);
2219                     return v0.lane(ei);
2220                 }));
2221         ByteVector r1 =
2222             VectorIntrinsics.rearrangeOp(
2223                 getClass(), shuffletype, byte.class, length(),
2224                 v, ws,
2225                 (v1, s_) -> v1.uOp((i, a) -> {
2226                     int ei = s_.laneSource(i);
2227                     return v1.lane(ei);
2228                 }));
2229         return r1.blend(r0, valid);
2230     }
2231 
2232     /**
2233      * {@inheritDoc} <!--workaround-->
2234      */
2235     @Override
2236     public abstract
2237     ByteVector selectFrom(Vector<Byte> v);
2238 
2239     /*package-private*/
2240     @ForceInline
2241     final ByteVector selectFromTemplate(ByteVector v) {
2242         return v.rearrange(this.toShuffle());
2243     }
2244 
2245     /**
2246      * {@inheritDoc} <!--workaround-->
2247      */
2248     @Override
2249     public abstract
2250     ByteVector selectFrom(Vector<Byte> s, VectorMask<Byte> m);
2251 
2252     /*package-private*/
2253     @ForceInline
2254     final ByteVector selectFromTemplate(ByteVector v,
2255                                                   AbstractMask<Byte> m) {
2256         return v.rearrange(this.toShuffle(), m);
2257     }
2258 
2259     /// Ternary operations
2260 
2261     /**
2262      * Blends together the bits of two vectors under
2263      * the control of a third, which supplies mask bits.
2264      *
2266      * This is a lane-wise ternary operation which performs
2267      * a bitwise blending operation {@code (a&~c)|(b&c)}
2268      * to each lane.
2269      *
2270      * This method is also equivalent to the expression
2271      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2272      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2273      *    BITWISE_BLEND}{@code , bits, mask)}.
2274      *
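     * <p>
     * A sketch of selecting bits from two vectors under a bit mask; the
     * vectors {@code a}, {@code b}, and {@code bitMask} are assumed to be
     * of the same species:
     * <pre>{@code
     * // For each bit position: take the bit from b where bitMask has a 1,
     * // otherwise keep the bit from a.
     * ByteVector blended = a.bitwiseBlend(b, bitMask);
     * // equivalent to a.and(bitMask.not()).or(b.and(bitMask))
     * }</pre>
     *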
2275      * @param bits input bits to blend into the current vector
2276      * @param mask a bitwise mask to enable blending of the input bits
2277      * @return the bitwise blend of the given bits into the current vector,
2278      *         under control of the bitwise mask
2279      * @see #bitwiseBlend(byte,byte)
2280      * @see #bitwiseBlend(byte,Vector)
2281      * @see #bitwiseBlend(Vector,byte)
2282      * @see VectorOperators#BITWISE_BLEND
2283      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2284      */
2285     @ForceInline
2286     public final
2287     ByteVector bitwiseBlend(Vector<Byte> bits, Vector<Byte> mask) {
2288         return lanewise(BITWISE_BLEND, bits, mask);
2289     }
2290 
2291     /**
2292      * Blends together the bits of a vector and a scalar under
2293      * the control of another scalar, which supplies mask bits.
2294      *
2296      * This is a lane-wise ternary operation which performs
2297      * a bitwise blending operation {@code (a&~c)|(b&c)}
2298      * to each lane.
2299      *
2300      * This method is also equivalent to the expression
2301      * {@link #lanewise(VectorOperators.Ternary,byte,byte)
2302      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2303      *    BITWISE_BLEND}{@code , bits, mask)}.
2304      *
2305      * @param bits input bits to blend into the current vector
2306      * @param mask a bitwise mask to enable blending of the input bits
2307      * @return the bitwise blend of the given bits into the current vector,
2308      *         under control of the bitwise mask
2309      * @see #bitwiseBlend(Vector,Vector)
2310      * @see VectorOperators#BITWISE_BLEND
2311      * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
2312      */
2313     @ForceInline
2314     public final
2315     ByteVector bitwiseBlend(byte bits, byte mask) {
2316         return lanewise(BITWISE_BLEND, bits, mask);
2317     }
2318 
2319     /**
2320      * Blends together the bits of a vector and a scalar under
2321      * the control of another vector, which supplies mask bits.
2322      *
2324      * This is a lane-wise ternary operation which performs
2325      * a bitwise blending operation {@code (a&~c)|(b&c)}
2326      * to each lane.
2327      *
2328      * This method is also equivalent to the expression
2329      * {@link #lanewise(VectorOperators.Ternary,byte,Vector)
2330      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2331      *    BITWISE_BLEND}{@code , bits, mask)}.
2332      *
2333      * @param bits input bits to blend into the current vector
2334      * @param mask a bitwise mask to enable blending of the input bits
2335      * @return the bitwise blend of the given bits into the current vector,
2336      *         under control of the bitwise mask
2337      * @see #bitwiseBlend(Vector,Vector)
2338      * @see VectorOperators#BITWISE_BLEND
2339      * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
2340      */
2341     @ForceInline
2342     public final
2343     ByteVector bitwiseBlend(byte bits, Vector<Byte> mask) {
2344         return lanewise(BITWISE_BLEND, bits, mask);
2345     }
2346 
2347     /**
2348      * Blends together the bits of two vectors under
2349      * the control of a scalar, which supplies mask bits.
2350      *
2352      * This is a lane-wise ternary operation which performs
2353      * a bitwise blending operation {@code (a&~c)|(b&c)}
2354      * to each lane.
2355      *
2356      * This method is also equivalent to the expression
2357      * {@link #lanewise(VectorOperators.Ternary,Vector,byte)
2358      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2359      *    BITWISE_BLEND}{@code , bits, mask)}.
2360      *
2361      * @param bits input bits to blend into the current vector
2362      * @param mask a bitwise mask to enable blending of the input bits
2363      * @return the bitwise blend of the given bits into the current vector,
2364      *         under control of the bitwise mask
2365      * @see #bitwiseBlend(Vector,Vector)
2366      * @see VectorOperators#BITWISE_BLEND
2367      * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
2368      */
2369     @ForceInline
2370     public final
2371     ByteVector bitwiseBlend(Vector<Byte> bits, byte mask) {
2372         return lanewise(BITWISE_BLEND, bits, mask);
2373     }
2374 
2375 
2376     // Type specific horizontal reductions
2377 
2378     /**
2379      * Returns a value accumulated from all the lanes of this vector.
2380      *
2381      * This is an associative cross-lane reduction operation which
2382      * applies the specified operation to all the lane elements.
2383      *
2384      * <p>
2385      * A few reduction operations do not support arbitrary reordering
2386      * of their operands, yet are included here because of their
2387      * usefulness.
2388      *
2389      * <ul>
2390      * <li>
2391      * In the case of {@code FIRST_NONZERO}, the reduction returns
2392      * the value from the lowest-numbered non-zero lane.
2393      *
2394      *
2395      * <li>
2396      * In the case of floating point addition and multiplication, the
2397      * precise result will reflect the choice of an arbitrary order
2398      * of operations, which may even vary over time.
2399      *
2400      * <li>
2401      * All other reduction operations are fully commutative and
2402      * associative.  The implementation can choose any order of
2403      * processing, yet it will always produce the same result.
2404      *
2405      * </ul>
2406      *
2407      *
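     * <p>
     * For example, a sketch of summing all lanes (assuming a species
     * constant {@code SPECIES} and an array {@code a}); note that the
     * {@code byte} sum wraps around on overflow:
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * byte sum = v.reduceLanes(VectorOperators.ADD);
     * }</pre>
     *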
2408      * @param op the operation used to combine lane values
2409      * @return the accumulated result
2410      * @throws UnsupportedOperationException if this vector does
2411      *         not support the requested operation
2412      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2413      * @see #add(Vector)
2414      * @see #mul(Vector)
2415      * @see #min(Vector)
2416      * @see #max(Vector)
2417      * @see #and(Vector)
2418      * @see #or(Vector)
2419      * @see VectorOperators#XOR
2420      * @see VectorOperators#FIRST_NONZERO
2421      */
2422     public abstract byte reduceLanes(VectorOperators.Associative op);
2423 
2424     /**
2425      * Returns a value accumulated from selected lanes of this vector,
2426      * controlled by a mask.
2427      *
2428      * This is an associative cross-lane reduction operation which
2429      * applies the specified operation to the selected lane elements.
2430      * <p>
2431      * If no elements are selected, an operation-specific identity
2432      * value is returned.
2433      * <ul>
2434      * <li>
2435      * If the operation is
2436      *  {@code ADD}, {@code XOR}, {@code OR},
2437      * or {@code FIRST_NONZERO},
2438      * then the identity value is zero, the default {@code byte} value.
2439      * <li>
2440      * If the operation is {@code MUL},
2441      * then the identity value is one.
2442      * <li>
2443      * If the operation is {@code AND},
2444      * then the identity value is minus one (all bits set).
2445      * <li>
2446      * If the operation is {@code MAX},
2447      * then the identity value is {@code Byte.MIN_VALUE}.
2448      * <li>
2449      * If the operation is {@code MIN},
2450      * then the identity value is {@code Byte.MAX_VALUE}.
2451      * </ul>
2452      *
2453      * @param op the operation used to combine lane values
2454      * @param m the mask controlling lane selection
2455      * @return the reduced result accumulated from the selected lane values
2456      * @throws UnsupportedOperationException if this vector does
2457      *         not support the requested operation
2458      * @see #reduceLanes(VectorOperators.Associative)
2459      */
2460     public abstract byte reduceLanes(VectorOperators.Associative op,
2461                                        VectorMask<Byte> m);
2462 
2463     /*package-private*/
2464     @ForceInline
2465     final
2466     byte reduceLanesTemplate(VectorOperators.Associative op,
2467                                VectorMask<Byte> m) {
2468         ByteVector v = reduceIdentityVector(op).blend(this, m);
2469         return v.reduceLanesTemplate(op);
2470     }
2471 
2472     /*package-private*/
2473     @ForceInline
2474     final
2475     byte reduceLanesTemplate(VectorOperators.Associative op) {
2476         if (op == FIRST_NONZERO) {
2477             // FIXME:  The JIT should handle this, and other scan ops also.
2478             VectorMask<Byte> thisNZ
2479                 = this.viewAsIntegralLanes().compare(NE, (byte) 0);
2480             return this.lane(thisNZ.firstTrue());
2481         }
2482         int opc = opCode(op);
2483         return fromBits(VectorIntrinsics.reductionCoerced(
2484             opc, getClass(), byte.class, length(),
2485             this,
2486             REDUCE_IMPL.find(op, opc, (opc_) -> {
2487               switch (opc_) {
2488               case VECTOR_OP_ADD: return v ->
2489                       toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a + b)));
2490               case VECTOR_OP_MUL: return v ->
2491                       toBits(v.rOp((byte)1, (i, a, b) -> (byte)(a * b)));
2492               case VECTOR_OP_MIN: return v ->
2493                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (byte) Math.min(a, b)));
2494               case VECTOR_OP_MAX: return v ->
2495                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (byte) Math.max(a, b)));
2496               case VECTOR_OP_FIRST_NONZERO: return v ->
2497                       toBits(v.rOp((byte)0, (i, a, b) -> toBits(a) != 0 ? a : b));
2498               case VECTOR_OP_AND: return v ->
2499                       toBits(v.rOp((byte)-1, (i, a, b) -> (byte)(a & b)));
2500               case VECTOR_OP_OR: return v ->
2501                       toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a | b)));
2502               case VECTOR_OP_XOR: return v ->
2503                       toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a ^ b)));
2504               default: return null;
2505               }})));
2506     }
2507     private static final
2508     ImplCache<Associative,Function<ByteVector,Long>> REDUCE_IMPL
2509         = new ImplCache<>(Associative.class, ByteVector.class);
2510 
2511     private
2512     @ForceInline
2513     ByteVector reduceIdentityVector(VectorOperators.Associative op) {
2514         int opc = opCode(op);
2515         UnaryOperator<ByteVector> fn
2516             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2517                 switch (opc_) {
2518                 case VECTOR_OP_ADD:
2519                 case VECTOR_OP_OR:
2520                 case VECTOR_OP_XOR:
2521                 case VECTOR_OP_FIRST_NONZERO:
2522                     return v -> v.broadcast(0);
2523                 case VECTOR_OP_MUL:
2524                     return v -> v.broadcast(1);
2525                 case VECTOR_OP_AND:
2526                     return v -> v.broadcast(-1);
2527                 case VECTOR_OP_MIN:
2528                     return v -> v.broadcast(MAX_OR_INF);
2529                 case VECTOR_OP_MAX:
2530                     return v -> v.broadcast(MIN_OR_INF);
2531                 default: return null;
2532                 }
2533             });
2534         return fn.apply(this);
2535     }
2536     private static final
2537     ImplCache<Associative,UnaryOperator<ByteVector>> REDUCE_ID_IMPL
2538         = new ImplCache<>(Associative.class, ByteVector.class);
2539 
2540     private static final byte MIN_OR_INF = Byte.MIN_VALUE;
2541     private static final byte MAX_OR_INF = Byte.MAX_VALUE;
2542 
2543     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
2544     public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
2545                                                      VectorMask<Byte> m);
2546 
2547     // Type specific accessors
2548 
2549     /**
2550      * Gets the lane element at lane index {@code i}.
2551      *
2552      * @param i the lane index
2553      * @return the lane element at lane index {@code i}
2554      * @throws IllegalArgumentException if the index is out of range
2555      * ({@code < 0 || >= length()})
2556      */
2557     public abstract byte lane(int i);
2558 
2559     /**
2560      * Replaces the lane element of this vector at lane index {@code i} with
2561      * value {@code e}.
2562      *
2563      * This is a cross-lane operation and behaves as if it returns the result
2564      * of blending this vector with an input vector that is the result of
2565      * broadcasting {@code e} and a mask that has only one lane set at lane
2566      * index {@code i}.
2567      *
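     * <p>
     * A sketch (species constant and array assumed):
     * <pre>{@code
     * ByteVector v = ByteVector.fromArray(SPECIES, a, 0);
     * ByteVector w = v.withLane(0, (byte) 42);  // only lane 0 is replaced
     * }</pre>
     *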
2568      * @param i the lane index of the lane element to be replaced
2569      * @param e the value to be placed
2570      * @return the result of replacing the lane element of this vector at lane
2571      * index {@code i} with value {@code e}.
2572      * @throws IllegalArgumentException if the index is out of range
2573      * ({@code < 0 || >= length()})
2574      */
2575     public abstract ByteVector withLane(int i, byte e);
2576 
2577     // Memory load operations
2578 
2579     /**
2580      * Returns an array of type {@code byte[]}
2581      * containing all the lane values.
2582      * The array length is the same as the vector length.
2583      * The array elements are stored in lane order.
2584      * <p>
2585      * This method behaves as if it stores
2586      * this vector into an allocated array
2587      * (using {@link #intoArray(byte[], int) intoArray})
2588      * and returns the array as follows:
2589      * <pre>{@code
2590      *   byte[] a = new byte[this.length()];
2591      *   this.intoArray(a, 0);
2592      *   return a;
2593      * }</pre>
2594      *
2595      * @return an array containing the lane values of this vector
2596      */
2597     @ForceInline
2598     @Override
2599     public final byte[] toArray() {
2600         byte[] a = new byte[vspecies().laneCount()];
2601         intoArray(a, 0);
2602         return a;
2603     }
2604 
2605     /** {@inheritDoc} <!--workaround-->
2606      * @implNote
2607      * When this method is used on vectors
2608      * of type {@code ByteVector},
2609      * there will be no loss of precision or range,
2610      * and so no {@code IllegalArgumentException} will
2611      * be thrown.
2612      */
2613     @ForceInline
2614     @Override
2615     public final int[] toIntArray() {
2616         byte[] a = toArray();
2617         int[] res = new int[a.length];
2618         for (int i = 0; i < a.length; i++) {
2619             byte e = a[i];
2620             res[i] = (int) ByteSpecies.toIntegralChecked(e, true);
2621         }
2622         return res;
2623     }
2624 
2625     /** {@inheritDoc} <!--workaround-->
2626      * @implNote
2627      * When this method is used on vectors
2628      * of type {@code ByteVector},
2629      * there will be no loss of precision or range,
2630      * and so no {@code IllegalArgumentException} will
2631      * be thrown.
2632      */
2633     @ForceInline
2634     @Override
2635     public final long[] toLongArray() {
2636         byte[] a = toArray();
2637         long[] res = new long[a.length];
2638         for (int i = 0; i < a.length; i++) {
2639             byte e = a[i];
2640             res[i] = ByteSpecies.toIntegralChecked(e, false);
2641         }
2642         return res;
2643     }
2644 
2645     /** {@inheritDoc} <!--workaround-->
2646      * @implNote
2647      * When this method is used on vectors
2648      * of type {@code ByteVector},
2649      * there will be no loss of precision.
2650      */
2651     @ForceInline
2652     @Override
2653     public final double[] toDoubleArray() {
2654         byte[] a = toArray();
2655         double[] res = new double[a.length];
2656         for (int i = 0; i < a.length; i++) {
2657             res[i] = (double) a[i];
2658         }
2659         return res;
2660     }
2661 
2662     /**
2663      * Loads a vector from a byte array starting at an offset.
2664      * Bytes are composed into primitive lane elements according
2665      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2666      * The vector is arranged into lanes according to
2667      * <a href="Vector.html#lane-order">memory ordering</a>.
2668      * <p>
2669      * This method behaves as if it returns the result of calling
2670      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2671      * fromByteBuffer()} as follows:
2672      * <pre>{@code
2673      * var bb = ByteBuffer.wrap(a);
2674      * var bo = ByteOrder.LITTLE_ENDIAN;
2675      * var m = species.maskAll(true);
2676      * return fromByteBuffer(species, bb, offset, bo, m);
2677      * }</pre>
2678      *
2679      * @param species species of desired vector
2680      * @param a the byte array
2681      * @param offset the offset into the array
2682      * @return a vector loaded from a byte array
2683      * @throws IndexOutOfBoundsException
2684      *         if {@code offset+N*ESIZE < 0}
2685      *         or {@code offset+(N+1)*ESIZE > a.length}
2686      *         for any lane {@code N} in the vector
2687      */
2688     @ForceInline
2689     public static
2690     ByteVector fromByteArray(VectorSpecies<Byte> species,
2691                                        byte[] a, int offset) {
2692         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN);
2693     }
2694 
2695     /**
2696      * Loads a vector from a byte array starting at an offset.
2697      * Bytes are composed into primitive lane elements according
2698      * to the specified byte order.
2699      * The vector is arranged into lanes according to
2700      * <a href="Vector.html#lane-order">memory ordering</a>.
2701      * <p>
2702      * This method behaves as if it returns the result of calling
2703      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2704      * fromByteBuffer()} as follows:
2705      * <pre>{@code
2706      * var bb = ByteBuffer.wrap(a);
2707      * var m = species.maskAll(true);
2708      * return fromByteBuffer(species, bb, offset, bo, m);
2709      * }</pre>
2710      *
2711      * @param species species of desired vector
2712      * @param a the byte array
2713      * @param offset the offset into the array
2714      * @param bo the intended byte order
2715      * @return a vector loaded from a byte array
2716      * @throws IndexOutOfBoundsException
2717      *         if {@code offset+N*ESIZE < 0}
2718      *         or {@code offset+(N+1)*ESIZE > a.length}
2719      *         for any lane {@code N} in the vector
2720      */
2721     @ForceInline
2722     public static
2723     ByteVector fromByteArray(VectorSpecies<Byte> species,
2724                                        byte[] a, int offset,
2725                                        ByteOrder bo) {
2726         ByteSpecies vsp = (ByteSpecies) species;
2727         offset = checkFromIndexSize(offset,
2728                                     vsp.vectorBitSize() / Byte.SIZE,
2729                                     a.length);
2730         return vsp.dummyVector()
2731             .fromByteArray0(a, offset).maybeSwap(bo);
2732     }
2733 
2734     /**
2735      * Loads a vector from a byte array starting at an offset
2736      * and using a mask.
2737      * Lanes where the mask is unset are filled with the default
2738      * value of {@code byte} (zero).
2739      * Bytes are composed into primitive lane elements according
2740      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2741      * The vector is arranged into lanes according to
2742      * <a href="Vector.html#lane-order">memory ordering</a>.
2743      * <p>
2744      * This method behaves as if it returns the result of calling
2745      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2746      * fromByteBuffer()} as follows:
2747      * <pre>{@code
2748      * var bb = ByteBuffer.wrap(a);
2749      * var bo = ByteOrder.LITTLE_ENDIAN;
2750      * return fromByteBuffer(species, bb, offset, bo, m);
2751      * }</pre>
2752      *
2753      * @param species species of desired vector
2754      * @param a the byte array
2755      * @param offset the offset into the array
2756      * @param m the mask controlling lane selection
2757      * @return a vector loaded from a byte array
2758      * @throws IndexOutOfBoundsException
2759      *         if {@code offset+N*ESIZE < 0}
2760      *         or {@code offset+(N+1)*ESIZE > a.length}
2761      *         for any lane {@code N} in the vector where
2762      *         the mask is set
2763      */
2764     @ForceInline
2765     public static
2766     ByteVector fromByteArray(VectorSpecies<Byte> species,
2767                                        byte[] a, int offset,
2768                                        VectorMask<Byte> m) {
2769         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m);
2770     }
2771 
2772     /**
2773      * Loads a vector from a byte array starting at an offset
2774      * and using a mask.
2775      * Lanes where the mask is unset are filled with the default
2776      * value of {@code byte} (zero).
2777      * to the specified byte order.
2778      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2779      * The vector is arranged into lanes according to
2780      * <a href="Vector.html#lane-order">memory ordering</a>.
2781      * <p>
2782      * This method behaves as if it returns the result of calling
2783      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2784      * fromByteBuffer()} as follows:
2785      * <pre>{@code
2786      * var bb = ByteBuffer.wrap(a);
2787      * return fromByteBuffer(species, bb, offset, bo, m);
2788      * }</pre>
2789      *
2790      * @param species species of desired vector
2791      * @param a the byte array
2792      * @param offset the offset into the array
2793      * @param bo the intended byte order
2794      * @param m the mask controlling lane selection
2795      * @return a vector loaded from a byte array
2796      * @throws IndexOutOfBoundsException
2797      *         if {@code offset+N*ESIZE < 0}
2798      *         or {@code offset+(N+1)*ESIZE > a.length}
2799      *         for any lane {@code N} in the vector
2800      *         where the mask is set
2801      */
2802     @ForceInline
2803     public static
2804     ByteVector fromByteArray(VectorSpecies<Byte> species,
2805                                        byte[] a, int offset,
2806                                        ByteOrder bo,
2807                                        VectorMask<Byte> m) {
2808         ByteSpecies vsp = (ByteSpecies) species;
2809         ByteVector zero = vsp.zero();
2810 
2811         if (offset >= 0 && offset <= (a.length - vsp.length() * 1)) {
2812             ByteVector v = zero.fromByteArray0(a, offset);
2813             return zero.blend(v.maybeSwap(bo), m);
2814         }
2815         ByteVector iota = zero.addIndex(1);
2816         ((AbstractMask<Byte>)m)
2817             .checkIndexByLane(offset, a.length, iota, 1);
2818         ByteBuffer tb = wrapper(a, offset, bo);
2819         return vsp.ldOp(tb, 0, (AbstractMask<Byte>)m,
2820                    (tb_, __, i)  -> tb_.get(i));
2821     }
2822 
2823     /**
2824      * Loads a vector from an array of type {@code byte[]}
2825      * starting at an offset.
2826      * For each vector lane, where {@code N} is the vector lane index, the
2827      * array element at index {@code offset + N} is placed into the
2828      * resulting vector at lane index {@code N}.
2829      *
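     * <p>
     * A common pattern is to traverse an array one vector at a time, as in
     * this sketch (which assumes a species constant {@code SPECIES} and,
     * for brevity, that the array length is a multiple of the species
     * length):
     * <pre>{@code
     * for (int i = 0; i < a.length; i += SPECIES.length()) {
     *     ByteVector v = ByteVector.fromArray(SPECIES, a, i);
     *     v.add(v).intoArray(a, i);  // double each element in place
     * }
     * }</pre>
     *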
2830      * @param species species of desired vector
2831      * @param a the array
2832      * @param offset the offset into the array
2833      * @return the vector loaded from an array
2834      * @throws IndexOutOfBoundsException
2835      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2836      *         for any lane {@code N} in the vector
2837      */
2838     @ForceInline
2839     public static
2840     ByteVector fromArray(VectorSpecies<Byte> species,
2841                                    byte[] a, int offset) {
2842         ByteSpecies vsp = (ByteSpecies) species;
2843         offset = checkFromIndexSize(offset,
2844                                     vsp.laneCount(),
2845                                     a.length);
2846         return vsp.dummyVector().fromArray0(a, offset);
2847     }
2848 
2849     /**
2850      * Loads a vector from an array of type {@code byte[]}
2851      * starting at an offset and using a mask.
2852      * Lanes where the mask is unset are filled with the default
2853      * value of {@code byte} (zero).
2854      * For each vector lane, where {@code N} is the vector lane index,
2855      * if the mask lane at index {@code N} is set then the array element at
2856      * index {@code offset + N} is placed into the resulting vector at lane index
2857      * {@code N}, otherwise the default element value is placed into the
2858      * resulting vector at lane index {@code N}.
2859      *
2860      * @param species species of desired vector
2861      * @param a the array
2862      * @param offset the offset into the array
2863      * @param m the mask controlling lane selection
2864      * @return the vector loaded from an array
2865      * @throws IndexOutOfBoundsException
2866      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2867      *         for any lane {@code N} in the vector
2868      *         where the mask is set
2869      */
2870     @ForceInline
2871     public static
2872     ByteVector fromArray(VectorSpecies<Byte> species,
2873                                    byte[] a, int offset,
2874                                    VectorMask<Byte> m) {
2875         ByteSpecies vsp = (ByteSpecies) species;
2876         if (offset >= 0 && offset <= (a.length - species.length())) {
2877             ByteVector zero = vsp.zero();
2878             return zero.blend(zero.fromArray0(a, offset), m);
2879         }
2880         ByteVector iota = vsp.iota();
2881         ((AbstractMask<Byte>)m)
2882             .checkIndexByLane(offset, a.length, iota, 1);
2883         return vsp.vOp(m, i -> a[offset + i]);
2884     }
2885 
2886     /**
2887      * Gathers a new vector composed of elements from an array of type
2888      * {@code byte[]},
2889      * using indexes obtained by adding a fixed {@code offset} to a
2890      * series of secondary offsets from an <em>index map</em>.
2891      * The index map is a contiguous sequence of {@code VLENGTH}
2892      * elements in a second array of {@code int}s, starting at a given
2893      * {@code mapOffset}.
2894      * <p>
2895      * For each vector lane, where {@code N} is the vector lane index,
2896      * the lane is loaded from the array
2897      * element {@code a[f(N)]}, where {@code f(N)} is the
2898      * index mapping expression
2899      * {@code offset + indexMap[mapOffset + N]}.
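          * <p>
          * For example (an illustrative sketch only), a gather of every
          * other element of an array {@code a} can use an index map of
          * the form {@code {0, 2, 4, ...}}:
          * <pre>{@code
          * var species = ByteVector.SPECIES_64;
          * int[] map = new int[species.length()];
          * for (int i = 0; i < map.length; i++) {
          *     map[i] = 2 * i;
          * }
          * ByteVector v = ByteVector.fromArray(species, a, 0, map, 0);
          * }</pre>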
2900      *
2901      * @param species species of desired vector
2902      * @param a the array
2903      * @param offset the offset into the array, may be negative if relative
2904      * indexes in the index map compensate to produce a value within the
2905      * array bounds
2906      * @param indexMap the index map
2907      * @param mapOffset the offset into the index map
2908      * @return the vector loaded from the indexed elements of the array
2909      * @throws IndexOutOfBoundsException
2910      *         if {@code mapOffset+N < 0}
2911      *         or if {@code mapOffset+N >= indexMap.length},
2912      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2913      *         is an invalid index into {@code a},
2914      *         for any lane {@code N} in the vector
2915      * @see ByteVector#toIntArray()
2916      */
2917     @ForceInline
2918     public static
2919     ByteVector fromArray(VectorSpecies<Byte> species,
2920                                    byte[] a, int offset,
2921                                    int[] indexMap, int mapOffset) {
2922         ByteSpecies vsp = (ByteSpecies) species;
2923         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2924     }
2925 
2926     /**
2927      * Gathers a new vector composed of elements from an array of type
2928      * {@code byte[]},
2929      * under the control of a mask, and
2930      * using indexes obtained by adding a fixed {@code offset} to a
2931      * series of secondary offsets from an <em>index map</em>.
2932      * The index map is a contiguous sequence of {@code VLENGTH}
2933      * elements in a second array of {@code int}s, starting at a given
2934      * {@code mapOffset}.
2935      * <p>
2936      * For each vector lane, where {@code N} is the vector lane index,
2937      * if the lane is set in the mask,
2938      * the lane is loaded from the array
2939      * element {@code a[f(N)]}, where {@code f(N)} is the
2940      * index mapping expression
2941      * {@code offset + indexMap[mapOffset + N]}.
2942      * Unset lanes in the resulting vector are set to zero.
2943      *
2944      * @param species species of desired vector
2945      * @param a the array
2946      * @param offset the offset into the array, may be negative if relative
2947      * indexes in the index map compensate to produce a value within the
2948      * array bounds
2949      * @param indexMap the index map
2950      * @param mapOffset the offset into the index map
2951      * @param m the mask controlling lane selection
2952      * @return the vector loaded from the indexed elements of the array
2953      * @throws IndexOutOfBoundsException
2954      *         if {@code mapOffset+N < 0}
2955      *         or if {@code mapOffset+N >= indexMap.length},
2956      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2957      *         is an invalid index into {@code a},
2958      *         for any lane {@code N} in the vector
2959      *         where the mask is set
2960      * @see ByteVector#toIntArray()
2961      */
2962     @ForceInline
2963     public static
2964     ByteVector fromArray(VectorSpecies<Byte> species,
2965                                    byte[] a, int offset,
2966                                    int[] indexMap, int mapOffset,
2967                                    VectorMask<Byte> m) {
2968         ByteSpecies vsp = (ByteSpecies) species;
2969 
2970         // Do it the slow way.
2971         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
2972 
2973     }
2974 
2975     /**
2976      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2977      * starting at an offset into the byte buffer.
2978      * <p>
2979      * This method behaves as if it returns the result of calling
2980      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2981      * fromByteBuffer()} as follows:
2982      * <pre>{@code
2983      * var m = species.maskAll(true);
2984      * return fromByteBuffer(species, bb, offset, bo, m);
2987      * }</pre>
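          * <p>
          * For example (an illustrative sketch only; any buffer with at
          * least {@code VLENGTH} bytes past the offset will do):
          * <pre>{@code
          * ByteBuffer buf = ByteBuffer.allocateDirect(64);
          * // ... fill buf with data ...
          * ByteVector v = ByteVector.fromByteBuffer(
          *     ByteVector.SPECIES_128, buf, 0, ByteOrder.nativeOrder());
          * }</pre>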
2988      *
2989      * @param species species of desired vector
2990      * @param bb the byte buffer
2991      * @param offset the offset into the byte buffer
2992      * @param bo the intended byte order
2993      * @return a vector loaded from a byte buffer
2994      * @throws IndexOutOfBoundsException
2995      *         if {@code offset+N*1 < 0}
2996      *         or {@code offset+N*1 >= bb.limit()}
2997      *         for any lane {@code N} in the vector
2998      */
2999     @ForceInline
3000     public static
3001     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3002                                         ByteBuffer bb, int offset,
3003                                         ByteOrder bo) {
3004         ByteSpecies vsp = (ByteSpecies) species;
3005         offset = checkFromIndexSize(offset,
3006                                     vsp.laneCount(),
3007                                     bb.limit());
3008         return vsp.dummyVector()
3009             .fromByteBuffer0(bb, offset).maybeSwap(bo);
3010     }
3011 
3012     /**
3013      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3014      * starting at an offset into the byte buffer
3015      * and using a mask.
3016      * Lanes where the mask is unset are filled with the default
3017      * value of {@code byte} (zero).
3018      * <p>
3019      * For each vector lane, where {@code N} is the vector lane index,
3020      * if the mask lane at index {@code N} is set then the byte at
3021      * buffer index {@code offset + N} is placed into the resulting
3022      * vector at lane index {@code N}, otherwise the default element
3023      * value is placed into the resulting vector at lane index
3024      * {@code N}.
3026      *
3027      * @param species species of desired vector
3028      * @param bb the byte buffer
3029      * @param offset the offset into the byte buffer
3030      * @param bo the intended byte order
3031      * @param m the mask controlling lane selection
3032      * @return a vector loaded from a byte buffer
3033      * @throws IndexOutOfBoundsException
3034      *         if {@code offset+N*1 < 0}
3035      *         or {@code offset+N*1 >= bb.limit()}
3036      *         for any lane {@code N} in the vector
3037      *         where the mask is set
3038      */
3039     @ForceInline
3040     public static
3041     ByteVector fromByteBuffer(VectorSpecies<Byte> species,
3042                                         ByteBuffer bb, int offset,
3043                                         ByteOrder bo,
3044                                         VectorMask<Byte> m) {
3045         if (m.allTrue()) {
3046             return fromByteBuffer(species, bb, offset, bo);
3047         }
3048         ByteSpecies vsp = (ByteSpecies) species;
3049         checkMaskFromIndexSize(offset,
3050                                vsp, m, 1,
3051                                bb.limit());
3052         ByteVector zero = zero(vsp);
3053         ByteVector v = zero.fromByteBuffer0(bb, offset);
3054         return zero.blend(v.maybeSwap(bo), m);
3055     }
3056 
3057     // Memory store operations
3058 
3059     /**
3060      * Stores this vector into an array of type {@code byte[]}
3061      * starting at an offset.
3062      * <p>
3063      * For each vector lane, where {@code N} is the vector lane index,
3064      * the lane element at index {@code N} is stored into the array
3065      * element {@code a[offset+N]}.
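          * <p>
          * For example, a sketch (illustrative only; {@code a}, {@code b},
          * and {@code i} stand for an input array, an output array, and a
          * loop index) of a load-transform-store round trip:
          * <pre>{@code
          * var species = ByteVector.SPECIES_128;
          * ByteVector v = ByteVector.fromArray(species, a, i);
          * v.lanewise(VectorOperators.NOT).intoArray(b, i);
          * }</pre>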
3066      *
3067      * @param a the array, of type {@code byte[]}
3068      * @param offset the offset into the array
3069      * @throws IndexOutOfBoundsException
3070      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3071      *         for any lane {@code N} in the vector
3072      */
3073     @ForceInline
3074     public final
3075     void intoArray(byte[] a, int offset) {
3076         ByteSpecies vsp = vspecies();
3077         offset = checkFromIndexSize(offset,
3078                                     vsp.laneCount(),
3079                                     a.length);
3080         VectorIntrinsics.store(
3081             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3082             a, arrayAddress(a, offset),
3083             this,
3084             a, offset,
3085             (arr, off, v)
3086             -> v.stOp(arr, off,
3087                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3088     }
3089 
3090     /**
3091      * Stores this vector into an array of {@code byte}
3092      * starting at offset and using a mask.
3093      * <p>
3094      * For each vector lane, where {@code N} is the vector lane index,
3095      * the lane element at index {@code N} is stored into the array
3096      * element {@code a[offset+N]}.
3097      * If the mask lane at {@code N} is unset then the corresponding
3098      * array element {@code a[offset+N]} is left unchanged.
3099      * <p>
3100      * Array range checking is done for lanes where the mask is set.
3101      * Lanes where the mask is unset are not stored and do not need
3102      * to correspond to legitimate elements of {@code a}.
3103      * That is, unset lanes may correspond to array indexes less than
3104      * zero or beyond the end of the array.
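          * <p>
          * A minimal sketch of a masked store (illustrative only;
          * {@code v} and {@code m} are a vector and a mask of the same
          * species):
          * <pre>{@code
          * // only the lanes of a where m is set are overwritten
          * v.intoArray(a, i, m);
          * }</pre>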
3105      *
3106      * @param a the array, of type {@code byte[]}
3107      * @param offset the offset into the array
3108      * @param m the mask controlling lane storage
3109      * @throws IndexOutOfBoundsException
3110      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3111      *         for any lane {@code N} in the vector
3112      *         where the mask is set
3113      */
3114     @ForceInline
3115     public final
3116     void intoArray(byte[] a, int offset,
3117                    VectorMask<Byte> m) {
3118         if (m.allTrue()) {
3119             intoArray(a, offset);
3120         } else {
3121             // FIXME: Cannot vectorize yet, if there's a mask.
3122             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
3123         }
3124     }
3125 
3126     /**
3127      * Scatters this vector into an array of type {@code byte[]}
3128      * using indexes obtained by adding a fixed {@code offset} to a
3129      * series of secondary offsets from an <em>index map</em>.
3130      * The index map is a contiguous sequence of {@code VLENGTH}
3131      * elements in a second array of {@code int}s, starting at a given
3132      * {@code mapOffset}.
3133      * <p>
3134      * For each vector lane, where {@code N} is the vector lane index,
3135      * the lane element at index {@code N} is stored into the array
3136      * element {@code a[f(N)]}, where {@code f(N)} is the
3137      * index mapping expression
3138      * {@code offset + indexMap[mapOffset + N]}.
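          * <p>
          * For example (an illustrative sketch only), the lanes of a
          * vector {@code v} can be scattered to every other element of an
          * array {@code a} by reusing a {@code {0, 2, 4, ...}} index map:
          * <pre>{@code
          * int[] map = new int[v.length()];
          * for (int i = 0; i < map.length; i++) {
          *     map[i] = 2 * i;
          * }
          * v.intoArray(a, 0, map, 0);
          * }</pre>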
3139      *
3140      * @param a the array
3141      * @param offset an offset to combine with the index map offsets
3142      * @param indexMap the index map
3143      * @param mapOffset the offset into the index map
3146      * @throws IndexOutOfBoundsException
3147      *         if {@code mapOffset+N < 0}
3148      *         or if {@code mapOffset+N >= indexMap.length},
3149      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3150      *         is an invalid index into {@code a},
3151      *         for any lane {@code N} in the vector
3152      * @see ByteVector#toIntArray()
3153      */
3154     @ForceInline
3155     public final
3156     void intoArray(byte[] a, int offset,
3157                    int[] indexMap, int mapOffset) {
3158         ByteSpecies vsp = vspecies();
3159         if (length() == 1) {
3160             intoArray(a, offset + indexMap[mapOffset]);
3161             return;
3162         }
3163         IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
3164         if (isp.laneCount() != vsp.laneCount()) {
3165             stOp(a, offset,
3166                  (arr, off, i, e) -> {
3167                      int j = indexMap[mapOffset + i];
3168                      arr[off + j] = e;
3169                  });
3170             return;
3171         }
3172 
3173         // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
3174         IntVector vix = IntVector
3175             .fromArray(isp, indexMap, mapOffset)
3176             .add(offset);
3177 
3178         vix = VectorIntrinsics.checkIndex(vix, a.length);
3179 
3180         VectorIntrinsics.storeWithMap(
3181             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3182             isp.vectorType(),
3183             a, arrayAddress(a, 0), vix,
3184             this,
3185             a, offset, indexMap, mapOffset,
3186             (arr, off, v, map, mo)
3187             -> v.stOp(arr, off,
3188                       (arr_, off_, i, e) -> {
3189                           int j = map[mo + i];
3190                           arr[off + j] = e;
3191                       }));
3192     }
3193 
3194     /**
3195      * Scatters this vector into an array of type {@code byte[]},
3196      * under the control of a mask, and
3197      * using indexes obtained by adding a fixed {@code offset} to a
3198      * series of secondary offsets from an <em>index map</em>.
3199      * The index map is a contiguous sequence of {@code VLENGTH}
3200      * elements in a second array of {@code int}s, starting at a given
3201      * {@code mapOffset}.
3202      * <p>
3203      * For each vector lane, where {@code N} is the vector lane index,
3204      * if the mask lane at index {@code N} is set then
3205      * the lane element at index {@code N} is stored into the array
3206      * element {@code a[f(N)]}, where {@code f(N)} is the
3207      * index mapping expression
3208      * {@code offset + indexMap[mapOffset + N]}.
3209      *
3210      * @param a the array
3211      * @param offset an offset to combine with the index map offsets
3212      * @param indexMap the index map
3213      * @param mapOffset the offset into the index map
3214      * @param m the mask
3217      * @throws IndexOutOfBoundsException
3218      *         if {@code mapOffset+N < 0}
3219      *         or if {@code mapOffset+N >= indexMap.length},
3220      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3221      *         is an invalid index into {@code a},
3222      *         for any lane {@code N} in the vector
3223      *         where the mask is set
3224      * @see ByteVector#toIntArray()
3225      */
3226     @ForceInline
3227     public final
3228     void intoArray(byte[] a, int offset,
3229                    int[] indexMap, int mapOffset,
3230                    VectorMask<Byte> m) {
3231         ByteSpecies vsp = vspecies();
3232         if (m.allTrue()) {
3233             intoArray(a, offset, indexMap, mapOffset);
3234             return;
3235         }
3236         // FIXME: Cannot vectorize yet, if there's a mask.
             stOp(a, offset, m,
                  (arr, off, i, e) -> {
                      int j = indexMap[mapOffset + i];
                      arr[off + j] = e;
                  });
3237     }
3238 
3239     /**
3240      * {@inheritDoc} <!--workaround-->
3241      */
3242     @Override
3243     @ForceInline
3244     public final
3245     void intoByteArray(byte[] a, int offset) {
3246         offset = checkFromIndexSize(offset,
3247                                     bitSize() / Byte.SIZE,
3248                                     a.length);
3249         this.maybeSwap(ByteOrder.LITTLE_ENDIAN)
3250             .intoByteArray0(a, offset);
3251     }
3252 
3253     /**
3254      * {@inheritDoc} <!--workaround-->
3255      */
3256     @Override
3257     @ForceInline
3258     public final
3259     void intoByteArray(byte[] a, int offset,
3260                        VectorMask<Byte> m) {
3261         if (m.allTrue()) {
3262             intoByteArray(a, offset);
3263             return;
3264         }
3265         ByteSpecies vsp = vspecies();
3266         if (offset >= 0 && offset <= (a.length - vsp.length() * 1)) {
3267             var oldVal = fromByteArray0(a, offset);
3268             var newVal = oldVal.blend(this, m);
3269             newVal.intoByteArray0(a, offset);
3270         } else {
3271             checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
3272             ByteBuffer tb = wrapper(a, offset, NATIVE_ENDIAN);
3273             this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e));
3274         }
3275     }
3276 
3277     /**
3278      * {@inheritDoc} <!--workaround-->
3279      */
3280     @Override
3281     @ForceInline
3282     public final
3283     void intoByteArray(byte[] a, int offset,
3284                        ByteOrder bo,
3285                        VectorMask<Byte> m) {
3286         maybeSwap(bo).intoByteArray(a, offset, m);
3287     }
3288 
3289     /**
3290      * {@inheritDoc} <!--workaround-->
3291      */
3292     @Override
3293     @ForceInline
3294     public final
3295     void intoByteBuffer(ByteBuffer bb, int offset,
3296                         ByteOrder bo) {
3297         maybeSwap(bo).intoByteBuffer0(bb, offset);
3298     }
3299 
3300     /**
3301      * {@inheritDoc} <!--workaround-->
3302      */
3303     @Override
3304     @ForceInline
3305     public final
3306     void intoByteBuffer(ByteBuffer bb, int offset,
3307                         ByteOrder bo,
3308                         VectorMask<Byte> m) {
3309         if (m.allTrue()) {
3310             intoByteBuffer(bb, offset, bo);
3311             return;
3312         }
3313         ByteSpecies vsp = vspecies();
3314         checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
3315         conditionalStoreNYI(offset, vsp, m, 1, bb.limit());
3316         var oldVal = fromByteBuffer0(bb, offset);
3317         var newVal = oldVal.blend(this.maybeSwap(bo), m);
3318         newVal.intoByteBuffer0(bb, offset);
3319     }
3320 
3321     // ================================================
3322 
3323     // Low-level memory operations.
3324     //
3325     // Note that all of these operations *must* inline into a context
3326     // where the exact species of the involved vector is a
3327     // compile-time constant.  Otherwise, the intrinsic generation
3328     // will fail and performance will suffer.
3329     //
3330     // In many cases this is achieved by re-deriving a version of the
3331     // method in each concrete subclass (per species).  The re-derived
3332     // method simply calls one of these generic methods, with exact
3333     // parameters for the controlling metadata, which is either a
3334     // typed vector or constant species instance.
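         //
         // As a sketch (the generated subclasses may differ in detail),
         // a concrete species such as Byte128Vector re-derives a loader
         // roughly like this:
         //
         //    @ForceInline
         //    @Override
         //    ByteVector fromArray0(byte[] a, int offset) {
         //        return super.fromArray0Template(a, offset);  // specialize
         //    }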
3335 
3336     // Unchecked loading operations in native byte order.
3337     // Caller is responsible for applying index checks, masking, and
3338     // byte swapping.
3339 
3340     /*package-private*/
3341     abstract
3342     ByteVector fromArray0(byte[] a, int offset);
3343     @ForceInline
3344     final
3345     ByteVector fromArray0Template(byte[] a, int offset) {
3346         ByteSpecies vsp = vspecies();
3347         return VectorIntrinsics.load(
3348             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3349             a, arrayAddress(a, offset),
3350             a, offset, vsp,
3351             (arr, off, s) -> s.ldOp(arr, off,
3352                                     (arr_, off_, i) -> arr_[off_ + i]));
3353     }
3354 
3355     @Override
3356     abstract
3357     ByteVector fromByteArray0(byte[] a, int offset);
3358     @ForceInline
3359     final
3360     ByteVector fromByteArray0Template(byte[] a, int offset) {
3361         ByteSpecies vsp = vspecies();
3362         return VectorIntrinsics.load(
3363             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3364             a, byteArrayAddress(a, offset),
3365             a, offset, vsp,
3366             (arr, off, s) -> {
3367                 ByteBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3368                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3369             });
3370     }
3371 
3372     abstract
3373     ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
3374     @ForceInline
3375     final
3376     ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3377         ByteSpecies vsp = vspecies();
3378         return VectorIntrinsics.load(
3379             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3380             bufferBase(bb), bufferAddress(bb, offset),
3381             bb, offset, vsp,
3382             (buf, off, s) -> {
3383                 ByteBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3384                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3385            });
3386     }
3387 
3388     // Unchecked storing operations in native byte order.
3389     // Caller is responsible for applying index checks, masking, and
3390     // byte swapping.
3391 
3392     abstract
3393     void intoArray0(byte[] a, int offset);
3394     @ForceInline
3395     final
3396     void intoArray0Template(byte[] a, int offset) {
3397         ByteSpecies vsp = vspecies();
3398         VectorIntrinsics.store(
3399             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3400             a, arrayAddress(a, offset),
3401             this, a, offset,
3402             (arr, off, v)
3403             -> v.stOp(arr, off,
3404                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3405     }
3406 
3407     abstract
3408     void intoByteArray0(byte[] a, int offset);
3409     @ForceInline
3410     final
3411     void intoByteArray0Template(byte[] a, int offset) {
3412         ByteSpecies vsp = vspecies();
3413         VectorIntrinsics.store(
3414             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3415             a, byteArrayAddress(a, offset),
3416             this, a, offset,
3417             (arr, off, v) -> {
3418                 ByteBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3419                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3420             });
3421     }
3422 
3423     @ForceInline
3424     final
3425     void intoByteBuffer0(ByteBuffer bb, int offset) {
3426         ByteSpecies vsp = vspecies();
3427         VectorIntrinsics.store(
3428             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3429             bufferBase(bb), bufferAddress(bb, offset),
3430             this, bb, offset,
3431             (buf, off, v) -> {
3432                 ByteBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3433                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3434             });
3435     }
3436 
3437     // End of low-level memory operations.
3438 
3439     private static
3440     void checkMaskFromIndexSize(int offset,
3441                                 ByteSpecies vsp,
3442                                 VectorMask<Byte> m,
3443                                 int scale,
3444                                 int limit) {
3445         ((AbstractMask<Byte>)m)
3446             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3447     }
3448 
3449     @ForceInline
3450     private void conditionalStoreNYI(int offset,
3451                                      ByteSpecies vsp,
3452                                      VectorMask<Byte> m,
3453                                      int scale,
3454                                      int limit) {
3455         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3456             String msg =
3457                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3458                               offset, limit, m, vsp);
3459             throw new AssertionError(msg);
3460         }
3461     }
3462 
3463     /*package-private*/
3464     @Override
3465     @ForceInline
3466     final
3467     ByteVector maybeSwap(ByteOrder bo) {
3468         return this;
3469     }
3470 
3471     static final int ARRAY_SHIFT =
3472         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
3473     static final long ARRAY_BASE =
3474         Unsafe.ARRAY_BYTE_BASE_OFFSET;
3475 
3476     @ForceInline
3477     static long arrayAddress(byte[] a, int index) {
3478         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3479     }
3480 
3481     @ForceInline
3482     static long byteArrayAddress(byte[] a, int index) {
3483         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3484     }
3485 
3486     // Byte buffer wrappers.
3487     private static ByteBuffer wrapper(ByteBuffer bb, int offset,
3488                                         ByteOrder bo) {
3489         return bb.duplicate().position(offset).slice()
3490             .order(bo);
3491     }
3492     private static ByteBuffer wrapper(byte[] a, int offset,
3493                                         ByteOrder bo) {
3494         return ByteBuffer.wrap(a, offset, a.length - offset)
3495             .order(bo);
3496     }
3497 
3498     // ================================================
3499 
3500     /// Reinterpreting view methods:
3501     //   lanewise reinterpret: viewAsXVector()
3502     //   keep shape, redraw lanes: reinterpretAsEs()
3503 
3504     /**
3505      * {@inheritDoc} <!--workaround-->
3506      */
3507     @ForceInline
3508     @Override
3509     public final ByteVector reinterpretAsBytes() {
3510         return this;
3511     }
3512 
3513     /**
3514      * {@inheritDoc} <!--workaround-->
3515      */
3516     @ForceInline
3517     @Override
3518     public final ByteVector viewAsIntegralLanes() {
3519         return this;
3520     }
3521 
3522     /**
3523      * {@inheritDoc} <!--workaround-->
3524      *
3525      * @implNote This method always throws
3526      * {@code IllegalArgumentException}, because there is no floating
3527      * point type of the same size as {@code byte}.  The return type
3528      * of this method is arbitrarily designated as
3529      * {@code Vector<?>}.  Future versions of this API may change the return
3530      * type if additional floating point types become available.
3531      */
3532     @ForceInline
3533     @Override
3534     public final
3535     Vector<?>
3536     viewAsFloatingLanes() {
3537         LaneType flt = LaneType.BYTE.asFloating();
3538         throw new AssertionError();  // should already throw IAE
3539     }
3540 
3541     // ================================================
3542 
3543     /// Object methods: toString, equals, hashCode
3544     //
3545     // Object methods are defined as if Arrays.toString, etc.,
3546     // were applied to the array of elements.  Two equal vectors
3547     // are required to have equal species and equal lane values.
3548 
3549     /**
3550      * Returns a string representation of this vector, of the form
3551      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3552      * in lane order.
3553      *
3554      * The string is produced as if by a call to {@link
3555      * java.util.Arrays#toString(byte[]) Arrays.toString()},
3556      * as appropriate to the {@code byte} array returned by
3557      * {@link #toArray this.toArray()}.
3558      *
3559      * @return a string of the form {@code "[0,1,2...]"}
3560      * reporting the lane values of this vector
3561      */
3562     @Override
3563     @ForceInline
3564     public final
3565     String toString() {
3566         // now that toArray is strongly typed, we can define this
3567         return Arrays.toString(toArray());
3568     }
3569 
3570     /**
3571      * {@inheritDoc} <!--workaround-->
3572      */
3573     @Override
3574     @ForceInline
3575     public final
3576     boolean equals(Object obj) {
3577         if (obj instanceof Vector) {
3578             Vector<?> that = (Vector<?>) obj;
3579             if (this.species().equals(that.species())) {
3580                 return this.eq(that.check(this.species())).allTrue();
3581             }
3582         }
3583         return false;
3584     }
3585 
3586     /**
3587      * {@inheritDoc} <!--workaround-->
3588      */
3589     @Override
3590     @ForceInline
3591     public final
3592     int hashCode() {
3593         // now that toArray is strongly typed, we can define this
3594         return Objects.hash(species(), Arrays.hashCode(toArray()));
3595     }
3596 
3597     // ================================================
3598 
3599     // Species
3600 
3601     /**
3602      * Class representing {@link ByteVector}s of the same {@link VectorShape VectorShape}.
3603      */
3604     /*package-private*/
3605     static final class ByteSpecies extends AbstractSpecies<Byte> {
3606         private ByteSpecies(VectorShape shape,
3607                 Class<? extends ByteVector> vectorType,
3608                 Class<? extends AbstractMask<Byte>> maskType,
3609                 Function<Object, ByteVector> vectorFactory) {
3610             super(shape, LaneType.of(byte.class),
3611                   vectorType, maskType,
3612                   vectorFactory);
3613             assert(this.elementSize() == Byte.SIZE);
3614         }
3615 
3616         // Specializing overrides:
3617 
3618         @Override
3619         @ForceInline
3620         public final Class<Byte> elementType() {
3621             return byte.class;
3622         }
3623 
3624         @Override
3625         @ForceInline
3626         public final Class<Byte> genericElementType() {
3627             return Byte.class;
3628         }
3629 
3630         @Override
3631         @ForceInline
3632         public final Class<byte[]> arrayType() {
3633             return byte[].class;
3634         }
3635 
3636         @SuppressWarnings("unchecked")
3637         @Override
3638         @ForceInline
3639         public final Class<? extends ByteVector> vectorType() {
3640             return (Class<? extends ByteVector>) vectorType;
3641         }
3642 
3643         @Override
3644         @ForceInline
3645         public final long checkValue(long e) {
3646             longToElementBits(e);  // only for exception
3647             return e;
3648         }
3649 
3650         /*package-private*/
3651         @Override
3652         @ForceInline
3653         final ByteVector broadcastBits(long bits) {
3654             return (ByteVector)
3655                 VectorIntrinsics.broadcastCoerced(
3656                     vectorType, byte.class, laneCount,
3657                     bits, this,
3658                     (bits_, s_) -> s_.rvOp(i -> bits_));
3659         }
3660 
3661         /*package-private*/
3662         @ForceInline
3664         final ByteVector broadcast(byte e) {
3665             return broadcastBits(toBits(e));
3666         }
3667 
3668         @Override
3669         @ForceInline
3670         public final ByteVector broadcast(long e) {
3671             return broadcastBits(longToElementBits(e));
3672         }
3673 
3674         /*package-private*/
3675         final @Override
3676         @ForceInline
3677         long longToElementBits(long value) {
3678             // Do the conversion, and then test it for failure.
3679             byte e = (byte) value;
3680             if ((long) e != value) {
3681                 throw badElementBits(value, e);
3682             }
3683             return toBits(e);
3684         }
3685 
3686         /*package-private*/
3687         @ForceInline
3688         static long toIntegralChecked(byte e, boolean convertToInt) {
3689             long value = convertToInt ? (int) e : (long) e;
3690             if ((byte) value != e) {
3691                 throw badArrayBits(e, convertToInt, value);
3692             }
3693             return value;
3694         }
3695 
3696         @Override
3697         @ForceInline
3698         public final ByteVector fromValues(long... values) {
3699             VectorIntrinsics.requireLength(values.length, laneCount);
3700             byte[] va = new byte[laneCount()];
3701             for (int i = 0; i < va.length; i++) {
3702                 long lv = values[i];
3703                 byte v = (byte) lv;
3704                 va[i] = v;
3705                 if ((long)v != lv) {
3706                     throw badElementBits(lv, v);
3707                 }
3708             }
3709             return dummyVector().fromArray0(va, 0);
3710         }
3711 
3712         /* this non-public one is for internal conversions */
3713         @Override
3714         @ForceInline
3715         final ByteVector fromIntValues(int[] values) {
3716             VectorIntrinsics.requireLength(values.length, laneCount);
3717             byte[] va = new byte[laneCount()];
3718             for (int i = 0; i < va.length; i++) {
3719                 int lv = values[i];
3720                 byte v = (byte) lv;
3721                 va[i] = v;
3722                 if ((int)v != lv) {
3723                     throw badElementBits(lv, v);
3724                 }
3725             }
3726             return dummyVector().fromArray0(va, 0);
3727         }
3728 
3729         // Virtual constructors
3730 
3731         @ForceInline
3732         @Override final
3733         public ByteVector fromArray(Object a, int offset) {
3734             // User entry point:  Be careful with inputs.
3735             return ByteVector
3736                 .fromArray(this, (byte[]) a, offset);
3737         }
3738 
3739         @Override final
3740         ByteVector dummyVector() {
3741             return (ByteVector) super.dummyVector();
3742         }
3743 
3744         final
3745         ByteVector vectorFactory(byte[] vec) {
3746             // Species delegates all factory requests to its dummy
3747             // vector.  The dummy knows all about it.
3748             return dummyVector().vectorFactory(vec);
3749         }
3750 
3751         /*package-private*/
3752         final @Override
3753         @ForceInline
3754         ByteVector rvOp(RVOp f) {
3755             byte[] res = new byte[laneCount()];
3756             for (int i = 0; i < res.length; i++) {
3757                 byte bits = (byte) f.apply(i);
3758                 res[i] = fromBits(bits);
3759             }
3760             return dummyVector().vectorFactory(res);
3761         }
3762 
3763         ByteVector vOp(FVOp f) {
3764             byte[] res = new byte[laneCount()];
3765             for (int i = 0; i < res.length; i++) {
3766                 res[i] = f.apply(i);
3767             }
3768             return dummyVector().vectorFactory(res);
3769         }
3770 
3771         ByteVector vOp(VectorMask<Byte> m, FVOp f) {
3772             byte[] res = new byte[laneCount()];
3773             boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
3774             for (int i = 0; i < res.length; i++) {
3775                 if (mbits[i]) {
3776                     res[i] = f.apply(i);
3777                 }
3778             }
3779             return dummyVector().vectorFactory(res);
3780         }
3781 
3782         /*package-private*/
3783         @ForceInline
3784         <M> ByteVector ldOp(M memory, int offset,
3785                                       FLdOp<M> f) {
3786             return dummyVector().ldOp(memory, offset, f);
3787         }
3788 
3789         /*package-private*/
3790         @ForceInline
3791         <M> ByteVector ldOp(M memory, int offset,
3792                                       AbstractMask<Byte> m,
3793                                       FLdOp<M> f) {
3794             return dummyVector().ldOp(memory, offset, m, f);
3795         }
3796 
3797         /*package-private*/
3798         @ForceInline
3799         <M> void stOp(M memory, int offset, FStOp<M> f) {
3800             dummyVector().stOp(memory, offset, f);
3801         }
3802 
3803         /*package-private*/
3804         @ForceInline
3805         <M> void stOp(M memory, int offset,
3806                       AbstractMask<Byte> m,
3807                       FStOp<M> f) {
3808             dummyVector().stOp(memory, offset, m, f);
3809         }
3810 
3811         // N.B. Make sure these constant vectors and
3812         // masks load up correctly into registers.
3813         //
3814         // Also, see if we can avoid all that switching.
3815         // Could we cache both vectors and both masks in
3816         // this species object?
3817 
3818         // Zero and iota vector access
3819         @Override
3820         @ForceInline
3821         public final ByteVector zero() {
3822             if ((Class<?>) vectorType() == ByteMaxVector.class)
3823                 return ByteMaxVector.ZERO;
3824             switch (vectorBitSize()) {
3825                 case 64: return Byte64Vector.ZERO;
3826                 case 128: return Byte128Vector.ZERO;
3827                 case 256: return Byte256Vector.ZERO;
3828                 case 512: return Byte512Vector.ZERO;
3829             }
3830             throw new AssertionError();
3831         }
3832 
3833         @Override
3834         @ForceInline
3835         public final ByteVector iota() {
3836             if ((Class<?>) vectorType() == ByteMaxVector.class)
3837                 return ByteMaxVector.IOTA;
3838             switch (vectorBitSize()) {
3839                 case 64: return Byte64Vector.IOTA;
3840                 case 128: return Byte128Vector.IOTA;
3841                 case 256: return Byte256Vector.IOTA;
3842                 case 512: return Byte512Vector.IOTA;
3843             }
3844             throw new AssertionError();
3845         }
3846 
3847         // Mask access
3848         @Override
3849         @ForceInline
3850         public final VectorMask<Byte> maskAll(boolean bit) {
3851             if ((Class<?>) vectorType() == ByteMaxVector.class)
3852                 return ByteMaxVector.ByteMaxMask.maskAll(bit);
3853             switch (vectorBitSize()) {
3854                 case 64: return Byte64Vector.Byte64Mask.maskAll(bit);
3855                 case 128: return Byte128Vector.Byte128Mask.maskAll(bit);
3856                 case 256: return Byte256Vector.Byte256Mask.maskAll(bit);
3857                 case 512: return Byte512Vector.Byte512Mask.maskAll(bit);
3858             }
3859             throw new AssertionError();
3860         }
3861     }
3862 
3863     /**
3864      * Finds a species for an element type of {@code byte} and shape.
3865      *
3866      * @param s the shape
3867      * @return a species for an element type of {@code byte} and shape
3868      * @throws IllegalArgumentException if no such species exists for the shape
3869      */
3870     static ByteSpecies species(VectorShape s) {
3871         Objects.requireNonNull(s);
3872         switch (s) {
3873             case S_64_BIT: return (ByteSpecies) SPECIES_64;
3874             case S_128_BIT: return (ByteSpecies) SPECIES_128;
3875             case S_256_BIT: return (ByteSpecies) SPECIES_256;
3876             case S_512_BIT: return (ByteSpecies) SPECIES_512;
3877             case S_Max_BIT: return (ByteSpecies) SPECIES_MAX;
3878             default: throw new IllegalArgumentException("Bad shape: " + s);
3879         }
3880     }
3881 
3882     /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
3883     public static final VectorSpecies<Byte> SPECIES_64
3884         = new ByteSpecies(VectorShape.S_64_BIT,
3885                             Byte64Vector.class,
3886                             Byte64Vector.Byte64Mask.class,
3887                             Byte64Vector::new);
3888 
3889     /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
3890     public static final VectorSpecies<Byte> SPECIES_128
3891         = new ByteSpecies(VectorShape.S_128_BIT,
3892                             Byte128Vector.class,
3893                             Byte128Vector.Byte128Mask.class,
3894                             Byte128Vector::new);
3895 
3896     /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
3897     public static final VectorSpecies<Byte> SPECIES_256
3898         = new ByteSpecies(VectorShape.S_256_BIT,
3899                             Byte256Vector.class,
3900                             Byte256Vector.Byte256Mask.class,
3901                             Byte256Vector::new);
3902 
3903     /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
3904     public static final VectorSpecies<Byte> SPECIES_512
3905         = new ByteSpecies(VectorShape.S_512_BIT,
3906                             Byte512Vector.class,
3907                             Byte512Vector.Byte512Mask.class,
3908                             Byte512Vector::new);
3909 
3910     /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
3911     public static final VectorSpecies<Byte> SPECIES_MAX
3912         = new ByteSpecies(VectorShape.S_Max_BIT,
3913                             ByteMaxVector.class,
3914                             ByteMaxVector.ByteMaxMask.class,
3915                             ByteMaxVector::new);
3916 
3917     /**
3918      * Preferred species for {@link ByteVector}s.
3919      * A preferred species is a species of maximal bit-size for the platform.
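          * <p>
          * For example, a sketch (illustrative only) of a loop over a
          * byte array using the preferred species, with a scalar tail:
          * <pre>{@code
          * var species = ByteVector.SPECIES_PREFERRED;
          * int i = 0;
          * int upper = a.length - (a.length % species.length());
          * for (; i < upper; i += species.length()) {
          *     ByteVector.fromArray(species, a, i).intoArray(b, i);
          * }
          * for (; i < a.length; i++) {
          *     b[i] = a[i];
          * }
          * }</pre>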
3920      */
3921     public static final VectorSpecies<Byte> SPECIES_PREFERRED
3922         = (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
3923 }