1 /*
   2  * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 import java.nio.ShortBuffer;
  29 import java.nio.ByteOrder;
  30 import java.util.Arrays;
  31 import java.util.Objects;
  32 import java.util.function.BinaryOperator;
  33 import java.util.function.IntUnaryOperator;
  34 import java.util.function.Function;
  35 import java.util.function.UnaryOperator;
  36 import java.util.concurrent.ThreadLocalRandom;
  37 
  38 import jdk.internal.misc.Unsafe;
  39 import jdk.internal.vm.annotation.ForceInline;
  40 
  41 import static jdk.incubator.vector.VectorIntrinsics.*;
  42 import static jdk.incubator.vector.VectorOperators.*;
  43 
  44 // -- This file was mechanically generated: Do not edit! -- //
  45 
  46 /**
  47  * A specialized {@link Vector} representing an ordered immutable sequence of
  48  * {@code short} values.
  49  */
  50 @SuppressWarnings("cast")  // warning: redundant cast
  51 public abstract class ShortVector extends AbstractVector<Short> {
  52 
  53     ShortVector() {}
  54 
    // Operator-kind bits handed to VectorOperators.opCode as the "forbid"
    // argument by both opCode helpers in this class.
    // NOTE(review): VO_ONLYFP presumably marks floating-point-only operators,
    // which a vector of short cannot support -- confirm in VectorOperators.
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;
  56 
  57     @ForceInline
  58     static int opCode(Operator op) {
  59         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  60     }
  61     @ForceInline
  62     static int opCode(Operator op, int requireKind) {
  63         requireKind |= VO_OPCODE_VALID;
  64         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  65     }
  66     @ForceInline
  67     static boolean opKind(Operator op, int bit) {
  68         return VectorOperators.opKind(op, bit);
  69     }
  70 
  71     // Virtualized factories and operators,
  72     // coded with portable definitions.
  73     // These are all @ForceInline in case
  74     // they need to be used performantly.
  75     // The various shape-specific subclasses
  76     // also specialize them by wrapping
  77     // them in a call like this:
  78     //    return (Byte128Vector)
  79     //       super.bOp((Byte128Vector) o);
  80     // The purpose of that is to forcibly inline
  81     // the generic definition from this file
  82     // into a sharply type- and size-specific
  83     // wrapper in the subclass file, so that
  84     // the JIT can specialize the code.
  85     // The code is only inlined and expanded
  86     // if it gets hot.  Think of it as a cheap
  87     // and lazy version of C++ templates.
  88 
  89     // Virtualized getter
  90 
    /*package-private*/
    // Supplies the short[] holding this vector's lane values.  The template
    // helpers in this class read it one element per lane index.
    abstract short[] getElements();
  93 
  94     // Virtualized constructors
  95 
    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     *
     * @param vec the lane values for the new vector; the template helpers
     *        in this class always pass a freshly allocated array
     * @return the newly built vector
     */
    /*package-private*/
    abstract ShortVector vectorFactory(short[] vec);
 102 
 103     /**
 104      * Build a mask directly using my species.
 105      * It is an error if the array is aliased elsewhere.
 106      */
 107     /*package-private*/
 108     @ForceInline
 109     final
 110     AbstractMask<Short> maskFactory(boolean[] bits) {
 111         return vspecies().maskFactory(bits);
 112     }
 113 
    // Constant loader (takes dummy as vector arg)
    // Per-lane producer used by vOp: given the lane index i, returns the
    // short value to place in that lane.
    interface FVOp {
        short apply(int i);
    }
 118 
 119     /*package-private*/
 120     @ForceInline
 121     final
 122     ShortVector vOp(FVOp f) {
 123         short[] res = new short[length()];
 124         for (int i = 0; i < res.length; i++) {
 125             res[i] = f.apply(i);
 126         }
 127         return vectorFactory(res);
 128     }
 129 
 130     @ForceInline
 131     final
 132     ShortVector vOp(VectorMask<Short> m, FVOp f) {
 133         short[] res = new short[length()];
 134         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 135         for (int i = 0; i < res.length; i++) {
 136             if (mbits[i]) {
 137                 res[i] = f.apply(i);
 138             }
 139         }
 140         return vectorFactory(res);
 141     }
 142 
 143     // Unary operator
 144 
    /*package-private*/
    // Per-lane unary function: receives the lane index i and that lane's
    // current value a, and returns the lane's new value.
    interface FUnOp {
        short apply(int i, short a);
    }
 149 
    /*package-private*/
    // Unmasked lane-wise unary hook: takes a per-lane function and yields a
    // new vector.  Declared abstract here; uOpTemplate directly below is a
    // portable array-based definition with the same parameter list.
    abstract
    ShortVector uOp(FUnOp f);
 153     @ForceInline
 154     final
 155     ShortVector uOpTemplate(FUnOp f) {
 156         short[] vec = getElements();
 157         short[] res = new short[length()];
 158         for (int i = 0; i < res.length; i++) {
 159             res[i] = f.apply(i, vec[i]);
 160         }
 161         return vectorFactory(res);
 162     }
 163 
    /*package-private*/
    // Masked lane-wise unary hook.  Declared abstract here; the masked
    // uOpTemplate directly below is a portable definition with the same
    // parameter list.
    abstract
    ShortVector uOp(VectorMask<Short> m,
                             FUnOp f);
 168     @ForceInline
 169     final
 170     ShortVector uOpTemplate(VectorMask<Short> m,
 171                                      FUnOp f) {
 172         short[] vec = getElements();
 173         short[] res = new short[length()];
 174         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 175         for (int i = 0; i < res.length; i++) {
 176             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 177         }
 178         return vectorFactory(res);
 179     }
 180 
 181     // Binary operator
 182 
    /*package-private*/
    // Per-lane binary function: receives the lane index i plus two short
    // operands and returns the lane's result.  Also used by rOpTemplate,
    // where a is the running accumulator and b the lane value.
    interface FBinOp {
        short apply(int i, short a, short b);
    }
 187 
    /*package-private*/
    // Unmasked lane-wise binary hook combining this vector with o.
    // Declared abstract here; bOpTemplate directly below is a portable
    // definition with the same parameter list.
    abstract
    ShortVector bOp(Vector<Short> o,
                             FBinOp f);
 192     @ForceInline
 193     final
 194     ShortVector bOpTemplate(Vector<Short> o,
 195                                      FBinOp f) {
 196         short[] res = new short[length()];
 197         short[] vec1 = this.getElements();
 198         short[] vec2 = ((ShortVector)o).getElements();
 199         for (int i = 0; i < res.length; i++) {
 200             res[i] = f.apply(i, vec1[i], vec2[i]);
 201         }
 202         return vectorFactory(res);
 203     }
 204 
    /*package-private*/
    // Masked lane-wise binary hook combining this vector with o under m.
    // Declared abstract here; the masked bOpTemplate directly below is a
    // portable definition with the same parameter list.
    abstract
    ShortVector bOp(Vector<Short> o,
                             VectorMask<Short> m,
                             FBinOp f);
 210     @ForceInline
 211     final
 212     ShortVector bOpTemplate(Vector<Short> o,
 213                                      VectorMask<Short> m,
 214                                      FBinOp f) {
 215         short[] res = new short[length()];
 216         short[] vec1 = this.getElements();
 217         short[] vec2 = ((ShortVector)o).getElements();
 218         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 219         for (int i = 0; i < res.length; i++) {
 220             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 221         }
 222         return vectorFactory(res);
 223     }
 224 
 225     // Ternary operator
 226 
    /*package-private*/
    // Per-lane ternary function: receives the lane index i plus three
    // short operands and returns the lane's result.
    interface FTriOp {
        short apply(int i, short a, short b, short c);
    }
 231 
    /*package-private*/
    // Unmasked lane-wise ternary hook combining this vector with o1 and o2.
    // Declared abstract here; tOpTemplate directly below is a portable
    // definition with the same parameter list.
    abstract
    ShortVector tOp(Vector<Short> o1,
                             Vector<Short> o2,
                             FTriOp f);
 237     @ForceInline
 238     final
 239     ShortVector tOpTemplate(Vector<Short> o1,
 240                                      Vector<Short> o2,
 241                                      FTriOp f) {
 242         short[] res = new short[length()];
 243         short[] vec1 = this.getElements();
 244         short[] vec2 = ((ShortVector)o1).getElements();
 245         short[] vec3 = ((ShortVector)o2).getElements();
 246         for (int i = 0; i < res.length; i++) {
 247             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 248         }
 249         return vectorFactory(res);
 250     }
 251 
    /*package-private*/
    // Masked lane-wise ternary hook combining this vector with o1 and o2
    // under m.  Declared abstract here; the masked tOpTemplate directly
    // below is a portable definition with the same parameter list.
    abstract
    ShortVector tOp(Vector<Short> o1,
                             Vector<Short> o2,
                             VectorMask<Short> m,
                             FTriOp f);
 258     @ForceInline
 259     final
 260     ShortVector tOpTemplate(Vector<Short> o1,
 261                                      Vector<Short> o2,
 262                                      VectorMask<Short> m,
 263                                      FTriOp f) {
 264         short[] res = new short[length()];
 265         short[] vec1 = this.getElements();
 266         short[] vec2 = ((ShortVector)o1).getElements();
 267         short[] vec3 = ((ShortVector)o2).getElements();
 268         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 269         for (int i = 0; i < res.length; i++) {
 270             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 271         }
 272         return vectorFactory(res);
 273     }
 274 
 275     // Reduction operator
 276 
    /*package-private*/
    // Reduction hook: folds the lanes into a single short, starting from v.
    // Declared abstract here; rOpTemplate directly below is a portable
    // definition with the same parameter list.
    abstract
    short rOp(short v, FBinOp f);
 280     @ForceInline
 281     final
 282     short rOpTemplate(short v, FBinOp f) {
 283         short[] vec = getElements();
 284         for (int i = 0; i < vec.length; i++) {
 285             v = f.apply(i, v, vec[i]);
 286         }
 287         return v;
 288     }
 289 
 290     // Memory reference
 291 
    /*package-private*/
    // Per-lane load function used by ldOp: receives the memory object and
    // offset exactly as given to ldOp, plus the lane index i, and returns
    // the value for that lane.
    interface FLdOp<M> {
        short apply(M memory, int offset, int i);
    }
 296 
 297     /*package-private*/
 298     @ForceInline
 299     final
 300     <M> ShortVector ldOp(M memory, int offset,
 301                                   FLdOp<M> f) {
 302         //dummy; no vec = getElements();
 303         short[] res = new short[length()];
 304         for (int i = 0; i < res.length; i++) {
 305             res[i] = f.apply(memory, offset, i);
 306         }
 307         return vectorFactory(res);
 308     }
 309 
 310     /*package-private*/
 311     @ForceInline
 312     final
 313     <M> ShortVector ldOp(M memory, int offset,
 314                                   VectorMask<Short> m,
 315                                   FLdOp<M> f) {
 316         //short[] vec = getElements();
 317         short[] res = new short[length()];
 318         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 319         for (int i = 0; i < res.length; i++) {
 320             if (mbits[i]) {
 321                 res[i] = f.apply(memory, offset, i);
 322             }
 323         }
 324         return vectorFactory(res);
 325     }
 326 
    // Per-lane store function used by stOp: receives the memory object and
    // offset exactly as given to stOp, the lane index i, and the lane
    // value a to be stored.
    interface FStOp<M> {
        void apply(M memory, int offset, int i, short a);
    }
 330 
 331     /*package-private*/
 332     @ForceInline
 333     final
 334     <M> void stOp(M memory, int offset,
 335                   FStOp<M> f) {
 336         short[] vec = getElements();
 337         for (int i = 0; i < vec.length; i++) {
 338             f.apply(memory, offset, i, vec[i]);
 339         }
 340     }
 341 
 342     /*package-private*/
 343     @ForceInline
 344     final
 345     <M> void stOp(M memory, int offset,
 346                   VectorMask<Short> m,
 347                   FStOp<M> f) {
 348         short[] vec = getElements();
 349         boolean[] mbits = ((AbstractMask<Short>)m).getBits();
 350         for (int i = 0; i < vec.length; i++) {
 351             if (mbits[i]) {
 352                 f.apply(memory, offset, i, vec[i]);
 353             }
 354         }
 355     }
 356 
 357     // Binary test
 358 
    /*package-private*/
    // Per-lane binary predicate used by bTest: receives the condition code
    // passed to bTest, the lane index i and the two lane values, and
    // returns the resulting mask bit.
    interface FBinTest {
        boolean apply(int cond, int i, short a, short b);
    }
 363 
 364     /*package-private*/
 365     @ForceInline
 366     final
 367     AbstractMask<Short> bTest(int cond,
 368                                   Vector<Short> o,
 369                                   FBinTest f) {
 370         short[] vec1 = getElements();
 371         short[] vec2 = ((ShortVector)o).getElements();
 372         boolean[] bits = new boolean[length()];
 373         for (int i = 0; i < length(); i++){
 374             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 375         }
 376         return maskFactory(bits);
 377     }
 378 
 379     /*package-private*/
 380     @ForceInline
 381     static boolean doBinTest(int cond, short a, short b) {
 382         switch (cond) {
 383         case BT_eq:  return a == b;
 384         case BT_ne:  return a != b;
 385         case BT_lt:  return a < b;
 386         case BT_le:  return a <= b;
 387         case BT_gt:  return a > b;
 388         case BT_ge:  return a >= b;
 389         }
 390         throw new AssertionError(Integer.toHexString(cond));
 391     }
 392 
    /*package-private*/
    // Narrows the inherited vspecies() return type to ShortSpecies so that
    // code in this class can call species-level helpers (for example
    // maskFactory and broadcast) without casting.
    @Override
    abstract ShortSpecies vspecies();
 396 
 397     /*package-private*/
 398     @ForceInline
 399     static long toBits(short e) {
 400         return  e;
 401     }
 402 
 403     /*package-private*/
 404     @ForceInline
 405     static short fromBits(long bits) {
 406         return ((short)bits);
 407     }
 408 
 409     // Static factories (other than memory operations)
 410 
 411     // Note: A surprising behavior in javadoc
 412     // sometimes makes a lone /** {@inheritDoc} */
 413     // comment drop the method altogether,
    // apparently if the method mentions a
 415     // parameter or return type of Vector<Short>
 416     // instead of Vector<E> as originally specified.
 417     // Adding an empty HTML fragment appears to
 418     // nudge javadoc into providing the desired
 419     // inherited documentation.  We use the HTML
 420     // comment <!--workaround--> for this.
 421 
 422     /**
 423      * {@inheritDoc} <!--workaround-->
 424      */
 425     @ForceInline
 426     public static ShortVector zero(VectorSpecies<Short> species) {
 427         ShortSpecies vsp = (ShortSpecies) species;
 428         return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), short.class, species.length(),
 429                                 0, vsp,
 430                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 431     }
 432 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * <p> The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code ShortVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract ShortVector broadcast(short e);
 459 
 460     /**
 461      * Returns a vector of the given species
 462      * where all lane elements are set to
 463      * the primitive value {@code e}.
 464      *
 465      * @param species species of the desired vector
 466      * @param e the value to broadcast
 467      * @return a vector where all lane elements are set to
 468      *         the primitive value {@code e}
 469      * @see #broadcast(long)
 470      * @see Vector#broadcast(long)
 471      * @see VectorSpecies#broadcast(long)
 472      */
 473     public static ShortVector broadcast(VectorSpecies<Short> species, short e) {
 474         ShortSpecies vsp = (ShortSpecies) species;
 475         return vsp.broadcast(e);
 476     }
 477 
 478     /*package-private*/
 479     @ForceInline
 480     final ShortVector broadcastTemplate(short e) {
 481         ShortSpecies vsp = vspecies();
 482         return vsp.broadcast(e);
 483     }
 484 
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #broadcast(short) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((short)e)}.
     * The two expressions will produce numerically identical results.
     * Note that a call with an {@code int} argument, such as
     * {@code v.broadcast(0)}, resolves to this {@code long} overload.
     */
    @Override
    public abstract ShortVector broadcast(long e);
 496 
 497     /**
 498      * Returns a vector of the given species
 499      * where all lane elements are set to
 500      * the primitive value {@code e}.
 501      *
 502      * The {@code long} value must be accurately representable
 503      * by the {@code ETYPE} of the vector species, so that
 504      * {@code e==(long)(ETYPE)e}.
 505      *
 506      * @param species species of the desired vector
 507      * @param e the value to broadcast
 508      * @return a vector where all lane elements are set to
 509      *         the primitive value {@code e}
 510      * @throws IllegalArgumentException
 511      *         if the given {@code long} value cannot
 512      *         be represented by the vector's {@code ETYPE}
 513      * @see #broadcast(VectorSpecies,short)
 514      * @see VectorSpecies#checkValue(long)
 515      */
 516     public static ShortVector broadcast(VectorSpecies<Short> species, long e) {
 517         ShortSpecies vsp = (ShortSpecies) species;
 518         return vsp.broadcast(e);
 519     }
 520 
 521     /*package-private*/
 522     @ForceInline
 523     final ShortVector broadcastTemplate(long e) {
 524         return vspecies().broadcast(e);
 525     }
 526 
 527     /**
 528      * Returns a vector where each lane element is set to given
 529      * primitive values.
 530      * <p>
 531      * For each vector lane, where {@code N} is the vector lane index, the
 532      * the primitive value at index {@code N} is placed into the resulting
 533      * vector at lane index {@code N}.
 534      *
 535      * @param species species of the desired vector
 536      * @param es the given primitive values
 537      * @return a vector where each lane element is set to given primitive
 538      * values
 539      * @throws IllegalArgumentException
 540      *         if {@code es.length != species.length()}
 541      */
 542     @ForceInline
 543     @SuppressWarnings("unchecked")
 544     public static ShortVector fromValues(VectorSpecies<Short> species, short... es) {
 545         ShortSpecies vsp = (ShortSpecies) species;
 546         int vlength = vsp.laneCount();
 547         VectorIntrinsics.requireLength(es.length, vlength);
 548         // Get an unaliased copy and use it directly:
 549         return vsp.vectorFactory(Arrays.copyOf(es, vlength));
 550     }
 551 
 552     /**
 553      * Returns a vector where the first lane element is set to the primtive
 554      * value {@code e}, all other lane elements are set to the default
 555      * value(zero).
 556      *
 557      * @param species species of the desired vector
 558      * @param e the value
 559      * @return a vector where the first lane element is set to the primitive
 560      * value {@code e}
 561      */
 562     // FIXME: Does this carry its weight?
 563     @ForceInline
 564     public static ShortVector single(VectorSpecies<Short> species, short e) {
 565         return zero(species).withLane(0, e);
 566     }
 567 
 568     /**
 569      * Returns a vector where each lane element is set to a randomly
 570      * generated primitive value.
 571      *
 572      * The semantics are equivalent to calling
 573      * {@code (short)}{@link ThreadLocalRandom#nextInt()}
 574      * for each lane, from first to last.
 575      *
 576      * @param species species of the desired vector
 577      * @return a vector where each lane elements is set to a randomly
 578      * generated primitive value
 579      */
 580     public static ShortVector random(VectorSpecies<Short> species) {
 581         ShortSpecies vsp = (ShortSpecies) species;
 582         ThreadLocalRandom r = ThreadLocalRandom.current();
 583         return vsp.vOp(i -> nextRandom(r));
 584     }
 585     private static short nextRandom(ThreadLocalRandom r) {
 586         return (short) r.nextInt();
 587     }
 588 
 589     // Unary lanewise support
 590 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Declared abstract here; lanewiseTemplate(Unary) directly below is a
    // portable definition with the same parameter list.
    public abstract
    ShortVector lanewise(VectorOperators.Unary op);
 596 
 597     @ForceInline
 598     final
 599     ShortVector lanewiseTemplate(VectorOperators.Unary op) {
 600         if (opKind(op, VO_SPECIAL)) {
 601             if (op == ZOMO) {
 602                 return blend(broadcast(-1), compare(NE, 0));
 603             }
 604             if (op == NEG) {
 605                 // FIXME: Support this in the JIT.
 606                 return broadcast(0).lanewiseTemplate(SUB, this);
 607             }
 608         }
 609         int opc = opCode(op);
 610         return VectorIntrinsics.unaryOp(
 611             opc, getClass(), short.class, length(),
 612             this,
 613             UN_IMPL.find(op, opc, (opc_) -> {
 614               switch (opc_) {
 615                 case VECTOR_OP_NEG: return v0 ->
 616                         v0.uOp((i, a) -> (short) -a);
 617                 case VECTOR_OP_ABS: return v0 ->
 618                         v0.uOp((i, a) -> (short) Math.abs(a));
 619                 case VECTOR_OP_NOT: return v0 ->
 620                         v0.uOp((i, a) -> (short) ~a);
 621                 default: return null;
 622               }}));
 623     }
    // Lookup table consulted by lanewiseTemplate(Unary) via find(op, opc, ...)
    // to obtain the scalar fallback implementation for a unary operator.
    // NOTE(review): presumably ImplCache memoizes the lambda produced for
    // each operator -- confirm against ImplCache.
    private static final
    ImplCache<Unary,UnaryOperator<ShortVector>> UN_IMPL
        = new ImplCache<>(Unary.class, ShortVector.class);
 627 
 628     /**
 629      * {@inheritDoc} <!--workaround-->
 630      */
 631     @ForceInline
 632     public final
 633     ShortVector lanewise(VectorOperators.Unary op,
 634                                   VectorMask<Short> m) {
 635         return blend(lanewise(op), m);
 636     }
 637 
 638     // Binary lanewise support
 639 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short)
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    // Declared abstract here; lanewiseTemplate(Binary, Vector) directly
    // below is a portable definition with the same parameter list.
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Binary op,
                                  Vector<Short> v);
 649     @ForceInline
 650     final
 651     ShortVector lanewiseTemplate(VectorOperators.Binary op,
 652                                           Vector<Short> v) {
 653         ShortVector that = (ShortVector) v;
 654         that.check(this);
 655         if (opKind(op, VO_SPECIAL  | VO_SHIFT)) {
 656             if (op == FIRST_NONZERO) {
 657                 // FIXME: Support this in the JIT.
 658                 VectorMask<Short> thisNZ
 659                     = this.viewAsIntegralLanes().compare(NE, (short) 0);
 660                 that = that.blend((short) 0, thisNZ.cast(vspecies()));
 661                 op = OR_UNCHECKED;
 662             }
 663             if (opKind(op, VO_SHIFT)) {
 664                 // As per shift specification for Java, mask the shift count.
 665                 // This allows the JIT to ignore some ISA details.
 666                 that = that.lanewise(AND, SHIFT_MASK);
 667             }
 668             if (op == ROR || op == ROL) {  // FIXME: JIT should do this
 669                 ShortVector neg = that.lanewise(NEG);
 670                 ShortVector hi = this.lanewise(LSHL, (op == ROR) ? neg : that);
 671                 ShortVector lo = this.lanewise(LSHR, (op == ROR) ? that : neg);
 672                 return hi.lanewise(OR, lo);
 673             } else if (op == AND_NOT) {
 674                 // FIXME: Support this in the JIT.
 675                 that = that.lanewise(NOT);
 676                 op = AND;
 677             } else if (op == DIV) {
 678                 VectorMask<Short> eqz = that.eq((short)0);
 679                 if (eqz.anyTrue()) {
 680                     throw that.divZeroException();
 681                 }
 682             }
 683         }
 684         int opc = opCode(op);
 685         return VectorIntrinsics.binaryOp(
 686             opc, getClass(), short.class, length(),
 687             this, that,
 688             BIN_IMPL.find(op, opc, (opc_) -> {
 689               switch (opc_) {
 690                 case VECTOR_OP_ADD: return (v0, v1) ->
 691                         v0.bOp(v1, (i, a, b) -> (short)(a + b));
 692                 case VECTOR_OP_SUB: return (v0, v1) ->
 693                         v0.bOp(v1, (i, a, b) -> (short)(a - b));
 694                 case VECTOR_OP_MUL: return (v0, v1) ->
 695                         v0.bOp(v1, (i, a, b) -> (short)(a * b));
 696                 case VECTOR_OP_DIV: return (v0, v1) ->
 697                         v0.bOp(v1, (i, a, b) -> (short)(a / b));
 698                 case VECTOR_OP_MAX: return (v0, v1) ->
 699                         v0.bOp(v1, (i, a, b) -> (short)Math.max(a, b));
 700                 case VECTOR_OP_MIN: return (v0, v1) ->
 701                         v0.bOp(v1, (i, a, b) -> (short)Math.min(a, b));
 702                 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) ->
 703                         v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? a : b);
 704                 case VECTOR_OP_AND: return (v0, v1) ->
 705                         v0.bOp(v1, (i, a, b) -> (short)(a & b));
 706                 case VECTOR_OP_OR: return (v0, v1) ->
 707                         v0.bOp(v1, (i, a, b) -> (short)(a | b));
 708                 case VECTOR_OP_AND_NOT: return (v0, v1) ->
 709                         v0.bOp(v1, (i, a, b) -> (short)(a & ~b));
 710                 case VECTOR_OP_XOR: return (v0, v1) ->
 711                         v0.bOp(v1, (i, a, b) -> (short)(a ^ b));
 712                 case VECTOR_OP_LSHIFT: return (v0, v1) ->
 713                         v0.bOp(v1, (i, a, n) -> (short)(a << n));
 714                 case VECTOR_OP_RSHIFT: return (v0, v1) ->
 715                         v0.bOp(v1, (i, a, n) -> (short)(a >> n));
 716                 case VECTOR_OP_URSHIFT: return (v0, v1) ->
 717                         v0.bOp(v1, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
 718                 case VECTOR_OP_LROTATE: return (v0, v1) ->
 719                         v0.bOp(v1, (i, a, n) -> (short)((a << n)|(a >> -n)));
 720                 case VECTOR_OP_RROTATE: return (v0, v1) ->
 721                         v0.bOp(v1, (i, a, n) -> (short)((a >> n)|(a << -n)));
 722                 default: return null;
 723                 }}));
 724     }
    // Lookup table consulted by lanewiseTemplate(Binary, Vector) via
    // find(op, opc, ...) to obtain the scalar fallback implementation for
    // a binary operator.
    // NOTE(review): presumably ImplCache memoizes the lambda produced for
    // each operator -- confirm against ImplCache.
    private static final
    ImplCache<Binary,BinaryOperator<ShortVector>> BIN_IMPL
        = new ImplCache<>(Binary.class, ShortVector.class);
 728 
 729     /**
 730      * {@inheritDoc} <!--workaround-->
 731      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 732      */
 733     @ForceInline
 734     public final
 735     ShortVector lanewise(VectorOperators.Binary op,
 736                                   Vector<Short> v,
 737                                   VectorMask<Short> m) {
 738         ShortVector that = (ShortVector) v;
 739         if (op == DIV) {
 740             // suppress div/0 exceptions in unset lanes
 741             that = that.lanewise(NOT, that.eq((short)0));
 742             return blend(lanewise(DIV, that), m);
 743         }
 744         return blend(lanewise(op, v), m);
 745     }
 746     // FIXME: Maybe all of the public final methods in this file (the
 747     // simple ones that just call lanewise) should be pushed down to
 748     // the X-VectorBits template.  They can't optimize properly at
 749     // this level, and must rely on inlining.  Does it work?
 750     // (If it works, of course keep the code here.)
 751 
 752     /**
 753      * Combines the lane values of this vector
 754      * with the value of a broadcast scalar.
 755      *
 756      * This is a lane-wise binary operation which applies
 757      * the selected operation to each lane.
 758      * The return value will be equal to this expression:
 759      * {@code this.lanewise(op, this.broadcast(e))}.
 760      *
 761      * @param op the operation used to process lane values
 762      * @param e the input scalar
 763      * @return the result of applying the operation lane-wise
 764      *         to the two input vectors
 765      * @throws UnsupportedOperationException if this vector does
 766      *         not support the requested operation
 767      * @see #lanewise(VectorOperators.Binary,Vector)
 768      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
 769      */
 770     @ForceInline
 771     public final
 772     ShortVector lanewise(VectorOperators.Binary op,
 773                                   short e) {
 774         int opc = opCode(op);
 775         if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
 776             return lanewiseShift(op, (int) e);
 777         }
 778         if (op == AND_NOT) {
 779             op = AND; e = (short) ~e;
 780         }
 781         return lanewise(op, broadcast(e));
 782     }
 783 
 784     /**
 785      * Combines the lane values of this vector
 786      * with the value of a broadcast scalar,
 787      * with selection of lane elements controlled by a mask.
 788      *
 789      * This is a masked lane-wise binary operation which applies
 790      * the selected operation to each lane.
 791      * The return value will be equal to this expression:
 792      * {@code this.lanewise(op, this.broadcast(e), m)}.
 793      *
 794      * @param op the operation used to process lane values
 795      * @param e the input scalar
 796      * @param m the mask controlling lane selection
 797      * @return the result of applying the operation lane-wise
 798      *         to the input vector and the scalar
 799      * @throws UnsupportedOperationException if this vector does
 800      *         not support the requested operation
 801      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 802      * @see #lanewise(VectorOperators.Binary,short)
 803      */
 804     @ForceInline
 805     public final
 806     ShortVector lanewise(VectorOperators.Binary op,
 807                                   short e,
 808                                   VectorMask<Short> m) {
 809         return blend(lanewise(op, e), m);
 810     }
 811 
 812     /**
 813      * {@inheritDoc} <!--workaround-->
 814      * @apiNote
 815      * When working with vector subtypes like {@code ShortVector},
 816      * {@linkplain #lanewise(VectorOperators.Binary,short)
 817      * the more strongly typed method}
 818      * is typically selected.  It can be explicitly selected
 819      * using a cast: {@code v.lanewise(op,(short)e)}.
 820      * The two expressions will produce numerically identical results.
 821      */
 822     @ForceInline
 823     public final
 824     ShortVector lanewise(VectorOperators.Binary op,
 825                                   long e) {
 826         short e1 = (short) e;
 827         if ((long)e1 != e
 828             // allow shift ops to clip down their int parameters
 829             && !(opKind(op, VO_SHIFT) && (int)e1 == e)
 830             ) {
 831             vspecies().checkValue(e);  // for exception
 832         }
 833         return lanewise(op, e1);
 834     }
 835 
 836     /**
 837      * {@inheritDoc} <!--workaround-->
 838      * @apiNote
 839      * When working with vector subtypes like {@code ShortVector},
 840      * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
 841      * the more strongly typed method}
 842      * is typically selected.  It can be explicitly selected
 843      * using a cast: {@code v.lanewise(op,(short)e,m)}.
 844      * The two expressions will produce numerically identical results.
 845      */
 846     @ForceInline
 847     public final
 848     ShortVector lanewise(VectorOperators.Binary op,
 849                                   long e, VectorMask<Short> m) {
 850         return blend(lanewise(op, e), m);
 851     }
 852 
    /*package-private*/
    // Applies the shift-kind operator op to this vector using the
    // scalar count e.  lanewise(Binary, short) routes every VO_SHIFT
    // operator here.  NOTE(review): concrete vector classes presumably
    // implement this by forwarding to lanewiseShiftTemplate -- confirm.
    abstract ShortVector
    lanewiseShift(VectorOperators.Binary op, int e);
 856 
 857     /*package-private*/
 858     @ForceInline
 859     final ShortVector
 860     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 861         // Special handling for these.  FIXME: Refactor?
 862         int opc = opCode(op);
 863         assert(opKind(op, VO_SHIFT));
 864         // As per shift specification for Java, mask the shift count.
 865         e &= SHIFT_MASK;
 866         if (op == ROR || op == ROL) {  // FIXME: JIT should do this
 867             ShortVector hi = this.lanewise(LSHL, (op == ROR) ? -e : e);
 868             ShortVector lo = this.lanewise(LSHR, (op == ROR) ? e : -e);
 869             return hi.lanewise(OR, lo);
 870         }
 871         return VectorIntrinsics.broadcastInt(
 872             opc, getClass(), short.class, length(),
 873             this, e,
 874             BIN_INT_IMPL.find(op, opc, (opc_) -> {
 875               switch (opc_) {
 876                 case VECTOR_OP_LSHIFT: return (v, n) ->
 877                         v.uOp((i, a) -> (short)(a << n));
 878                 case VECTOR_OP_RSHIFT: return (v, n) ->
 879                         v.uOp((i, a) -> (short)(a >> n));
 880                 case VECTOR_OP_URSHIFT: return (v, n) ->
 881                         v.uOp((i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
 882                 case VECTOR_OP_LROTATE: return (v, n) ->
 883                         v.uOp((i, a) -> (short)((a << n)|(a >> -n)));
 884                 case VECTOR_OP_RROTATE: return (v, n) ->
 885                         v.uOp((i, a) -> (short)((a >> n)|(a << -n)));
 886                 default: return null;
 887                 }}));
 888     }
 889     private static final
 890     ImplCache<Binary,VectorBroadcastIntOp<ShortVector>> BIN_INT_IMPL
 891         = new ImplCache<>(Binary.class, ShortVector.class);
 892 
 893     // As per shift specification for Java, mask the shift count.
 894     // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
 895     // The latter two maskings go beyond the JLS, but seem reasonable
 896     // since our lane types are first-class types, not just dressed
 897     // up ints.
 898     private static final int SHIFT_MASK = (Short.SIZE - 1);
 899     // Also simulate >>> on sub-word variables with a mask.
 900     private static final int LSHR_SETUP_MASK = ((1 << Short.SIZE) - 1);
 901 
 902     // Ternary lanewise support
 903 
 904     // Ternary operators come in eight variations:
 905     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
 906     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
 907 
 908     // It is annoying to support all of these variations of masking
 909     // and broadcast, but it would be more surprising not to continue
 910     // the obvious pattern started by unary and binary.
 911 
   /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,short,short)
     * @see #lanewise(VectorOperators.Ternary,Vector,short)
     * @see #lanewise(VectorOperators.Ternary,short,Vector)
     */
    // Every scalar and masked ternary overload in this class funnels
    // into this method.  NOTE(review): concrete vector classes presumably
    // implement it by forwarding to lanewiseTemplate(op, v1, v2) -- confirm.
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Ternary op,
                                                  Vector<Short> v1,
                                                  Vector<Short> v2);
 926     @ForceInline
 927     final
 928     ShortVector lanewiseTemplate(VectorOperators.Ternary op,
 929                                           Vector<Short> v1,
 930                                           Vector<Short> v2) {
 931         ShortVector that = (ShortVector) v1;
 932         ShortVector tother = (ShortVector) v2;
 933         // It's a word: https://www.dictionary.com/browse/tother
 934         // See also Chapter 11 of Dickens, Our Mutual Friend:
 935         // "Totherest Governor," replied Mr Riderhood...
 936         that.check(this);
 937         tother.check(this);
 938         if (op == BITWISE_BLEND) {
 939             // FIXME: Support this in the JIT.
 940             that = this.lanewise(XOR, that).lanewise(AND, tother);
 941             return this.lanewise(XOR, that);
 942         }
 943         int opc = opCode(op);
 944         return VectorIntrinsics.ternaryOp(
 945             opc, getClass(), short.class, length(),
 946             this, that, tother,
 947             TERN_IMPL.find(op, opc, (opc_) -> {
 948               switch (opc_) {
 949                 case VECTOR_OP_BITWISE_BLEND: return (v0, v1_, v2_) ->
 950                         v0.tOp(v1_, v2_, (i, a, b, c) -> (short)(a^((a^b)&c)));
 951                 default: return null;
 952                 }}));
 953     }
 954     private static final
 955     ImplCache<Ternary,TernaryOperation<ShortVector>> TERN_IMPL
 956         = new ImplCache<>(Ternary.class, ShortVector.class);
 957 
 958     /**
 959      * {@inheritDoc} <!--workaround-->
 960      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 961      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
 962      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
 963      */
 964     @ForceInline
 965     public final
 966     ShortVector lanewise(VectorOperators.Ternary op,
 967                                   Vector<Short> v1,
 968                                   Vector<Short> v2,
 969                                   VectorMask<Short> m) {
 970         return blend(lanewise(op, v1, v2), m);
 971     }
 972 
 973     /**
 974      * Combines the lane values of this vector
 975      * with the values of two broadcast scalars.
 976      *
 977      * This is a lane-wise ternary operation which applies
 978      * the selected operation to each lane.
 979      * The return value will be equal to this expression:
 980      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
 981      *
 982      * @param op the operation used to combine lane values
 983      * @param e1 the first input scalar
 984      * @param e2 the second input scalar
 985      * @return the result of applying the operation lane-wise
 986      *         to the input vector and the scalars
 987      * @throws UnsupportedOperationException if this vector does
 988      *         not support the requested operation
 989      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
 990      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
 991      */
 992     @ForceInline
 993     public final
 994     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
 995                                   short e1,
 996                                   short e2) {
 997         return lanewise(op, broadcast(e1), broadcast(e2));
 998     }
 999 
1000     /**
1001      * Combines the lane values of this vector
1002      * with the values of two broadcast scalars,
1003      * with selection of lane elements controlled by a mask.
1004      *
1005      * This is a masked lane-wise ternary operation which applies
1006      * the selected operation to each lane.
1007      * The return value will be equal to this expression:
1008      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1009      *
1010      * @param op the operation used to combine lane values
1011      * @param e1 the first input scalar
1012      * @param e2 the second input scalar
1013      * @param m the mask controlling lane selection
1014      * @return the result of applying the operation lane-wise
1015      *         to the input vector and the scalars
1016      * @throws UnsupportedOperationException if this vector does
1017      *         not support the requested operation
1018      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1019      * @see #lanewise(VectorOperators.Ternary,short,short)
1020      */
1021     @ForceInline
1022     public final
1023     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1024                                   short e1,
1025                                   short e2,
1026                                   VectorMask<Short> m) {
1027         return blend(lanewise(op, e1, e2), m);
1028     }
1029 
1030     /**
1031      * Combines the lane values of this vector
1032      * with the values of another vector and a broadcast scalar.
1033      *
1034      * This is a lane-wise ternary operation which applies
1035      * the selected operation to each lane.
1036      * The return value will be equal to this expression:
1037      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1038      *
1039      * @param op the operation used to combine lane values
1040      * @param v1 the other input vector
1041      * @param e2 the input scalar
1042      * @return the result of applying the operation lane-wise
1043      *         to the input vectors and the scalar
1044      * @throws UnsupportedOperationException if this vector does
1045      *         not support the requested operation
1046      * @see #lanewise(VectorOperators.Ternary,short,short)
1047      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
1048      */
1049     @ForceInline
1050     public final
1051     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1052                                   Vector<Short> v1,
1053                                   short e2) {
1054         return lanewise(op, v1, broadcast(e2));
1055     }
1056 
1057     /**
1058      * Combines the lane values of this vector
1059      * with the values of another vector and a broadcast scalar,
1060      * with selection of lane elements controlled by a mask.
1061      *
1062      * This is a masked lane-wise ternary operation which applies
1063      * the selected operation to each lane.
1064      * The return value will be equal to this expression:
1065      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1066      *
1067      * @param op the operation used to combine lane values
1068      * @param v1 the other input vector
1069      * @param e2 the input scalar
1070      * @param m the mask controlling lane selection
1071      * @return the result of applying the operation lane-wise
1072      *         to the input vectors and the scalar
1073      * @throws UnsupportedOperationException if this vector does
1074      *         not support the requested operation
1075      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1076      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
1077      * @see #lanewise(VectorOperators.Ternary,Vector,short)
1078      */
1079     @ForceInline
1080     public final
1081     ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1082                                   Vector<Short> v1,
1083                                   short e2,
1084                                   VectorMask<Short> m) {
1085         return blend(lanewise(op, v1, e2), m);
1086     }
1087 
1088     /**
1089      * Combines the lane values of this vector
1090      * with the values of another vector and a broadcast scalar.
1091      *
1092      * This is a lane-wise ternary operation which applies
1093      * the selected operation to each lane.
1094      * The return value will be equal to this expression:
1095      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1096      *
1097      * @param op the operation used to combine lane values
1098      * @param e1 the input scalar
1099      * @param v2 the other input vector
1100      * @return the result of applying the operation lane-wise
1101      *         to the input vectors and the scalar
1102      * @throws UnsupportedOperationException if this vector does
1103      *         not support the requested operation
1104      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1105      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
1106      */
1107     @ForceInline
1108     public final
1109     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1110                                   short e1,
1111                                   Vector<Short> v2) {
1112         return lanewise(op, broadcast(e1), v2);
1113     }
1114 
1115     /**
1116      * Combines the lane values of this vector
1117      * with the values of another vector and a broadcast scalar,
1118      * with selection of lane elements controlled by a mask.
1119      *
1120      * This is a masked lane-wise ternary operation which applies
1121      * the selected operation to each lane.
1122      * The return value will be equal to this expression:
1123      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1124      *
1125      * @param op the operation used to combine lane values
1126      * @param e1 the input scalar
1127      * @param v2 the other input vector
1128      * @param m the mask controlling lane selection
1129      * @return the result of applying the operation lane-wise
1130      *         to the input vectors and the scalar
1131      * @throws UnsupportedOperationException if this vector does
1132      *         not support the requested operation
1133      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1134      * @see #lanewise(VectorOperators.Ternary,short,Vector)
1135      */
1136     @ForceInline
1137     public final
1138     ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1139                                   short e1,
1140                                   Vector<Short> v2,
1141                                   VectorMask<Short> m) {
1142         return blend(lanewise(op, e1, v2), m);
1143     }
1144 
1145     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1146     // https://en.wikipedia.org/wiki/Ogdoad
1147 
1148     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1149     //
1150     // These include masked and non-masked versions.
1151     // This subclass adds broadcast (masked or not).
1152 
1153     /**
1154      * {@inheritDoc} <!--workaround-->
1155      * @see #add(short)
1156      */
1157     @Override
1158     @ForceInline
1159     public final ShortVector add(Vector<Short> v) {
1160         return lanewise(ADD, v);
1161     }
1162 
1163     /**
1164      * Adds this vector to the broadcast of an input scalar.
1165      *
1166      * This is a lane-wise binary operation which applies
1167      * the primitive addition operation ({@code +}) to each lane.
1168      *
1169      * This method is also equivalent to the expression
1170      * {@link #lanewise(VectorOperators.Binary,short)
1171      *    lanewise}{@code (}{@link VectorOperators#ADD
1172      *    ADD}{@code , e)}.
1173      *
1174      * @param e the input scalar
1175      * @return the result of adding each lane of this vector to the scalar
1176      * @see #add(Vector)
1177      * @see #broadcast(short)
1178      * @see #add(short,VectorMask)
1179      * @see VectorOperators#ADD
1180      * @see #lanewise(VectorOperators.Binary,Vector)
1181      * @see #lanewise(VectorOperators.Binary,short)
1182      */
1183     @ForceInline
1184     public final
1185     ShortVector add(short e) {
1186         return lanewise(ADD, e);
1187     }
1188 
1189     /**
1190      * {@inheritDoc} <!--workaround-->
1191      * @see #add(short,VectorMask)
1192      */
1193     @Override
1194     @ForceInline
1195     public final ShortVector add(Vector<Short> v,
1196                                           VectorMask<Short> m) {
1197         return lanewise(ADD, v, m);
1198     }
1199 
1200     /**
1201      * Adds this vector to the broadcast of an input scalar,
1202      * selecting lane elements controlled by a mask.
1203      *
1204      * This is a masked lane-wise binary operation which applies
1205      * the primitive addition operation ({@code +}) to each lane.
1206      *
1207      * This method is also equivalent to the expression
1208      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1209      *    lanewise}{@code (}{@link VectorOperators#ADD
1210      *    ADD}{@code , s, m)}.
1211      *
1212      * @param e the input scalar
1213      * @param m the mask controlling lane selection
1214      * @return the result of adding each lane of this vector to the scalar
1215      * @see #add(Vector,VectorMask)
1216      * @see #broadcast(short)
1217      * @see #add(short)
1218      * @see VectorOperators#ADD
1219      * @see #lanewise(VectorOperators.Binary,Vector)
1220      * @see #lanewise(VectorOperators.Binary,short)
1221      */
1222     @ForceInline
1223     public final ShortVector add(short e,
1224                                           VectorMask<Short> m) {
1225         return lanewise(ADD, e, m);
1226     }
1227 
1228     /**
1229      * {@inheritDoc} <!--workaround-->
1230      * @see #sub(short)
1231      */
1232     @Override
1233     @ForceInline
1234     public final ShortVector sub(Vector<Short> v) {
1235         return lanewise(SUB, v);
1236     }
1237 
1238     /**
1239      * Subtracts an input scalar from this vector.
1240      *
1241      * This is a masked lane-wise binary operation which applies
1242      * the primitive subtraction operation ({@code -}) to each lane.
1243      *
1244      * This method is also equivalent to the expression
1245      * {@link #lanewise(VectorOperators.Binary,short)
1246      *    lanewise}{@code (}{@link VectorOperators#SUB
1247      *    SUB}{@code , e)}.
1248      *
1249      * @param e the input scalar
1250      * @return the result of subtracting the scalar from each lane of this vector
1251      * @see #sub(Vector)
1252      * @see #broadcast(short)
1253      * @see #sub(short,VectorMask)
1254      * @see VectorOperators#SUB
1255      * @see #lanewise(VectorOperators.Binary,Vector)
1256      * @see #lanewise(VectorOperators.Binary,short)
1257      */
1258     @ForceInline
1259     public final ShortVector sub(short e) {
1260         return lanewise(SUB, e);
1261     }
1262 
1263     /**
1264      * {@inheritDoc} <!--workaround-->
1265      * @see #sub(short,VectorMask)
1266      */
1267     @Override
1268     @ForceInline
1269     public final ShortVector sub(Vector<Short> v,
1270                                           VectorMask<Short> m) {
1271         return lanewise(SUB, v, m);
1272     }
1273 
1274     /**
1275      * Subtracts an input scalar from this vector
1276      * under the control of a mask.
1277      *
1278      * This is a masked lane-wise binary operation which applies
1279      * the primitive subtraction operation ({@code -}) to each lane.
1280      *
1281      * This method is also equivalent to the expression
1282      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1283      *    lanewise}{@code (}{@link VectorOperators#SUB
1284      *    SUB}{@code , s, m)}.
1285      *
1286      * @param e the input scalar
1287      * @param m the mask controlling lane selection
1288      * @return the result of subtracting the scalar from each lane of this vector
1289      * @see #sub(Vector,VectorMask)
1290      * @see #broadcast(short)
1291      * @see #sub(short)
1292      * @see VectorOperators#SUB
1293      * @see #lanewise(VectorOperators.Binary,Vector)
1294      * @see #lanewise(VectorOperators.Binary,short)
1295      */
1296     @ForceInline
1297     public final ShortVector sub(short e,
1298                                           VectorMask<Short> m) {
1299         return lanewise(SUB, e, m);
1300     }
1301 
1302     /**
1303      * {@inheritDoc} <!--workaround-->
1304      * @see #mul(short)
1305      */
1306     @Override
1307     @ForceInline
1308     public final ShortVector mul(Vector<Short> v) {
1309         return lanewise(MUL, v);
1310     }
1311 
1312     /**
1313      * Multiplies this vector by the broadcast of an input scalar.
1314      *
1315      * This is a lane-wise binary operation which applies
1316      * the primitive multiplication operation ({@code *}) to each lane.
1317      *
1318      * This method is also equivalent to the expression
1319      * {@link #lanewise(VectorOperators.Binary,short)
1320      *    lanewise}{@code (}{@link VectorOperators#MUL
1321      *    MUL}{@code , e)}.
1322      *
1323      * @param e the input scalar
1324      * @return the result of multiplying this vector by the given scalar
1325      * @see #mul(Vector)
1326      * @see #broadcast(short)
1327      * @see #mul(short,VectorMask)
1328      * @see VectorOperators#MUL
1329      * @see #lanewise(VectorOperators.Binary,Vector)
1330      * @see #lanewise(VectorOperators.Binary,short)
1331      */
1332     @ForceInline
1333     public final ShortVector mul(short e) {
1334         return lanewise(MUL, e);
1335     }
1336 
1337     /**
1338      * {@inheritDoc} <!--workaround-->
1339      * @see #mul(short,VectorMask)
1340      */
1341     @Override
1342     @ForceInline
1343     public final ShortVector mul(Vector<Short> v,
1344                                           VectorMask<Short> m) {
1345         return lanewise(MUL, v, m);
1346     }
1347 
1348     /**
1349      * Multiplies this vector by the broadcast of an input scalar,
1350      * selecting lane elements controlled by a mask.
1351      *
1352      * This is a masked lane-wise binary operation which applies
1353      * the primitive multiplication operation ({@code *}) to each lane.
1354      *
1355      * This method is also equivalent to the expression
1356      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1357      *    lanewise}{@code (}{@link VectorOperators#MUL
1358      *    MUL}{@code , s, m)}.
1359      *
1360      * @param e the input scalar
1361      * @param m the mask controlling lane selection
1362      * @return the result of muling each lane of this vector to the scalar
1363      * @see #mul(Vector,VectorMask)
1364      * @see #broadcast(short)
1365      * @see #mul(short)
1366      * @see VectorOperators#MUL
1367      * @see #lanewise(VectorOperators.Binary,Vector)
1368      * @see #lanewise(VectorOperators.Binary,short)
1369      */
1370     @ForceInline
1371     public final ShortVector mul(short e,
1372                                           VectorMask<Short> m) {
1373         return lanewise(MUL, e, m);
1374     }
1375 
1376     /**
1377      * {@inheritDoc} <!--workaround-->
1378      * @apiNote If there is a zero divisor, {@code
1379      * ArithmeticException} will be thrown.
1380      * @see #div(short)
1381      */
1382     @Override
1383     @ForceInline
1384     public final ShortVector div(Vector<Short> v) {
1385         return lanewise(DIV, v);
1386     }
1387 
1388     /**
1389      * Divides this vector by the broadcast of an input scalar.
1390      *
1391      * This is a lane-wise binary operation which applies
1392      * the primitive division operation ({@code /}) to each lane.
1393      *
1394      * This method is also equivalent to the expression
1395      * {@link #lanewise(VectorOperators.Binary,short)
1396      *    lanewise}{@code (}{@link VectorOperators#DIV
1397      *    DIV}{@code , e)}.
1398      *
1399      * @apiNote If there is a zero divisor, {@code
1400      * ArithmeticException} will be thrown.
1401      * @see #div(short)
1402 
1403      *
1404      * @param e the input scalar
1405      * @return the result of dividing each lane of this vector by the scalar
1406      * @see #div(Vector)
1407      * @see #broadcast(short)
1408      * @see #div(short,VectorMask)
1409      * @see VectorOperators#DIV
1410      * @see #lanewise(VectorOperators.Binary,Vector)
1411      * @see #lanewise(VectorOperators.Binary,short)
1412      */
1413     @ForceInline
1414     public final ShortVector div(short e) {
1415         return lanewise(DIV, e);
1416     }
1417 
1418     /**
1419      * {@inheritDoc} <!--workaround-->
1420      * @see #div(short,VectorMask)
1421      * @apiNote If there is a zero divisor, {@code
1422      * ArithmeticException} will be thrown.
1423      */
1424     @Override
1425     @ForceInline
1426     public final ShortVector div(Vector<Short> v,
1427                                           VectorMask<Short> m) {
1428         return lanewise(DIV, v, m);
1429     }
1430 
1431     /**
1432      * Divides this vector by the broadcast of an input scalar,
1433      * selecting lane elements controlled by a mask.
1434      *
1435      * This is a masked lane-wise binary operation which applies
1436      * the primitive division operation ({@code /}) to each lane.
1437      *
1438      * This method is also equivalent to the expression
1439      * {@link #lanewise(VectorOperators.Binary,short,VectorMask)
1440      *    lanewise}{@code (}{@link VectorOperators#DIV
1441      *    DIV}{@code , s, m)}.
1442      *
1443      * @apiNote If there is a zero divisor, {@code
1444      * ArithmeticException} will be thrown.
1445      *
1446      * @param e the input scalar
1447      * @param m the mask controlling lane selection
1448      * @return the result of dividing each lane of this vector by the scalar
1449      * @see #div(Vector,VectorMask)
1450      * @see #broadcast(short)
1451      * @see #div(short)
1452      * @see VectorOperators#DIV
1453      * @see #lanewise(VectorOperators.Binary,Vector)
1454      * @see #lanewise(VectorOperators.Binary,short)
1455      */
1456     @ForceInline
1457     public final ShortVector div(short e,
1458                                           VectorMask<Short> m) {
1459         return lanewise(DIV, e, m);
1460     }
1461 
1462     /// END OF FULL-SERVICE BINARY METHODS
1463 
1464     /// SECOND-TIER BINARY METHODS
1465     //
1466     // There are no masked versions.
1467 
1468     /**
1469      * {@inheritDoc} <!--workaround-->
1470      */
1471     @Override
1472     @ForceInline
1473     public final ShortVector min(Vector<Short> v) {
1474         return lanewise(MIN, v);
1475     }
1476 
1477     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1478     /**
1479      * Computes the smaller of this vector and the broadcast of an input scalar.
1480      *
1481      * This is a lane-wise binary operation which applies the
1482      * operation {@code Math.min()} to each pair of
1483      * corresponding lane values.
1484      *
1485      * This method is also equivalent to the expression
1486      * {@link #lanewise(VectorOperators.Binary,short)
1487      *    lanewise}{@code (}{@link VectorOperators#MIN
1488      *    MIN}{@code , e)}.
1489      *
1490      * @param e the input scalar
1491      * @return the result of multiplying this vector by the given scalar
1492      * @see #min(Vector)
1493      * @see #broadcast(short)
1494      * @see VectorOperators#MIN
1495      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1496      */
1497     @ForceInline
1498     public final ShortVector min(short e) {
1499         return lanewise(MIN, e);
1500     }
1501 
1502     /**
1503      * {@inheritDoc} <!--workaround-->
1504      */
1505     @Override
1506     @ForceInline
1507     public final ShortVector max(Vector<Short> v) {
1508         return lanewise(MAX, v);
1509     }
1510 
1511     /**
1512      * Computes the larger of this vector and the broadcast of an input scalar.
1513      *
1514      * This is a lane-wise binary operation which applies the
1515      * operation {@code Math.max()} to each pair of
1516      * corresponding lane values.
1517      *
1518      * This method is also equivalent to the expression
1519      * {@link #lanewise(VectorOperators.Binary,short)
1520      *    lanewise}{@code (}{@link VectorOperators#MAX
1521      *    MAX}{@code , e)}.
1522      *
1523      * @param e the input scalar
1524      * @return the result of multiplying this vector by the given scalar
1525      * @see #max(Vector)
1526      * @see #broadcast(short)
1527      * @see VectorOperators#MAX
1528      * @see #lanewise(VectorOperators.Binary,short,VectorMask)
1529      */
1530     @ForceInline
1531     public final ShortVector max(short e) {
1532         return lanewise(MAX, e);
1533     }
1534 
1535     // common bitwise operators: and, or, not (with scalar versions)
1536     /**
1537      * Computes the bitwise logical conjunction ({@code &})
1538      * of this vector and a second input vector.
1539      *
1540      * This is a lane-wise binary operation which applies the
1541      * the primitive bitwise "and" operation ({@code &})
1542      * to each pair of corresponding lane values.
1543      *
1544      * This method is also equivalent to the expression
1545      * {@link #lanewise(VectorOperators.Binary,Vector)
1546      *    lanewise}{@code (}{@link VectorOperators#AND
1547      *    AND}{@code , v)}.
1548      *
1549      * <p>
1550      * This is not a full-service named operation like
1551      * {@link #add(Vector) add}.  A masked version of
1552      * version of this operation is not directly available
1553      * but may be obtained via the masked version of
1554      * {@code lanewise}.
1555      *
1556      * @param v a second input vector
1557      * @return the bitwise {@code &} of this vector and the second input vector
1558      * @see #and(short)
1559      * @see #or(Vector)
1560      * @see #not()
1561      * @see VectorOperators#AND
1562      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1563      */
1564     @ForceInline
1565     public final ShortVector and(Vector<Short> v) {
1566         return lanewise(AND, v);
1567     }
1568 
1569     /**
1570      * Computes the bitwise logical conjunction ({@code &})
1571      * of this vector and a scalar.
1572      *
1573      * This is a lane-wise binary operation which applies the
1574      * the primitive bitwise "and" operation ({@code &})
1575      * to each pair of corresponding lane values.
1576      *
1577      * This method is also equivalent to the expression
1578      * {@link #lanewise(VectorOperators.Binary,Vector)
1579      *    lanewise}{@code (}{@link VectorOperators#AND
1580      *    AND}{@code , e)}.
1581      *
1582      * @param e an input scalar
1583      * @return the bitwise {@code &} of this vector and scalar
1584      * @see #and(Vector)
1585      * @see VectorOperators#AND
1586      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1587      */
1588     @ForceInline
1589     public final ShortVector and(short e) {
1590         return lanewise(AND, e);
1591     }
1592 
1593     /**
1594      * Computes the bitwise logical disjunction ({@code |})
1595      * of this vector and a second input vector.
1596      *
1597      * This is a lane-wise binary operation which applies the
1598      * the primitive bitwise "or" operation ({@code |})
1599      * to each pair of corresponding lane values.
1600      *
1601      * This method is also equivalent to the expression
1602      * {@link #lanewise(VectorOperators.Binary,Vector)
1603      *    lanewise}{@code (}{@link VectorOperators#OR
1604      *    AND}{@code , v)}.
1605      *
1606      * <p>
1607      * This is not a full-service named operation like
1608      * {@link #add(Vector) add}.  A masked version of
1609      * version of this operation is not directly available
1610      * but may be obtained via the masked version of
1611      * {@code lanewise}.
1612      *
1613      * @param v a second input vector
1614      * @return the bitwise {@code |} of this vector and the second input vector
1615      * @see #or(short)
1616      * @see #and(Vector)
1617      * @see #not()
1618      * @see VectorOperators#OR
1619      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1620      */
1621     @ForceInline
1622     public final ShortVector or(Vector<Short> v) {
1623         return lanewise(OR, v);
1624     }
1625 
1626     /**
1627      * Computes the bitwise logical disjunction ({@code |})
1628      * of this vector and a scalar.
1629      *
1630      * This is a lane-wise binary operation which applies the
1631      * the primitive bitwise "or" operation ({@code |})
1632      * to each pair of corresponding lane values.
1633      *
1634      * This method is also equivalent to the expression
1635      * {@link #lanewise(VectorOperators.Binary,Vector)
1636      *    lanewise}{@code (}{@link VectorOperators#OR
1637      *    OR}{@code , e)}.
1638      *
1639      * @param e an input scalar
1640      * @return the bitwise {@code |} of this vector and scalar
1641      * @see #or(Vector)
1642      * @see VectorOperators#OR
1643      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1644      */
1645     @ForceInline
1646     public final ShortVector or(short e) {
1647         return lanewise(OR, e);
1648     }
1649 
1650 
1651 
1652     /// UNARY METHODS
1653 
1654     /**
1655      * {@inheritDoc} <!--workaround-->
1656      */
1657     @Override
1658     @ForceInline
1659     public final
1660     ShortVector neg() {
1661         return lanewise(NEG);
1662     }
1663 
1664     /**
1665      * {@inheritDoc} <!--workaround-->
1666      */
1667     @Override
1668     @ForceInline
1669     public final
1670     ShortVector abs() {
1671         return lanewise(ABS);
1672     }
1673 
1674     // not (~)
1675     /**
1676      * Computes the bitwise logical complement ({@code ~})
1677      * of this vector.
1678      *
1679      * This is a lane-wise binary operation which applies the
1680      * the primitive bitwise "not" operation ({@code ~})
1681      * to each lane value.
1682      *
1683      * This method is also equivalent to the expression
1684      * {@link #lanewise(VectorOperators.Unary)
1685      *    lanewise}{@code (}{@link VectorOperators#NOT
1686      *    NOT}{@code )}.
1687      *
1688      * <p>
1689      * This is not a full-service named operation like
1690      * {@link #add(Vector) add}.  A masked version of
1691      * version of this operation is not directly available
1692      * but may be obtained via the masked version of
1693      * {@code lanewise}.
1694      *
1695      * @return the bitwise complement {@code ~} of this vector
1696      * @see #and(Vector)
1697      * @see VectorOperators#NOT
1698      * @see #lanewise(VectorOperators.Unary,VectorMask)
1699      */
1700     @ForceInline
1701     public final ShortVector not() {
1702         return lanewise(NOT);
1703     }
1704 
1705 
1706     /// COMPARISONS
1707 
1708     /**
1709      * {@inheritDoc} <!--workaround-->
1710      */
1711     @Override
1712     @ForceInline
1713     public final
1714     VectorMask<Short> eq(Vector<Short> v) {
1715         return compare(EQ, v);
1716     }
1717 
1718     /**
1719      * Tests if this vector is equal to an input scalar.
1720      *
1721      * This is a lane-wise binary test operation which applies
1722      * the primitive equals operation ({@code ==}) to each lane.
1723      * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}.
1724      *
1725      * @param e the input scalar
1726      * @return the result mask of testing if this vector
1727      *         is equal to {@code e}
1728      * @see #compare(VectorOperators.Comparison,short)
1729      */
1730     @ForceInline
1731     public final
1732     VectorMask<Short> eq(short e) {
1733         return compare(EQ, e);
1734     }
1735 
1736     /**
1737      * {@inheritDoc} <!--workaround-->
1738      */
1739     @Override
1740     @ForceInline
1741     public final
1742     VectorMask<Short> lt(Vector<Short> v) {
1743         return compare(LT, v);
1744     }
1745 
1746     /**
1747      * Tests if this vector is less than an input scalar.
1748      *
1749      * This is a lane-wise binary test operation which applies
1750      * the primitive less than operation ({@code <}) to each lane.
1751      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1752      *
1753      * @param e the input scalar
1754      * @return the mask result of testing if this vector
1755      *         is less than the input scalar
1756      * @see #compare(VectorOperators.Comparison,short)
1757      */
1758     @ForceInline
1759     public final
1760     VectorMask<Short> lt(short e) {
1761         return compare(LT, e);
1762     }
1763 
1764     /**
1765      * {@inheritDoc} <!--workaround-->
1766      */
1767     @Override
1768     public abstract
1769     VectorMask<Short> test(VectorOperators.Test op);
1770 
1771     /*package-private*/
1772     @ForceInline
1773     final
1774     <M extends VectorMask<Short>>
1775     M testTemplate(Class<M> maskType, Test op) {
1776         ShortSpecies vsp = vspecies();
1777         if (opKind(op, VO_SPECIAL)) {
1778             ShortVector bits = this.viewAsIntegralLanes();
1779             VectorMask<Short> m;
1780             if (op == IS_DEFAULT) {
1781                 m = bits.compare(EQ, (short) 0);
1782             } else if (op == IS_NEGATIVE) {
1783                 m = bits.compare(LT, (short) 0);
1784             }
1785             else {
1786                 throw new AssertionError(op);
1787             }
1788             return maskType.cast(m);
1789         }
1790         int opc = opCode(op);
1791         throw new AssertionError(op);
1792     }
1793 
1794     /**
1795      * {@inheritDoc} <!--workaround-->
1796      */
1797     @Override
1798     @ForceInline
1799     public final
1800     VectorMask<Short> test(VectorOperators.Test op,
1801                                   VectorMask<Short> m) {
1802         return test(op).and(m);
1803     }
1804 
1805     /**
1806      * {@inheritDoc} <!--workaround-->
1807      */
1808     @Override
1809     public abstract
1810     VectorMask<Short> compare(VectorOperators.Comparison op, Vector<Short> v);
1811 
1812     /*package-private*/
1813     @ForceInline
1814     final
1815     <M extends VectorMask<Short>>
1816     M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v) {
1817         Objects.requireNonNull(v);
1818         ShortSpecies vsp = vspecies();
1819         ShortVector that = (ShortVector) v;
1820         that.check(this);
1821         int opc = opCode(op);
1822         return VectorIntrinsics.compare(
1823             opc, getClass(), maskType, short.class, length(),
1824             this, that,
1825             (cond, v0, v1) -> {
1826                 AbstractMask<Short> m
1827                     = v0.bTest(cond, v1, (cond_, i, a, b)
1828                                -> compareWithOp(cond, a, b));
1829                 @SuppressWarnings("unchecked")
1830                 M m2 = (M) m;
1831                 return m2;
1832             });
1833     }
1834 
1835     @ForceInline
1836     private static
1837     boolean compareWithOp(int cond, short a, short b) {
1838         switch (cond) {
1839         case VectorIntrinsics.BT_eq:  return a == b;
1840         case VectorIntrinsics.BT_ne:  return a != b;
1841         case VectorIntrinsics.BT_lt:  return a <  b;
1842         case VectorIntrinsics.BT_le:  return a <= b;
1843         case VectorIntrinsics.BT_gt:  return a >  b;
1844         case VectorIntrinsics.BT_ge:  return a >= b;
1845         }
1846         throw new AssertionError();
1847     }
1848 
1849     /**
1850      * {@inheritDoc} <!--workaround-->
1851      */
1852     @Override
1853     @ForceInline
1854     public final
1855     VectorMask<Short> compare(VectorOperators.Comparison op,
1856                                   Vector<Short> v,
1857                                   VectorMask<Short> m) {
1858         return compare(op, v).and(m);
1859     }
1860 
1861     /**
1862      * Tests this vector by comparing it with an input scalar,
1863      * according to the given comparison operation.
1864      *
1865      * This is a lane-wise binary test operation which applies
1866      * the comparison operation to each lane.
1867      * <p>
1868      * The result is the same as
1869      * {@code compare(op, broadcast(species(), e))}.
1870      * That is, the scalar may be regarded as broadcast to
1871      * a vector of the same species, and then compared
1872      * against the original vector, using the selected
1873      * comparison operation.
1874      *
1875      * @param op the operation used to compare lane values
1876      * @param e the input scalar
1877      * @return the mask result of testing lane-wise if this vector
1878      *         compares to the input, according to the selected
1879      *         comparison operator
1880      * @see ShortVector#compare(VectorOperators.Comparison,Vector)
1881      * @see #eq(short)
1882      * @see #lt(short)
1883      */
1884     public abstract
1885     VectorMask<Short> compare(Comparison op, short e);
1886 
1887     /*package-private*/
1888     @ForceInline
1889     final
1890     <M extends VectorMask<Short>>
1891     M compareTemplate(Class<M> maskType, Comparison op, short e) {
1892         return compareTemplate(maskType, op, broadcast(e));
1893     }
1894 
1895     /**
1896      * Tests this vector by comparing it with an input scalar,
1897      * according to the given comparison operation,
1898      * in lanes selected by a mask.
1899      *
1900      * This is a masked lane-wise binary test operation which applies
1901      * to each pair of corresponding lane values.
1902      *
1903      * The returned result is equal to the expression
1904      * {@code compare(op,s).and(m)}.
1905      *
1906      * @param op the operation used to compare lane values
1907      * @param e the input scalar
1908      * @param m the mask controlling lane selection
1909      * @return the mask result of testing lane-wise if this vector
1910      *         compares to the input, according to the selected
1911      *         comparison operator,
1912      *         and only in the lanes selected by the mask
1913      * @see ShortVector#compare(VectorOperators.Comparison,Vector,VectorMask)
1914      */
1915     @ForceInline
1916     public final VectorMask<Short> compare(VectorOperators.Comparison op,
1917                                                short e,
1918                                                VectorMask<Short> m) {
1919         return compare(op, e).and(m);
1920     }
1921 
1922     /**
1923      * {@inheritDoc} <!--workaround-->
1924      */
1925     @Override
1926     public abstract
1927     VectorMask<Short> compare(Comparison op, long e);
1928 
1929     /*package-private*/
1930     @ForceInline
1931     final
1932     <M extends VectorMask<Short>>
1933     M compareTemplate(Class<M> maskType, Comparison op, long e) {
1934         return compareTemplate(maskType, op, broadcast(e));
1935     }
1936 
1937     /**
1938      * {@inheritDoc} <!--workaround-->
1939      */
1940     @Override
1941     @ForceInline
1942     public final
1943     VectorMask<Short> compare(Comparison op, long e, VectorMask<Short> m) {
1944         return compare(op, broadcast(e), m);
1945     }
1946 
1947 
1948 
1949     /**
1950      * {@inheritDoc} <!--workaround-->
1951      */
1952     @Override public abstract
1953     ShortVector blend(Vector<Short> v, VectorMask<Short> m);
1954 
1955     /*package-private*/
1956     @ForceInline
1957     final
1958     <M extends VectorMask<Short>>
1959     ShortVector
1960     blendTemplate(Class<M> maskType, ShortVector v, M m) {
1961         v.check(this);
1962         return VectorIntrinsics.blend(
1963             getClass(), maskType, short.class, length(),
1964             this, v, m,
1965             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
1966     }
1967 
1968     /**
1969      * {@inheritDoc} <!--workaround-->
1970      */
1971     @Override public abstract ShortVector addIndex(int scale);
1972 
1973     /*package-private*/
1974     @ForceInline
1975     final ShortVector addIndexTemplate(int scale) {
1976         ShortSpecies vsp = vspecies();
1977         // make sure VLENGTH*scale doesn't overflow:
1978         vsp.checkScale(scale);
1979         return VectorIntrinsics.indexVector(
1980             getClass(), short.class, length(),
1981             this, scale, vsp,
1982             (v, scale_, s)
1983             -> {
1984                 // If the platform doesn't support an INDEX
1985                 // instruction directly, load IOTA from memory
1986                 // and multiply.
1987                 ShortVector iota = s.iota();
1988                 short sc = (short) scale_;
1989                 return v.add(sc == 1 ? iota : iota.mul(sc));
1990             });
1991     }
1992 
1993     /**
1994      * Replaces selected lanes of this vector with
1995      * a scalar value
1996      * under the control of a mask.
1997      *
1998      * This is a masked lane-wise binary operation which
1999      * selects each lane value from one or the other input.
2000      *
2001      * The returned result is equal to the expression
2002      * {@code blend(broadcast(e),m)}.
2003      *
2004      * @param e the input scalar, containing the replacement lane value
2005      * @param m the mask controlling lane selection of the scalar
2006      * @return the result of blending the lane elements of this vector with
2007      *         the scalar value
2008      */
2009     @ForceInline
2010     public final ShortVector blend(short e,
2011                                             VectorMask<Short> m) {
2012         return blend(broadcast(e), m);
2013     }
2014 
2015     /**
2016      * Replaces selected lanes of this vector with
2017      * a scalar value
2018      * under the control of a mask.
2019      *
2020      * This is a masked lane-wise binary operation which
2021      * selects each lane value from one or the other input.
2022      *
2023      * The returned result is equal to the expression
2024      * {@code blend(broadcast(e),m)}.
2025      *
2026      * @param e the input scalar, containing the replacement lane value
2027      * @param m the mask controlling lane selection of the scalar
2028      * @return the result of blending the lane elements of this vector with
2029      *         the scalar value
2030      */
2031     @ForceInline
2032     public final ShortVector blend(long e,
2033                                             VectorMask<Short> m) {
2034         return blend(broadcast(e), m);
2035     }
2036 
2037     /**
2038      * {@inheritDoc} <!--workaround-->
2039      */
2040     @Override
2041     public abstract
2042     ShortVector slice(int origin, Vector<Short> v1);
2043 
2044     /*package-private*/
2045     final
2046     @ForceInline
2047     ShortVector sliceTemplate(int origin, Vector<Short> v1) {
2048         ShortVector that = (ShortVector) v1;
2049         that.check(this);
2050         short[] a0 = this.getElements();
2051         short[] a1 = that.getElements();
2052         short[] res = new short[a0.length];
2053         int vlen = res.length;
2054         int firstPart = vlen - origin;
2055         System.arraycopy(a0, origin, res, 0, firstPart);
2056         System.arraycopy(a1, 0, res, firstPart, origin);
2057         return vectorFactory(res);
2058     }
2059 
2060     /**
2061      * {@inheritDoc} <!--workaround-->
2062      */
2063     @Override
2064     @ForceInline
2065     public final
2066     ShortVector slice(int origin,
2067                                Vector<Short> w,
2068                                VectorMask<Short> m) {
2069         return broadcast(0).blend(slice(origin, w), m);
2070     }
2071 
2072     /**
2073      * {@inheritDoc} <!--workaround-->
2074      */
2075     @Override
2076     public abstract
2077     ShortVector slice(int origin);
2078 
2079     /**
2080      * {@inheritDoc} <!--workaround-->
2081      */
2082     @Override
2083     public abstract
2084     ShortVector unslice(int origin, Vector<Short> w, int part);
2085 
2086     /*package-private*/
2087     final
2088     @ForceInline
2089     ShortVector
2090     unsliceTemplate(int origin, Vector<Short> w, int part) {
2091         ShortVector that = (ShortVector) w;
2092         that.check(this);
2093         short[] slice = this.getElements();
2094         short[] res = that.getElements();
2095         int vlen = res.length;
2096         int firstPart = vlen - origin;
2097         switch (part) {
2098         case 0:
2099             System.arraycopy(slice, 0, res, origin, firstPart);
2100             break;
2101         case 1:
2102             System.arraycopy(slice, firstPart, res, 0, origin);
2103             break;
2104         default:
2105             throw wrongPartForSlice(part);
2106         }
2107         return vectorFactory(res);
2108     }
2109 
2110     /*package-private*/
2111     final
2112     @ForceInline
2113     <M extends VectorMask<Short>>
2114     ShortVector
2115     unsliceTemplate(Class<M> maskType, int origin, Vector<Short> w, int part, M m) {
2116         ShortVector that = (ShortVector) w;
2117         that.check(this);
2118         ShortVector slice = that.sliceTemplate(origin, that);
2119         slice = slice.blendTemplate(maskType, this, m);
2120         return slice.unsliceTemplate(origin, w, part);
2121     }
2122 
2123     /**
2124      * {@inheritDoc} <!--workaround-->
2125      */
2126     @Override
2127     public abstract
2128     ShortVector unslice(int origin, Vector<Short> w, int part, VectorMask<Short> m);
2129 
2130     /**
2131      * {@inheritDoc} <!--workaround-->
2132      */
2133     @Override
2134     public abstract
2135     ShortVector unslice(int origin); 
2136 
2137     private ArrayIndexOutOfBoundsException
2138     wrongPartForSlice(int part) {
2139         String msg = String.format("bad part number %d for slice operation",
2140                                    part);
2141         return new ArrayIndexOutOfBoundsException(msg);
2142     }
2143 
2144     /**
2145      * {@inheritDoc} <!--workaround-->
2146      */
2147     @Override
2148     public abstract
2149     ShortVector rearrange(VectorShuffle<Short> m);
2150 
2151     /*package-private*/
2152     @ForceInline
2153     final
2154     <S extends VectorShuffle<Short>>
2155     ShortVector rearrangeTemplate(Class<S> shuffletype, S shuffle) {
2156         shuffle.checkIndexes();
2157         return VectorIntrinsics.rearrangeOp(
2158             getClass(), shuffletype, short.class, length(),
2159             this, shuffle,
2160             (v1, s_) -> v1.uOp((i, a) -> {
2161                 int ei = s_.laneSource(i);
2162                 return v1.lane(ei);
2163             }));
2164     }
2165 
2166     /**
2167      * {@inheritDoc} <!--workaround-->
2168      */
2169     @Override
2170     public abstract
2171     ShortVector rearrange(VectorShuffle<Short> s,
2172                                    VectorMask<Short> m);
2173 
2174     /*package-private*/
2175     @ForceInline
2176     final
2177     <S extends VectorShuffle<Short>>
2178     ShortVector rearrangeTemplate(Class<S> shuffletype,
2179                                            S shuffle,
2180                                            VectorMask<Short> m) {
2181         ShortVector unmasked =
2182             VectorIntrinsics.rearrangeOp(
2183                 getClass(), shuffletype, short.class, length(),
2184                 this, shuffle,
2185                 (v1, s_) -> v1.uOp((i, a) -> {
2186                     int ei = s_.laneSource(i);
2187                     return ei < 0 ? 0 : v1.lane(ei);
2188                 }));
2189         VectorMask<Short> valid = shuffle.laneIsValid();
2190         if (m.andNot(valid).anyTrue()) {
2191             shuffle.checkIndexes();
2192             throw new AssertionError();
2193         }
2194         return broadcast((short)0).blend(unmasked, valid);
2195     }
2196 
2197     /**
2198      * {@inheritDoc} <!--workaround-->
2199      */
2200     @Override
2201     public abstract
2202     ShortVector rearrange(VectorShuffle<Short> s,
2203                                    Vector<Short> v);
2204 
2205     /*package-private*/
2206     @ForceInline
2207     final
2208     <S extends VectorShuffle<Short>>
2209     ShortVector rearrangeTemplate(Class<S> shuffletype,
2210                                            S shuffle,
2211                                            ShortVector v) {
2212         VectorMask<Short> valid = shuffle.laneIsValid();
2213         S ws = shuffletype.cast(shuffle.wrapIndexes());
2214         ShortVector r0 =
2215             VectorIntrinsics.rearrangeOp(
2216                 getClass(), shuffletype, short.class, length(),
2217                 this, ws,
2218                 (v0, s_) -> v0.uOp((i, a) -> {
2219                     int ei = s_.laneSource(i);
2220                     return v0.lane(ei);
2221                 }));
2222         ShortVector r1 =
2223             VectorIntrinsics.rearrangeOp(
2224                 getClass(), shuffletype, short.class, length(),
2225                 v, ws,
2226                 (v1, s_) -> v1.uOp((i, a) -> {
2227                     int ei = s_.laneSource(i);
2228                     return v1.lane(ei);
2229                 }));
2230         return r1.blend(r0, valid);
2231     }
2232 
2233     /**
2234      * {@inheritDoc} <!--workaround-->
2235      */
2236     @Override
2237     public abstract
2238     ShortVector selectFrom(Vector<Short> v);
2239 
2240     /*package-private*/
2241     @ForceInline
2242     final ShortVector selectFromTemplate(ShortVector v) {
2243         return v.rearrange(this.toShuffle());
2244     }
2245 
2246     /**
2247      * {@inheritDoc} <!--workaround-->
2248      */
2249     @Override
2250     public abstract
2251     ShortVector selectFrom(Vector<Short> s, VectorMask<Short> m);
2252 
2253     /*package-private*/
2254     @ForceInline
2255     final ShortVector selectFromTemplate(ShortVector v,
2256                                                   AbstractMask<Short> m) {
2257         return v.rearrange(this.toShuffle(), m);
2258     }
2259 
2260     /// Ternary operations
2261 
2262     /**
2263      * Blends together the bits of two vectors under
2264      * the control of a third, which supplies mask bits.
2265      *
2266      *
2267      * This is a lane-wise ternary operation which performs
2268      * a bitwise blending operation {@code (a&~c)|(b&c)}
2269      * to each lane.
2270      *
2271      * This method is also equivalent to the expression
2272      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2273      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2274      *    BITWISE_BLEND}{@code , bits, mask)}.
2275      *
2276      * @param bits input bits to blend into the current vector
2277      * @param mask a bitwise mask to enable blending of the input bits
2278      * @return the bitwise blend of the given bits into the current vector,
2279      *         under control of the bitwise mask
2280      * @see #bitwiseBlend(short,short)
2281      * @see #bitwiseBlend(short,Vector)
2282      * @see #bitwiseBlend(Vector,short)
2283      * @see VectorOperators#BITWISE_BLEND
2284      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2285      */
2286     @ForceInline
2287     public final
2288     ShortVector bitwiseBlend(Vector<Short> bits, Vector<Short> mask) {
2289         return lanewise(BITWISE_BLEND, bits, mask);
2290     }
2291 
2292     /**
2293      * Blends together the bits of a vector and a scalar under
2294      * the control of another scalar, which supplies mask bits.
2295      *
2296      *
2297      * This is a lane-wise ternary operation which performs
2298      * a bitwise blending operation {@code (a&~c)|(b&c)}
2299      * to each lane.
2300      *
2301      * This method is also equivalent to the expression
2302      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2303      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2304      *    BITWISE_BLEND}{@code , bits, mask)}.
2305      *
2306      * @param bits input bits to blend into the current vector
2307      * @param mask a bitwise mask to enable blending of the input bits
2308      * @return the bitwise blend of the given bits into the current vector,
2309      *         under control of the bitwise mask
2310      * @see #bitwiseBlend(Vector,Vector)
2311      * @see VectorOperators#BITWISE_BLEND
2312      * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
2313      */
2314     @ForceInline
2315     public final
2316     ShortVector bitwiseBlend(short bits, short mask) {
2317         return lanewise(BITWISE_BLEND, bits, mask);
2318     }
2319 
2320     /**
2321      * Blends together the bits of a vector and a scalar under
2322      * the control of another vector, which supplies mask bits.
2323      *
2324      *
2325      * This is a lane-wise ternary operation which performs
2326      * a bitwise blending operation {@code (a&~c)|(b&c)}
2327      * to each lane.
2328      *
2329      * This method is also equivalent to the expression
2330      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2331      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2332      *    BITWISE_BLEND}{@code , bits, mask)}.
2333      *
2334      * @param bits input bits to blend into the current vector
2335      * @param mask a bitwise mask to enable blending of the input bits
2336      * @return the bitwise blend of the given bits into the current vector,
2337      *         under control of the bitwise mask
2338      * @see #bitwiseBlend(Vector,Vector)
2339      * @see VectorOperators#BITWISE_BLEND
2340      * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
2341      */
2342     @ForceInline
2343     public final
2344     ShortVector bitwiseBlend(short bits, Vector<Short> mask) {
2345         return lanewise(BITWISE_BLEND, bits, mask);
2346     }
2347 
2348     /**
2349      * Blends together the bits of two vectors under
2350      * the control of a scalar, which supplies mask bits.
2351      *
2352      *
2353      * This is a lane-wise ternary operation which performs
2354      * a bitwise blending operation {@code (a&~c)|(b&c)}
2355      * to each lane.
2356      *
2357      * This method is also equivalent to the expression
2358      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2359      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2360      *    BITWISE_BLEND}{@code , bits, mask)}.
2361      *
2362      * @param bits input bits to blend into the current vector
2363      * @param mask a bitwise mask to enable blending of the input bits
2364      * @return the bitwise blend of the given bits into the current vector,
2365      *         under control of the bitwise mask
2366      * @see #bitwiseBlend(Vector,Vector)
2367      * @see VectorOperators#BITWISE_BLEND
2368      * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
2369      */
2370     @ForceInline
2371     public final
2372     ShortVector bitwiseBlend(Vector<Short> bits, short mask) {
2373         return lanewise(BITWISE_BLEND, bits, mask);
2374     }
2375 
2376 
2377     // Type specific horizontal reductions
2378 
2379     /**
2380      * Returns a value accumulated from all the lanes of this vector.
2381      *
2382      * This is an associative cross-lane reduction operation which
2383      * applies the specified operation to all the lane elements.
2384      *
2385      * <p>
2386      * A few reduction operations do not support arbitrary reordering
2387      * of their operands, yet are included here because of their
2388      * usefulness.
2389      *
2390      * <ul>
2391      * <li>
2392      * In the case of {@code FIRST_NONZERO}, the reduction returns
2393      * the value from the lowest-numbered non-zero lane.
2394      *
2395      *
2396      * <li>
2397      * In the case of floating point addition and multiplication, the
2398      * precise result will reflect the choice of an arbitrary order
2399      * of operations, which may even vary over time.
2400      *
2401      * <li>
2402      * All other reduction operations are fully commutative and
2403      * associative.  The implementation can choose any order of
2404      * processing, yet it will always produce the same result.
2405      *
2406      * </ul>
2407      *
2408      *
2409      * @param op the operation used to combine lane values
2410      * @return the accumulated result
2411      * @throws UnsupportedOperationException if this vector does
2412      *         not support the requested operation
2413      * @see #reduceLanes(VectorOperators.Associative,VectorMask)
2414      * @see #add(Vector)
2415      * @see #mul(Vector)
2416      * @see #min(Vector)
2417      * @see #max(Vector)
2418      * @see #and(Vector)
2419      * @see #or(Vector)
2420      * @see VectorOperators#XOR
2421      * @see VectorOperators#FIRST_NONZERO
2422      */
2423     public abstract short reduceLanes(VectorOperators.Associative op);
2424 
    /**
     * Returns a value accumulated from selected lanes of this vector,
     * controlled by a mask.
     * <p>
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to the selected lane elements.
     * <p>
     * If no elements are selected, an operation-specific identity
     * value is returned.
     * <ul>
     * <li>
     * If the operation is
     *  {@code ADD}, {@code XOR}, {@code OR},
     * or {@code FIRST_NONZERO},
     * then the identity value is zero, the default {@code short} value.
     * <li>
     * If the operation is {@code MUL},
     * then the identity value is one.
     * <li>
     * If the operation is {@code AND},
     * then the identity value is minus one (all bits set).
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code Short.MIN_VALUE}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code Short.MAX_VALUE}.
     * </ul>
     *
     * @param op the operation used to combine lane values
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative)
     */
    public abstract short reduceLanes(VectorOperators.Associative op,
                                       VectorMask<Short> m);
2463 
2464     /*package-private*/
2465     @ForceInline
2466     final
2467     short reduceLanesTemplate(VectorOperators.Associative op,
2468                                VectorMask<Short> m) {
2469         ShortVector v = reduceIdentityVector(op).blend(this, m);
2470         return v.reduceLanesTemplate(op);
2471     }
2472 
2473     /*package-private*/
2474     @ForceInline
2475     final
2476     short reduceLanesTemplate(VectorOperators.Associative op) {
2477         if (op == FIRST_NONZERO) {
2478             // FIXME:  The JIT should handle this, and other scan ops alos.
2479             VectorMask<Short> thisNZ
2480                 = this.viewAsIntegralLanes().compare(NE, (short) 0);
2481             return this.lane(thisNZ.firstTrue());
2482         }
2483         int opc = opCode(op);
2484         return fromBits(VectorIntrinsics.reductionCoerced(
2485             opc, getClass(), short.class, length(),
2486             this,
2487             REDUCE_IMPL.find(op, opc, (opc_) -> {
2488               switch (opc_) {
2489               case VECTOR_OP_ADD: return v ->
2490                       toBits(v.rOp((short)0, (i, a, b) -> (short)(a + b)));
2491               case VECTOR_OP_MUL: return v ->
2492                       toBits(v.rOp((short)1, (i, a, b) -> (short)(a * b)));
2493               case VECTOR_OP_MIN: return v ->
2494                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (short) Math.min(a, b)));
2495               case VECTOR_OP_MAX: return v ->
2496                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (short) Math.max(a, b)));
2497               case VECTOR_OP_FIRST_NONZERO: return v ->
2498                       toBits(v.rOp((short)0, (i, a, b) -> toBits(a) != 0 ? a : b));
2499               case VECTOR_OP_AND: return v ->
2500                       toBits(v.rOp((short)-1, (i, a, b) -> (short)(a & b)));
2501               case VECTOR_OP_OR: return v ->
2502                       toBits(v.rOp((short)0, (i, a, b) -> (short)(a | b)));
2503               case VECTOR_OP_XOR: return v ->
2504                       toBits(v.rOp((short)0, (i, a, b) -> (short)(a ^ b)));
2505               default: return null;
2506               }})));
2507     }
2508     private static final
2509     ImplCache<Associative,Function<ShortVector,Long>> REDUCE_IMPL
2510         = new ImplCache<>(Associative.class, ShortVector.class);
2511 
2512     private
2513     @ForceInline
2514     ShortVector reduceIdentityVector(VectorOperators.Associative op) {
2515         int opc = opCode(op);
2516         UnaryOperator<ShortVector> fn
2517             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2518                 switch (opc_) {
2519                 case VECTOR_OP_ADD:
2520                 case VECTOR_OP_OR:
2521                 case VECTOR_OP_XOR:
2522                 case VECTOR_OP_FIRST_NONZERO:
2523                     return v -> v.broadcast(0);
2524                 case VECTOR_OP_MUL:
2525                     return v -> v.broadcast(1);
2526                 case VECTOR_OP_AND:
2527                     return v -> v.broadcast(-1);
2528                 case VECTOR_OP_MIN:
2529                     return v -> v.broadcast(MAX_OR_INF);
2530                 case VECTOR_OP_MAX:
2531                     return v -> v.broadcast(MIN_OR_INF);
2532                 default: return null;
2533                 }
2534             });
2535         return fn.apply(this);
2536     }
2537     private static final
2538     ImplCache<Associative,UnaryOperator<ShortVector>> REDUCE_ID_IMPL
2539         = new ImplCache<>(Associative.class, ShortVector.class);
2540 
    // Smallest short value; used in this file as the identity (starting
    // value) for MAX reductions.
    private static final short MIN_OR_INF = Short.MIN_VALUE;
    // Largest short value; used in this file as the identity (starting
    // value) for MIN reductions.
    private static final short MAX_OR_INF = Short.MAX_VALUE;
2543 
    // Long-returning reductions inherited from the supertype (unmasked and
    // masked forms). They are redeclared abstract here, so subclasses must
    // supply the implementations.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<Short> m);
2547 
2548     // Type specific accessors
2549 
    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract short lane(int i);
2559 
    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     *
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract ShortVector withLane(int i, short e);
2577 
2578     // Memory load operations
2579 
2580     /**
2581      * Returns an array of type {@code short[]}
2582      * containing all the lane values.
2583      * The array length is the same as the vector length.
2584      * The array elements are stored in lane order.
2585      * <p>
2586      * This method behaves as if it stores
2587      * this vector into an allocated array
2588      * (using {@link #intoArray(short[], int) intoArray})
2589      * and returns the array as follows:
2590      * <pre>{@code
2591      *   short[] a = new short[this.length()];
2592      *   this.intoArray(a, 0);
2593      *   return a;
2594      * }</pre>
2595      *
2596      * @return an array containing the lane values of this vector
2597      */
2598     @ForceInline
2599     @Override
2600     public final short[] toArray() {
2601         short[] a = new short[vspecies().laneCount()];
2602         intoArray(a, 0);
2603         return a;
2604     }
2605 
2606     /** {@inheritDoc} <!--workaround-->
2607      * @implNote
2608      * When this method is used on used on vectors
2609      * of type {@code ShortVector},
2610      * there will be no loss of precision or range,
2611      * and so no {@code IllegalArgumentException} will
2612      * be thrown.
2613      */
2614     @ForceInline
2615     @Override
2616     public final int[] toIntArray() {
2617         short[] a = toArray();
2618         int[] res = new int[a.length];
2619         for (int i = 0; i < a.length; i++) {
2620             short e = a[i];
2621             res[i] = (int) ShortSpecies.toIntegralChecked(e, true);
2622         }
2623         return res;
2624     }
2625 
2626     /** {@inheritDoc} <!--workaround-->
2627      * @implNote
2628      * When this method is used on used on vectors
2629      * of type {@code ShortVector},
2630      * there will be no loss of precision or range,
2631      * and so no {@code IllegalArgumentException} will
2632      * be thrown.
2633      */
2634     @ForceInline
2635     @Override
2636     public final long[] toLongArray() {
2637         short[] a = toArray();
2638         long[] res = new long[a.length];
2639         for (int i = 0; i < a.length; i++) {
2640             short e = a[i];
2641             res[i] = ShortSpecies.toIntegralChecked(e, false);
2642         }
2643         return res;
2644     }
2645 
2646     /** {@inheritDoc} <!--workaround-->
2647      * @implNote
2648      * When this method is used on used on vectors
2649      * of type {@code ShortVector},
2650      * there will be no loss of precision.
2651      */
2652     @ForceInline
2653     @Override
2654     public final double[] toDoubleArray() {
2655         short[] a = toArray();
2656         double[] res = new double[a.length];
2657         for (int i = 0; i < a.length; i++) {
2658             res[i] = (double) a[i];
2659         }
2660         return res;
2661     }
2662 
    /**
     * Loads a vector from a byte array starting at an offset.
     * Bytes are composed into primitive lane elements according
     * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows:
     * <pre>{@code
     * var bb = ByteBuffer.wrap(a);
     * var bo = ByteOrder.LITTLE_ENDIAN;
     * var m = species.maskAll(true);
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param a the byte array
     * @param offset the offset into the array
     * @return a vector loaded from a byte array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*ESIZE < 0}
     *         or {@code offset+(N+1)*ESIZE > a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public static
    ShortVector fromByteArray(VectorSpecies<Short> species,
                                       byte[] a, int offset) {
        // Delegate to the byte-order-aware overload with little endian.
        return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN);
    }
2695 
2696     /**
2697      * Loads a vector from a byte array starting at an offset.
2698      * Bytes are composed into primitive lane elements according
2699      * to the specified byte order.
2700      * The vector is arranged into lanes according to
2701      * <a href="Vector.html#lane-order">memory ordering</a>.
2702      * <p>
2703      * This method behaves as if it returns the result of calling
2704      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2705      * fromByteBuffer()} as follows:
2706      * <pre>{@code
2707      * var bb = ByteBuffer.wrap(a);
2708      * var m = species.maskAll(true);
2709      * return fromByteBuffer(species, bb, offset, m, bo);
2710      * }</pre>
2711      *
2712      * @param species species of desired vector
2713      * @param a the byte array
2714      * @param offset the offset into the array
2715      * @param bo the intended byte order
2716      * @return a vector loaded from a byte array
2717      * @throws IndexOutOfBoundsException
2718      *         if {@code offset+N*ESIZE < 0}
2719      *         or {@code offset+(N+1)*ESIZE > a.length}
2720      *         for any lane {@code N} in the vector
2721      */
2722     @ForceInline
2723     public static
2724     ShortVector fromByteArray(VectorSpecies<Short> species,
2725                                        byte[] a, int offset,
2726                                        ByteOrder bo) {
2727         ShortSpecies vsp = (ShortSpecies) species;
2728         offset = checkFromIndexSize(offset,
2729                                     vsp.vectorBitSize() / Byte.SIZE,
2730                                     a.length);
2731         return vsp.dummyVector()
2732             .fromByteArray0(a, offset).maybeSwap(bo);
2733     }
2734 
    /**
     * Loads a vector from a byte array starting at an offset
     * and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code short} (zero).
     * Bytes are composed into primitive lane elements according
     * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows:
     * <pre>{@code
     * var bb = ByteBuffer.wrap(a);
     * var bo = ByteOrder.LITTLE_ENDIAN;
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param a the byte array
     * @param offset the offset into the array
     * @param m the mask controlling lane selection
     * @return a vector loaded from a byte array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*ESIZE < 0}
     *         or {@code offset+(N+1)*ESIZE > a.length}
     *         for any lane {@code N} in the vector where
     *         the mask is set
     */
    @ForceInline
    public static
    ShortVector fromByteArray(VectorSpecies<Short> species,
                                       byte[] a, int offset,
                                       VectorMask<Short> m) {
        // Delegate to the byte-order-aware masked overload with little endian.
        return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m);
    }
2772 
2773     /**
2774      * Loads a vector from a byte array starting at an offset
2775      * and using a mask.
2776      * Lanes where the mask is unset are filled with the default
2777      * value of {@code short} (zero).
2778      * Bytes are composed into primitive lane elements according
2779      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
2780      * The vector is arranged into lanes according to
2781      * <a href="Vector.html#lane-order">memory ordering</a>.
2782      * <p>
2783      * This method behaves as if it returns the result of calling
2784      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2785      * fromByteBuffer()} as follows:
2786      * <pre>{@code
2787      * var bb = ByteBuffer.wrap(a);
2788      * return fromByteBuffer(species, bb, offset, m, bo);
2789      * }</pre>
2790      *
2791      * @param species species of desired vector
2792      * @param a the byte array
2793      * @param offset the offset into the array
2794      * @param bo the intended byte order
2795      * @param m the mask controlling lane selection
2796      * @return a vector loaded from a byte array
2797      * @throws IndexOutOfBoundsException
2798      *         if {@code offset+N*ESIZE < 0}
2799      *         or {@code offset+(N+1)*ESIZE > a.length}
2800      *         for any lane {@code N} in the vector
2801      *         where the mask is set
2802      */
2803     @ForceInline
2804     public static
2805     ShortVector fromByteArray(VectorSpecies<Short> species,
2806                                        byte[] a, int offset,
2807                                        ByteOrder bo,
2808                                        VectorMask<Short> m) {
2809         ShortSpecies vsp = (ShortSpecies) species;
2810         ShortVector zero = vsp.zero();
2811 
2812         if (offset >= 0 && offset <= (a.length - vsp.length() * 2)) {
2813             ShortVector v = zero.fromByteArray0(a, offset);
2814             return zero.blend(v.maybeSwap(bo), m);
2815         }
2816         ShortVector iota = zero.addIndex(1);
2817         ((AbstractMask<Short>)m)
2818             .checkIndexByLane(offset, a.length, iota, 2);
2819         ShortBuffer tb = wrapper(a, offset, bo);
2820         return vsp.ldOp(tb, 0, (AbstractMask<Short>)m,
2821                    (tb_, __, i)  -> tb_.get(i));
2822     }
2823 
2824     /**
2825      * Loads a vector from an array of type {@code short[]}
2826      * starting at an offset.
2827      * For each vector lane, where {@code N} is the vector lane index, the
2828      * array element at index {@code offset + N} is placed into the
2829      * resulting vector at lane index {@code N}.
2830      *
2831      * @param species species of desired vector
2832      * @param a the array
2833      * @param offset the offset into the array
2834      * @return the vector loaded from an array
2835      * @throws IndexOutOfBoundsException
2836      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2837      *         for any lane {@code N} in the vector
2838      */
2839     @ForceInline
2840     public static
2841     ShortVector fromArray(VectorSpecies<Short> species,
2842                                    short[] a, int offset) {
2843         ShortSpecies vsp = (ShortSpecies) species;
2844         offset = checkFromIndexSize(offset,
2845                                     vsp.laneCount(),
2846                                     a.length);
2847         return vsp.dummyVector().fromArray0(a, offset);
2848     }
2849 
2850     /**
2851      * Loads a vector from an array of type {@code short[]}
2852      * starting at an offset and using a mask.
2853      * Lanes where the mask is unset are filled with the default
2854      * value of {@code short} (zero).
2855      * For each vector lane, where {@code N} is the vector lane index,
2856      * if the mask lane at index {@code N} is set then the array element at
2857      * index {@code offset + N} is placed into the resulting vector at lane index
2858      * {@code N}, otherwise the default element value is placed into the
2859      * resulting vector at lane index {@code N}.
2860      *
2861      * @param species species of desired vector
2862      * @param a the array
2863      * @param offset the offset into the array
2864      * @param m the mask controlling lane selection
2865      * @return the vector loaded from an array
2866      * @throws IndexOutOfBoundsException
2867      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
2868      *         for any lane {@code N} in the vector
2869      *         where the mask is set
2870      */
2871     @ForceInline
2872     public static
2873     ShortVector fromArray(VectorSpecies<Short> species,
2874                                    short[] a, int offset,
2875                                    VectorMask<Short> m) {
2876         ShortSpecies vsp = (ShortSpecies) species;
2877         if (offset >= 0 && offset <= (a.length - species.length())) {
2878             ShortVector zero = vsp.zero();
2879             return zero.blend(zero.fromArray0(a, offset), m);
2880         }
2881         ShortVector iota = vsp.iota();
2882         ((AbstractMask<Short>)m)
2883             .checkIndexByLane(offset, a.length, iota, 1);
2884         return vsp.vOp(m, i -> a[offset + i]);
2885     }
2886 
2887     /**
2888      * Gathers a new vector composed of elements from an array of type
2889      * {@code short[]},
2890      * using indexes obtained by adding a fixed {@code offset} to a
2891      * series of secondary offsets from an <em>index map</em>.
2892      * The index map is a contiguous sequence of {@code VLENGTH}
2893      * elements in a second array of {@code int}s, starting at a given
2894      * {@code mapOffset}.
2895      * <p>
2896      * For each vector lane, where {@code N} is the vector lane index,
2897      * the lane is loaded from the array
2898      * element {@code a[f(N)]}, where {@code f(N)} is the
2899      * index mapping expression
2900      * {@code offset + indexMap[mapOffset + N]]}.
2901      *
2902      * @param species species of desired vector
2903      * @param a the array
2904      * @param offset the offset into the array, may be negative if relative
2905      * indexes in the index map compensate to produce a value within the
2906      * array bounds
2907      * @param indexMap the index map
2908      * @param mapOffset the offset into the index map
2909      * @return the vector loaded from the indexed elements of the array
2910      * @throws IndexOutOfBoundsException
2911      *         if {@code mapOffset+N < 0}
2912      *         or if {@code mapOffset+N >= indexMap.length},
2913      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2914      *         is an invalid index into {@code a},
2915      *         for any lane {@code N} in the vector
2916      * @see ShortVector#toIntArray()
2917      */
2918     @ForceInline
2919     public static
2920     ShortVector fromArray(VectorSpecies<Short> species,
2921                                    short[] a, int offset,
2922                                    int[] indexMap, int mapOffset) {
2923         ShortSpecies vsp = (ShortSpecies) species;
2924         return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
2925     }
2926 
2927     /**
2928      * Gathers a new vector composed of elements from an array of type
2929      * {@code short[]},
2930      * under the control of a mask, and
2931      * using indexes obtained by adding a fixed {@code offset} to a
2932      * series of secondary offsets from an <em>index map</em>.
2933      * The index map is a contiguous sequence of {@code VLENGTH}
2934      * elements in a second array of {@code int}s, starting at a given
2935      * {@code mapOffset}.
2936      * <p>
2937      * For each vector lane, where {@code N} is the vector lane index,
2938      * if the lane is set in the mask,
2939      * the lane is loaded from the array
2940      * element {@code a[f(N)]}, where {@code f(N)} is the
2941      * index mapping expression
2942      * {@code offset + indexMap[mapOffset + N]]}.
2943      * Unset lanes in the resulting vector are set to zero.
2944      *
2945      * @param species species of desired vector
2946      * @param a the array
2947      * @param offset the offset into the array, may be negative if relative
2948      * indexes in the index map compensate to produce a value within the
2949      * array bounds
2950      * @param indexMap the index map
2951      * @param mapOffset the offset into the index map
2952      * @param m the mask controlling lane selection
2953      * @return the vector loaded from the indexed elements of the array
2954      * @throws IndexOutOfBoundsException
2955      *         if {@code mapOffset+N < 0}
2956      *         or if {@code mapOffset+N >= indexMap.length},
2957      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
2958      *         is an invalid index into {@code a},
2959      *         for any lane {@code N} in the vector
2960      *         where the mask is set
2961      * @see ShortVector#toIntArray()
2962      */
2963     @ForceInline
2964     public static
2965     ShortVector fromArray(VectorSpecies<Short> species,
2966                                    short[] a, int offset,
2967                                    int[] indexMap, int mapOffset,
2968                                    VectorMask<Short> m) {
2969         ShortSpecies vsp = (ShortSpecies) species;
2970 
2971         // Do it the slow way.
2972         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
2973 
2974     }
2975 
2976     /**
2977      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
2978      * starting at an offset into the byte buffer.
2979      * <p>
2980      * Bytes are composed into primitive lane elements according to
2981      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
2982      * To avoid errors, the
2983      * {@linkplain ByteBuffer#order() intrinsic byte order}
2984      * of the buffer must be little-endian.
2985      * <p>
2986      * This method behaves as if it returns the result of calling
2987      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
2988      * fromByteBuffer()} as follows:
2989      * <pre>{@code
2990      * var bb = ByteBuffer.wrap(a);
2991      * var bo = ByteOrder.LITTLE_ENDIAN;
2992      * var m = species.maskAll(true);
2993      * return fromByteBuffer(species, bb, offset, m, bo);
2994      * }</pre>
2995      *
2996      * @param species species of desired vector
2997      * @param bb the byte buffer
2998      * @param offset the offset into the byte buffer
2999      * @param bo the intended byte order
3000      * @return a vector loaded from a byte buffer
3001      * @throws IllegalArgumentException if byte order of bb
3002      *         is not {@link ByteOrder#LITTLE_ENDIAN}
3003      * @throws IndexOutOfBoundsException
3004      *         if {@code offset+N*2 < 0}
3005      *         or {@code offset+N*2 >= bb.limit()}
3006      *         for any lane {@code N} in the vector
3007      */
3008     @ForceInline
3009     public static
3010     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3011                                         ByteBuffer bb, int offset,
3012                                         ByteOrder bo) {
3013         ShortSpecies vsp = (ShortSpecies) species;
3014         offset = checkFromIndexSize(offset,
3015                                     vsp.laneCount(),
3016                                     bb.limit());
3017         return vsp.dummyVector()
3018             .fromByteBuffer0(bb, offset).maybeSwap(bo);
3019     }
3020 
3021     /**
3022      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3023      * starting at an offset into the byte buffer
3024      * and using a mask.
3025      * <p>
3026      * Bytes are composed into primitive lane elements according to
3027      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
3028      * To avoid errors, the
3029      * {@linkplain ByteBuffer#order() intrinsic byte order}
3030      * of the buffer must be little-endian.
3031      * <p>
3032      * This method behaves as if it returns the result of calling
3033      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3034      * fromByteBuffer()} as follows:
3035      * <pre>{@code
3036      * var bb = ByteBuffer.wrap(a);
3037      * var bo = ByteOrder.LITTLE_ENDIAN;
3038      * var m = species.maskAll(true);
3039      * return fromByteBuffer(species, bb, offset, m, bo);
3040      * }</pre>
3041      *
3042      * @param species species of desired vector
3043      * @param bb the byte buffer
3044      * @param offset the offset into the byte buffer
3045      * @param bo the intended byte order
3046      * @param m the mask controlling lane selection
3047      * @return a vector loaded from a byte buffer
3048      * @throws IllegalArgumentException if byte order of bb
3049      *         is not {@link ByteOrder#LITTLE_ENDIAN}
3050      * @throws IndexOutOfBoundsException
3051      *         if {@code offset+N*2 < 0}
3052      *         or {@code offset+N*2 >= bb.limit()}
3053      *         for any lane {@code N} in the vector
3054      *         where the mask is set
3055      */
3056     @ForceInline
3057     public static
3058     ShortVector fromByteBuffer(VectorSpecies<Short> species,
3059                                         ByteBuffer bb, int offset,
3060                                         ByteOrder bo,
3061                                         VectorMask<Short> m) {
3062         if (m.allTrue()) {
3063             return fromByteBuffer(species, bb, offset, bo);
3064         }
3065         ShortSpecies vsp = (ShortSpecies) species;
3066         checkMaskFromIndexSize(offset,
3067                                vsp, m, 1,
3068                                bb.limit());
3069         ShortVector zero = zero(vsp);
3070         ShortVector v = zero.fromByteBuffer0(bb, offset);
3071         return zero.blend(v.maybeSwap(bo), m);
3072     }
3073 
3074     // Memory store operations
3075 
3076     /**
3077      * Stores this vector into an array of type {@code short[]}
3078      * starting at an offset.
3079      * <p>
3080      * For each vector lane, where {@code N} is the vector lane index,
3081      * the lane element at index {@code N} is stored into the array
3082      * element {@code a[offset+N]}.
3083      *
3084      * @param a the array, of type {@code short[]}
3085      * @param offset the offset into the array
3086      * @throws IndexOutOfBoundsException
3087      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3088      *         for any lane {@code N} in the vector
3089      */
3090     @ForceInline
3091     public final
3092     void intoArray(short[] a, int offset) {
3093         ShortSpecies vsp = vspecies();
3094         offset = checkFromIndexSize(offset,
3095                                     vsp.laneCount(),
3096                                     a.length);
3097         VectorIntrinsics.store(
3098             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3099             a, arrayAddress(a, offset),
3100             this,
3101             a, offset,
3102             (arr, off, v)
3103             -> v.stOp(arr, off,
3104                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3105     }
3106 
3107     /**
3108      * Stores this vector into an array of {@code short}
3109      * starting at offset and using a mask.
3110      * <p>
3111      * For each vector lane, where {@code N} is the vector lane index,
3112      * the lane element at index {@code N} is stored into the array
3113      * element {@code a[offset+N]}.
3114      * If the mask lane at {@code N} is unset then the corresponding
3115      * array element {@code a[offset+N]} is left unchanged.
3116      * <p>
3117      * Array range checking is done for lanes where the mask is set.
3118      * Lanes where the mask is unset are not stored and do not need
3119      * to correspond to legitimate elements of {@code a}.
3120      * That is, unset lanes may correspond to array indexes less than
3121      * zero or beyond the end of the array.
3122      *
3123      * @param a the array, of type {@code short[]}
3124      * @param offset the offset into the array
3125      * @param m the mask controlling lane storage
3126      * @throws IndexOutOfBoundsException
3127      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3128      *         for any lane {@code N} in the vector
3129      *         where the mask is set
3130      */
3131     @ForceInline
3132     public final
3133     void intoArray(short[] a, int offset,
3134                    VectorMask<Short> m) {
3135         if (m.allTrue()) {
3136             intoArray(a, offset);
3137         } else {
3138             // FIXME: Cannot vectorize yet, if there's a mask.
3139             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
3140         }
3141     }
3142 
    /**
     * Scatters this vector into an array of type {@code short[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see ShortVector#toIntArray()
     */
    @ForceInline
    public final
    void intoArray(short[] a, int offset,
                   int[] indexMap, int mapOffset) {
        ShortSpecies vsp = vspecies();
        // Single-lane vector: the scatter is just a plain store at the one
        // mapped index.
        if (length() == 1) {
            intoArray(a, offset + indexMap[mapOffset]);
            return;
        }
        IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
        // The index species does not have one int lane per lane of this
        // vector, so no index vector can be built; store lane by lane.
        if (isp.laneCount() != vsp.laneCount()) {
            stOp(a, offset,
                 (arr, off, i, e) -> {
                     int j = indexMap[mapOffset + i];
                     arr[off + j] = e;
                 });
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // Check the computed indexes against the array length
        // (presumably throws for any index outside a -- confirm).
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        VectorIntrinsics.storeWithMap(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this,
            a, offset, indexMap, mapOffset,
            // Scalar fallback: same lane-by-lane scatter as above.
            (arr, off, v, map, mo)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
3210 
3211     /**
3212      * Scatters this vector into an array of type {@code short[]},
3213      * under the control of a mask, and
3214      * using indexes obtained by adding a fixed {@code offset} to a
3215      * series of secondary offsets from an <em>index map</em>.
3216      * The index map is a contiguous sequence of {@code VLENGTH}
3217      * elements in a second array of {@code int}s, starting at a given
3218      * {@code mapOffset}.
3219      * <p>
3220      * For each vector lane, where {@code N} is the vector lane index,
3221      * if the mask lane at index {@code N} is set then
3222      * the lane element at index {@code N} is stored into the array
3223      * element {@code a[f(N)]}, where {@code f(N)} is the
3224      * index mapping expression
3225      * {@code offset + indexMap[mapOffset + N]]}.
3226      *
3227      * @param a the array
3228      * @param offset an offset to combine with the index map offsets
3229      * @param indexMap the index map
3230      * @param mapOffset the offset into the index map
3231      * @param m the mask
3232      * @returns a vector of the values {@code m ? a[f(N)] : 0},
3233      *          {@code f(N) = offset + indexMap[mapOffset + N]]}.
3234      * @throws IndexOutOfBoundsException
3235      *         if {@code mapOffset+N < 0}
3236      *         or if {@code mapOffset+N >= indexMap.length},
3237      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3238      *         is an invalid index into {@code a},
3239      *         for any lane {@code N} in the vector
3240      *         where the mask is set
3241      * @see ShortVector#toIntArray()
3242      */
3243     @ForceInline
3244     public final
3245     void intoArray(short[] a, int offset,
3246                    int[] indexMap, int mapOffset,
3247                    VectorMask<Short> m) {
3248         ShortSpecies vsp = vspecies();
3249         if (m.allTrue()) {
3250             intoArray(a, offset, indexMap, mapOffset);
3251             return;
3252         }
3253         throw new AssertionError("fixme");
3254     }
3255 
3256     /**
3257      * {@inheritDoc} <!--workaround-->
3258      */
3259     @Override
3260     @ForceInline
3261     public final
3262     void intoByteArray(byte[] a, int offset) {
3263         offset = checkFromIndexSize(offset,
3264                                     bitSize() / Byte.SIZE,
3265                                     a.length);
3266         this.maybeSwap(ByteOrder.LITTLE_ENDIAN)
3267             .intoByteArray0(a, offset);
3268     }
3269 
3270     /**
3271      * {@inheritDoc} <!--workaround-->
3272      */
3273     @Override
3274     @ForceInline
3275     public final
3276     void intoByteArray(byte[] a, int offset,
3277                        VectorMask<Short> m) {
3278         if (m.allTrue()) {
3279             intoByteArray(a, offset);
3280             return;
3281         }
3282         ShortSpecies vsp = vspecies();
3283         if (offset >= 0 && offset <= (a.length - vsp.length() * 2)) {
3284             var oldVal = fromByteArray0(a, offset);
3285             var newVal = oldVal.blend(this, m);
3286             newVal.intoByteArray0(a, offset);
3287         } else {
3288             checkMaskFromIndexSize(offset, vsp, m, 2, a.length);
3289             ShortBuffer tb = wrapper(a, offset, NATIVE_ENDIAN);
3290             this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e));
3291         }
3292     }
3293 
3294     /**
3295      * {@inheritDoc} <!--workaround-->
3296      */
3297     @Override
3298     @ForceInline
3299     public final
3300     void intoByteArray(byte[] a, int offset,
3301                        ByteOrder bo,
3302                        VectorMask<Short> m) {
3303         maybeSwap(bo).intoByteArray(a, offset, m);
3304     }
3305 
3306     /**
3307      * {@inheritDoc} <!--workaround-->
3308      */
3309     @Override
3310     @ForceInline
3311     public final
3312     void intoByteBuffer(ByteBuffer bb, int offset,
3313                         ByteOrder bo) {
3314         maybeSwap(bo).intoByteBuffer0(bb, offset);
3315     }
3316 
3317     /**
3318      * {@inheritDoc} <!--workaround-->
3319      */
3320     @Override
3321     @ForceInline
3322     public final
3323     void intoByteBuffer(ByteBuffer bb, int offset,
3324                         ByteOrder bo,
3325                         VectorMask<Short> m) {
3326         if (m.allTrue()) {
3327             intoByteBuffer(bb, offset, bo);
3328             return;
3329         }
3330         ShortSpecies vsp = vspecies();
3331         checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit());
3332         conditionalStoreNYI(offset, vsp, m, 2, bb.limit());
3333         var oldVal = fromByteBuffer0(bb, offset);
3334         var newVal = oldVal.blend(this.maybeSwap(bo), m);
3335         newVal.intoByteBuffer0(bb, offset);
3336     }
3337 
3338     // ================================================
3339 
3340     // Low-level memory operations.
3341     //
3342     // Note that all of these operations *must* inline into a context
3343     // where the exact species of the involved vector is a
3344     // compile-time constant.  Otherwise, the intrinsic generation
3345     // will fail and performance will suffer.
3346     //
3347     // In many cases this is achieved by re-deriving a version of the
3348     // method in each concrete subclass (per species).  The re-derived
3349     // method simply calls one of these generic methods, with exact
3350     // parameters for the controlling metadata, which is either a
3351     // typed vector or constant species instance.
3352 
3353     // Unchecked loading operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
3355     // byte swapping.
3356 
3357     /*package-private*/
3358     abstract
3359     ShortVector fromArray0(short[] a, int offset);
3360     @ForceInline
3361     final
3362     ShortVector fromArray0Template(short[] a, int offset) {
3363         ShortSpecies vsp = vspecies();
3364         return VectorIntrinsics.load(
3365             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3366             a, arrayAddress(a, offset),
3367             a, offset, vsp,
3368             (arr, off, s) -> s.ldOp(arr, off,
3369                                     (arr_, off_, i) -> arr_[off_ + i]));
3370     }
3371 
3372     @Override
3373     abstract
3374     ShortVector fromByteArray0(byte[] a, int offset);
3375     @ForceInline
3376     final
3377     ShortVector fromByteArray0Template(byte[] a, int offset) {
3378         ShortSpecies vsp = vspecies();
3379         return VectorIntrinsics.load(
3380             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3381             a, byteArrayAddress(a, offset),
3382             a, offset, vsp,
3383             (arr, off, s) -> {
3384                 ShortBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3385                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3386             });
3387     }
3388 
3389     abstract
3390     ShortVector fromByteBuffer0(ByteBuffer bb, int offset);
3391     @ForceInline
3392     final
3393     ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
3394         ShortSpecies vsp = vspecies();
3395         return VectorIntrinsics.load(
3396             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3397             bufferBase(bb), bufferAddress(bb, offset),
3398             bb, offset, vsp,
3399             (buf, off, s) -> {
3400                 ShortBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3401                 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
3402            });
3403     }
3404 
3405     // Unchecked storing operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
3407     // byte swapping.
3408 
3409     abstract
3410     void intoArray0(short[] a, int offset);
3411     @ForceInline
3412     final
3413     void intoArray0Template(short[] a, int offset) {
3414         ShortSpecies vsp = vspecies();
3415         VectorIntrinsics.store(
3416             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3417             a, arrayAddress(a, offset),
3418             this, a, offset,
3419             (arr, off, v)
3420             -> v.stOp(arr, off,
3421                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3422     }
3423 
3424     abstract
3425     void intoByteArray0(byte[] a, int offset);
3426     @ForceInline
3427     final
3428     void intoByteArray0Template(byte[] a, int offset) {
3429         ShortSpecies vsp = vspecies();
3430         VectorIntrinsics.store(
3431             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3432             a, byteArrayAddress(a, offset),
3433             this, a, offset,
3434             (arr, off, v) -> {
3435                 ShortBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3436                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3437             });
3438     }
3439 
3440     @ForceInline
3441     final
3442     void intoByteBuffer0(ByteBuffer bb, int offset) {
3443         ShortSpecies vsp = vspecies();
3444         VectorIntrinsics.store(
3445             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3446             bufferBase(bb), bufferAddress(bb, offset),
3447             this, bb, offset,
3448             (buf, off, v) -> {
3449                 ShortBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3450                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3451             });
3452     }
3453 
3454     // End of low-level memory operations.
3455 
3456     private static
3457     void checkMaskFromIndexSize(int offset,
3458                                 ShortSpecies vsp,
3459                                 VectorMask<Short> m,
3460                                 int scale,
3461                                 int limit) {
3462         ((AbstractMask<Short>)m)
3463             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3464     }
3465 
3466     @ForceInline
3467     private void conditionalStoreNYI(int offset,
3468                                      ShortSpecies vsp,
3469                                      VectorMask<Short> m,
3470                                      int scale,
3471                                      int limit) {
3472         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3473             String msg =
3474                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3475                               offset, limit, m, vsp);
3476             throw new AssertionError(msg);
3477         }
3478     }
3479 
3480     /*package-private*/
3481     @Override
3482     @ForceInline
3483     final
3484     ShortVector maybeSwap(ByteOrder bo) {
3485         if (bo != NATIVE_ENDIAN) {
3486             return this.reinterpretAsBytes()
3487                 .rearrange(swapBytesShuffle())
3488                 .reinterpretAsShorts();
3489         }
3490         return this;
3491     }
3492 
3493     static final int ARRAY_SHIFT =
3494         31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
3495     static final long ARRAY_BASE =
3496         Unsafe.ARRAY_SHORT_BASE_OFFSET;
3497 
3498     @ForceInline
3499     static long arrayAddress(short[] a, int index) {
3500         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
3501     }
3502 
3503     @ForceInline
3504     static long byteArrayAddress(byte[] a, int index) {
3505         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
3506     }
3507 
3508     // Byte buffer wrappers.
3509     private static ShortBuffer wrapper(ByteBuffer bb, int offset,
3510                                         ByteOrder bo) {
3511         return bb.duplicate().position(offset).slice()
3512             .order(bo).asShortBuffer();
3513     }
3514     private static ShortBuffer wrapper(byte[] a, int offset,
3515                                         ByteOrder bo) {
3516         return ByteBuffer.wrap(a, offset, a.length - offset)
3517             .order(bo).asShortBuffer();
3518     }
3519 
3520     // ================================================
3521 
3522     /// Reinterpreting view methods:
3523     //   lanewise reinterpret: viewAsXVector()
3524     //   keep shape, redraw lanes: reinterpretAsEs()
3525 
3526     /**
3527      * {@inheritDoc} <!--workaround-->
3528      */
3529     @ForceInline
3530     @Override
3531     public final ByteVector reinterpretAsBytes() {
3532          // Going to ByteVector, pay close attention to byte order.
3533          assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
3534          return asByteVectorRaw();
3535          //return asByteVectorRaw().rearrange(swapBytesShuffle());
3536     }
3537 
3538     /**
3539      * {@inheritDoc} <!--workaround-->
3540      */
3541     @ForceInline
3542     @Override
3543     public final ShortVector viewAsIntegralLanes() {
3544         return this;
3545     }
3546 
3547     /**
3548      * {@inheritDoc} <!--workaround-->
3549      *
3550      * @implNote This method always throws
3551      * {@code IllegalArgumentException}, because there is no floating
3552      * point type of the same size as {@code short}.  The return type
3553      * of this method is arbitrarily designated as
3554      * {@code Vector<?>}.  Future versions of this API may change the return
3555      * type if additional floating point types become available.
3556      */
3557     @ForceInline
3558     @Override
3559     public final
3560     Vector<?>
3561     viewAsFloatingLanes() {
3562         LaneType flt = LaneType.SHORT.asFloating();
3563         throw new AssertionError();  // should already throw IAE
3564     }
3565 
3566     // ================================================
3567 
3568     /// Object methods: toString, equals, hashCode
3569     //
3570     // Object methods are defined as if via Arrays.toString, etc.,
3571     // is applied to the array of elements.  Two equal vectors
3572     // are required to have equal species and equal lane values.
3573 
3574     /**
3575      * Returns a string representation of this vector, of the form
3576      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
3577      * in lane order.
3578      *
3579      * The string is produced as if by a call to {@link
3580      * java.util.Arrays#toString(short[]) Arrays.toString()},
3581      * as appropriate to the {@code short} array returned by
3582      * {@link #toArray this.toArray()}.
3583      *
3584      * @return a string of the form {@code "[0,1,2...]"}
3585      * reporting the lane values of this vector
3586      */
3587     @Override
3588     @ForceInline
3589     public final
3590     String toString() {
3591         // now that toArray is strongly typed, we can define this
3592         return Arrays.toString(toArray());
3593     }
3594 
3595     /**
3596      * {@inheritDoc} <!--workaround-->
3597      */
3598     @Override
3599     @ForceInline
3600     public final
3601     boolean equals(Object obj) {
3602         if (obj instanceof Vector) {
3603             Vector<?> that = (Vector<?>) obj;
3604             if (this.species().equals(that.species())) {
3605                 return this.eq(that.check(this.species())).allTrue();
3606             }
3607         }
3608         return false;
3609     }
3610 
3611     /**
3612      * {@inheritDoc} <!--workaround-->
3613      */
3614     @Override
3615     @ForceInline
3616     public final
3617     int hashCode() {
3618         // now that toArray is strongly typed, we can define this
3619         return Objects.hash(species(), Arrays.hashCode(toArray()));
3620     }
3621 
3622     // ================================================
3623 
3624     // Species
3625 
3626     /**
3627      * Class representing {@link ShortVector}'s of the same {@link VectorShape VectorShape}.
3628      */
3629     /*package-private*/
3630     static final class ShortSpecies extends AbstractSpecies<Short> {
3631         private ShortSpecies(VectorShape shape,
3632                 Class<? extends ShortVector> vectorType,
3633                 Class<? extends AbstractMask<Short>> maskType,
3634                 Function<Object, ShortVector> vectorFactory) {
3635             super(shape, LaneType.of(short.class),
3636                   vectorType, maskType,
3637                   vectorFactory);
3638             assert(this.elementSize() == Short.SIZE);
3639         }
3640 
3641         // Specializing overrides:
3642 
3643         @Override
3644         @ForceInline
3645         public final Class<Short> elementType() {
3646             return short.class;
3647         }
3648 
3649         @Override
3650         @ForceInline
3651         public final Class<Short> genericElementType() {
3652             return Short.class;
3653         }
3654 
3655         @Override
3656         @ForceInline
3657         public final Class<short[]> arrayType() {
3658             return short[].class;
3659         }
3660 
3661         @SuppressWarnings("unchecked")
3662         @Override
3663         @ForceInline
3664         public final Class<? extends ShortVector> vectorType() {
3665             return (Class<? extends ShortVector>) vectorType;
3666         }
3667 
3668         @Override
3669         @ForceInline
3670         public final long checkValue(long e) {
3671             longToElementBits(e);  // only for exception
3672             return e;
3673         }
3674 
3675         /*package-private*/
3676         @Override
3677         @ForceInline
3678         final ShortVector broadcastBits(long bits) {
3679             return (ShortVector)
3680                 VectorIntrinsics.broadcastCoerced(
3681                     vectorType, short.class, laneCount,
3682                     bits, this,
3683                     (bits_, s_) -> s_.rvOp(i -> bits_));
3684         }
3685 
3686         /*package-private*/
3687         @ForceInline
3688         
3689         final ShortVector broadcast(short e) {
3690             return broadcastBits(toBits(e));
3691         }
3692 
3693         @Override
3694         @ForceInline
3695         public final ShortVector broadcast(long e) {
3696             return broadcastBits(longToElementBits(e));
3697         }
3698 
3699         /*package-private*/
3700         final @Override
3701         @ForceInline
3702         long longToElementBits(long value) {
3703             // Do the conversion, and then test it for failure.
3704             short e = (short) value;
3705             if ((long) e != value) {
3706                 throw badElementBits(value, e);
3707             }
3708             return toBits(e);
3709         }
3710 
3711         /*package-private*/
3712         @ForceInline
3713         static long toIntegralChecked(short e, boolean convertToInt) {
3714             long value = convertToInt ? (int) e : (long) e;
3715             if ((short) value != e) {
3716                 throw badArrayBits(e, convertToInt, value);
3717             }
3718             return value;
3719         }
3720 
3721         @Override
3722         @ForceInline
3723         public final ShortVector fromValues(long... values) {
3724             VectorIntrinsics.requireLength(values.length, laneCount);
3725             short[] va = new short[laneCount()];
3726             for (int i = 0; i < va.length; i++) {
3727                 long lv = values[i];
3728                 short v = (short) lv;
3729                 va[i] = v;
3730                 if ((long)v != lv) {
3731                     throw badElementBits(lv, v);
3732                 }
3733             }
3734             return dummyVector().fromArray0(va, 0);
3735         }
3736 
3737         /* this non-public one is for internal conversions */
3738         @Override
3739         @ForceInline
3740         final ShortVector fromIntValues(int[] values) {
3741             VectorIntrinsics.requireLength(values.length, laneCount);
3742             short[] va = new short[laneCount()];
3743             for (int i = 0; i < va.length; i++) {
3744                 int lv = values[i];
3745                 short v = (short) lv;
3746                 va[i] = v;
3747                 if ((int)v != lv) {
3748                     throw badElementBits(lv, v);
3749                 }
3750             }
3751             return dummyVector().fromArray0(va, 0);
3752         }
3753 
3754         // Virtual constructors
3755 
3756         @ForceInline
3757         @Override final
3758         public ShortVector fromArray(Object a, int offset) {
3759             // User entry point:  Be careful with inputs.
3760             return ShortVector
3761                 .fromArray(this, (short[]) a, offset);
3762         }
3763 
3764         @Override final
3765         ShortVector dummyVector() {
3766             return (ShortVector) super.dummyVector();
3767         }
3768 
3769         final
3770         ShortVector vectorFactory(short[] vec) {
3771             // Species delegates all factory requests to its dummy
3772             // vector.  The dummy knows all about it.
3773             return dummyVector().vectorFactory(vec);
3774         }
3775 
3776         /*package-private*/
3777         final @Override
3778         @ForceInline
3779         ShortVector rvOp(RVOp f) {
3780             short[] res = new short[laneCount()];
3781             for (int i = 0; i < res.length; i++) {
3782                 short bits = (short) f.apply(i);
3783                 res[i] = fromBits(bits);
3784             }
3785             return dummyVector().vectorFactory(res);
3786         }
3787 
3788         ShortVector vOp(FVOp f) {
3789             short[] res = new short[laneCount()];
3790             for (int i = 0; i < res.length; i++) {
3791                 res[i] = f.apply(i);
3792             }
3793             return dummyVector().vectorFactory(res);
3794         }
3795 
3796         ShortVector vOp(VectorMask<Short> m, FVOp f) {
3797             short[] res = new short[laneCount()];
3798             boolean[] mbits = ((AbstractMask<Short>)m).getBits();
3799             for (int i = 0; i < res.length; i++) {
3800                 if (mbits[i]) {
3801                     res[i] = f.apply(i);
3802                 }
3803             }
3804             return dummyVector().vectorFactory(res);
3805         }
3806 
3807         /*package-private*/
3808         @ForceInline
3809         <M> ShortVector ldOp(M memory, int offset,
3810                                       FLdOp<M> f) {
3811             return dummyVector().ldOp(memory, offset, f);
3812         }
3813 
3814         /*package-private*/
3815         @ForceInline
3816         <M> ShortVector ldOp(M memory, int offset,
3817                                       AbstractMask<Short> m,
3818                                       FLdOp<M> f) {
3819             return dummyVector().ldOp(memory, offset, m, f);
3820         }
3821 
3822         /*package-private*/
3823         @ForceInline
3824         <M> void stOp(M memory, int offset, FStOp<M> f) {
3825             dummyVector().stOp(memory, offset, f);
3826         }
3827 
3828         /*package-private*/
3829         @ForceInline
3830         <M> void stOp(M memory, int offset,
3831                       AbstractMask<Short> m,
3832                       FStOp<M> f) {
3833             dummyVector().stOp(memory, offset, m, f);
3834         }
3835 
3836         // N.B. Make sure these constant vectors and
3837         // masks load up correctly into registers.
3838         //
3839         // Also, see if we can avoid all that switching.
3840         // Could we cache both vectors and both masks in
3841         // this species object?
3842 
3843         // Zero and iota vector access
3844         @Override
3845         @ForceInline
3846         public final ShortVector zero() {
3847             if ((Class<?>) vectorType() == ShortMaxVector.class)
3848                 return ShortMaxVector.ZERO;
3849             switch (vectorBitSize()) {
3850                 case 64: return Short64Vector.ZERO;
3851                 case 128: return Short128Vector.ZERO;
3852                 case 256: return Short256Vector.ZERO;
3853                 case 512: return Short512Vector.ZERO;
3854             }
3855             throw new AssertionError();
3856         }        
3857 
3858         @Override
3859         @ForceInline
3860         public final ShortVector iota() {
3861             if ((Class<?>) vectorType() == ShortMaxVector.class)
3862                 return ShortMaxVector.IOTA;
3863             switch (vectorBitSize()) {
3864                 case 64: return Short64Vector.IOTA;
3865                 case 128: return Short128Vector.IOTA;
3866                 case 256: return Short256Vector.IOTA;
3867                 case 512: return Short512Vector.IOTA;
3868             }
3869             throw new AssertionError();
3870         }
3871 
3872         // Mask access
3873         @Override
3874         @ForceInline
3875         public final VectorMask<Short> maskAll(boolean bit) {
3876             if ((Class<?>) vectorType() == ShortMaxVector.class)
3877                 return ShortMaxVector.ShortMaxMask.maskAll(bit);
3878             switch (vectorBitSize()) {
3879                 case 64: return Short64Vector.Short64Mask.maskAll(bit);
3880                 case 128: return Short128Vector.Short128Mask.maskAll(bit);
3881                 case 256: return Short256Vector.Short256Mask.maskAll(bit);
3882                 case 512: return Short512Vector.Short512Mask.maskAll(bit);
3883             }
3884             throw new AssertionError();
3885         }
3886     }
3887 
3888     /**
3889      * Finds a species for an element type of {@code short} and shape.
3890      *
3891      * @param s the shape
3892      * @return a species for an element type of {@code short} and shape
3893      * @throws IllegalArgumentException if no such species exists for the shape
3894      */
3895     static ShortSpecies species(VectorShape s) {
3896         Objects.requireNonNull(s);
3897         switch (s) {
3898             case S_64_BIT: return (ShortSpecies) SPECIES_64;
3899             case S_128_BIT: return (ShortSpecies) SPECIES_128;
3900             case S_256_BIT: return (ShortSpecies) SPECIES_256;
3901             case S_512_BIT: return (ShortSpecies) SPECIES_512;
3902             case S_Max_BIT: return (ShortSpecies) SPECIES_MAX;
3903             default: throw new IllegalArgumentException("Bad shape: " + s);
3904         }
3905     }
3906 
3907     /** Species representing {@link ShortVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
3908     public static final VectorSpecies<Short> SPECIES_64
3909         = new ShortSpecies(VectorShape.S_64_BIT,
3910                             Short64Vector.class,
3911                             Short64Vector.Short64Mask.class,
3912                             Short64Vector::new);
3913 
3914     /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
3915     public static final VectorSpecies<Short> SPECIES_128
3916         = new ShortSpecies(VectorShape.S_128_BIT,
3917                             Short128Vector.class,
3918                             Short128Vector.Short128Mask.class,
3919                             Short128Vector::new);
3920 
3921     /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
3922     public static final VectorSpecies<Short> SPECIES_256
3923         = new ShortSpecies(VectorShape.S_256_BIT,
3924                             Short256Vector.class,
3925                             Short256Vector.Short256Mask.class,
3926                             Short256Vector::new);
3927 
3928     /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
3929     public static final VectorSpecies<Short> SPECIES_512
3930         = new ShortSpecies(VectorShape.S_512_BIT,
3931                             Short512Vector.class,
3932                             Short512Vector.Short512Mask.class,
3933                             Short512Vector::new);
3934 
3935     /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
3936     public static final VectorSpecies<Short> SPECIES_MAX
3937         = new ShortSpecies(VectorShape.S_Max_BIT,
3938                             ShortMaxVector.class,
3939                             ShortMaxVector.ShortMaxMask.class,
3940                             ShortMaxVector::new);
3941 
3942     /**
3943      * Preferred species for {@link ShortVector}s.
3944      * A preferred species is a species of maximal bit-size for the platform.
3945      */
3946     public static final VectorSpecies<Short> SPECIES_PREFERRED
3947         = (ShortSpecies) VectorSpecies.ofPreferred(short.class);
3948 }