1 /*
   2  * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have
  23  * questions.
  24  */
  25 package jdk.incubator.vector;
  26 
  27 import java.nio.ByteBuffer;
  28 #if[!byte]
  29 import java.nio.$Type$Buffer;
  30 #end[!byte]
  31 import java.nio.ByteOrder;
  32 import java.util.Arrays;
  33 import java.util.Objects;
  34 import java.util.function.BinaryOperator;
  35 import java.util.function.IntUnaryOperator;
  36 import java.util.function.Function;
  37 import java.util.function.UnaryOperator;
  38 import java.util.concurrent.ThreadLocalRandom;
  39 
  40 import jdk.internal.misc.Unsafe;
  41 import jdk.internal.vm.annotation.ForceInline;
  42 
  43 import static jdk.incubator.vector.VectorIntrinsics.*;
  44 import static jdk.incubator.vector.VectorOperators.*;
  45 
  46 #warn This file is preprocessed before being compiled
  47 
  48 /**
  49  * A specialized {@link Vector} representing an ordered immutable sequence of
  50  * {@code $type$} values.
  51  */
  52 @SuppressWarnings("cast")  // warning: redundant cast
  53 public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> {
  54 
  55     $abstractvectortype$() {}
  56 
    // Operator-kind bits handed to VectorOperators.opCode as its third
    // argument by the opCode helpers below; the value is selected by the
    // preprocessor depending on whether this is a floating-point template
    // instantiation.
#if[FP]
    static final int FORBID_OPCODE_KIND = VO_NOFP;
#else[FP]
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;
#end[FP]
  62 
  63     @ForceInline
  64     static int opCode(Operator op) {
  65         return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
  66     }
  67     @ForceInline
  68     static int opCode(Operator op, int requireKind) {
  69         requireKind |= VO_OPCODE_VALID;
  70         return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
  71     }
  72     @ForceInline
  73     static boolean opKind(Operator op, int bit) {
  74         return VectorOperators.opKind(op, bit);
  75     }
  76 
  77     // Virtualized factories and operators,
  78     // coded with portable definitions.
  79     // These are all @ForceInline in case
  80     // they need to be used performantly.
  81     // The various shape-specific subclasses
  82     // also specialize them by wrapping
  83     // them in a call like this:
  84     //    return (Byte128Vector)
  85     //       super.bOp((Byte128Vector) o);
  86     // The purpose of that is to forcibly inline
  87     // the generic definition from this file
  88     // into a sharply type- and size-specific
  89     // wrapper in the subclass file, so that
  90     // the JIT can specialize the code.
  91     // The code is only inlined and expanded
  92     // if it gets hot.  Think of it as a cheap
  93     // and lazy version of C++ templates.
  94 
  95     // Virtualized getter
  96 
    /**
     * Returns the {@code $type$[]} holding this vector's lane values.
     * The template methods in this class read element {@code i} of the
     * returned array as the value of lane {@code i}.
     */
    /*package-private*/
    abstract $type$[] vec();
  99 
 100     // Virtualized constructors
 101 
    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     *
     * @param vec the lane values for the new vector; the template methods
     *        in this class always pass a freshly allocated array
     * @return the vector built from {@code vec}
     */
    /*package-private*/
    abstract $abstractvectortype$ vectorFactory($type$[] vec);
 108 
 109     /**
 110      * Build a mask directly using my species.
 111      * It is an error if the array is aliased elsewhere.
 112      */
 113     /*package-private*/
 114     @ForceInline
 115     final
 116     AbstractMask<$Boxtype$> maskFactory(boolean[] bits) {
 117         return vspecies().maskFactory(bits);
 118     }
 119 
 120     // Constant loader (takes dummy as vector arg)
 121     interface FVOp {
 122         $type$ apply(int i);
 123     }
 124 
 125     /*package-private*/
 126     @ForceInline
 127     final
 128     $abstractvectortype$ vOp(FVOp f) {
 129         $type$[] res = new $type$[length()];
 130         for (int i = 0; i < res.length; i++) {
 131             res[i] = f.apply(i);
 132         }
 133         return vectorFactory(res);
 134     }
 135 
 136     @ForceInline
 137     final
 138     $abstractvectortype$ vOp(VectorMask<$Boxtype$> m, FVOp f) {
 139         $type$[] res = new $type$[length()];
 140         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
 141         for (int i = 0; i < res.length; i++) {
 142             if (mbits[i]) {
 143                 res[i] = f.apply(i);
 144             }
 145         }
 146         return vectorFactory(res);
 147     }
 148 
 149     // Unary operator
 150 
 151     /*package-private*/
 152     interface FUnOp {
 153         $type$ apply(int i, $type$ a);
 154     }
 155 
 156     /*package-private*/
 157     abstract
 158     $abstractvectortype$ uOp(FUnOp f);
 159     @ForceInline
 160     final
 161     $abstractvectortype$ uOpTemplate(FUnOp f) {
 162         $type$[] vec = vec();
 163         $type$[] res = new $type$[length()];
 164         for (int i = 0; i < res.length; i++) {
 165             res[i] = f.apply(i, vec[i]);
 166         }
 167         return vectorFactory(res);
 168     }
 169 
 170     /*package-private*/
 171     abstract
 172     $abstractvectortype$ uOp(VectorMask<$Boxtype$> m,
 173                              FUnOp f);
 174     @ForceInline
 175     final
 176     $abstractvectortype$ uOpTemplate(VectorMask<$Boxtype$> m,
 177                                      FUnOp f) {
 178         $type$[] vec = vec();
 179         $type$[] res = new $type$[length()];
 180         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
 181         for (int i = 0; i < res.length; i++) {
 182             res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
 183         }
 184         return vectorFactory(res);
 185     }
 186 
 187     // Binary operator
 188 
 189     /*package-private*/
 190     interface FBinOp {
 191         $type$ apply(int i, $type$ a, $type$ b);
 192     }
 193 
 194     /*package-private*/
 195     abstract
 196     $abstractvectortype$ bOp(Vector<$Boxtype$> o,
 197                              FBinOp f);
 198     @ForceInline
 199     final
 200     $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o,
 201                                      FBinOp f) {
 202         $type$[] res = new $type$[length()];
 203         $type$[] vec1 = this.vec();
 204         $type$[] vec2 = (($abstractvectortype$)o).vec();
 205         for (int i = 0; i < res.length; i++) {
 206             res[i] = f.apply(i, vec1[i], vec2[i]);
 207         }
 208         return vectorFactory(res);
 209     }
 210 
 211     /*package-private*/
 212     abstract
 213     $abstractvectortype$ bOp(Vector<$Boxtype$> o,
 214                              VectorMask<$Boxtype$> m,
 215                              FBinOp f);
 216     @ForceInline
 217     final
 218     $abstractvectortype$ bOpTemplate(Vector<$Boxtype$> o,
 219                                      VectorMask<$Boxtype$> m,
 220                                      FBinOp f) {
 221         $type$[] res = new $type$[length()];
 222         $type$[] vec1 = this.vec();
 223         $type$[] vec2 = (($abstractvectortype$)o).vec();
 224         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
 225         for (int i = 0; i < res.length; i++) {
 226             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
 227         }
 228         return vectorFactory(res);
 229     }
 230 
 231     // Ternary operator
 232 
 233     /*package-private*/
 234     interface FTriOp {
 235         $type$ apply(int i, $type$ a, $type$ b, $type$ c);
 236     }
 237 
 238     /*package-private*/
 239     abstract
 240     $abstractvectortype$ tOp(Vector<$Boxtype$> o1,
 241                              Vector<$Boxtype$> o2,
 242                              FTriOp f);
 243     @ForceInline
 244     final
 245     $abstractvectortype$ tOpTemplate(Vector<$Boxtype$> o1,
 246                                      Vector<$Boxtype$> o2,
 247                                      FTriOp f) {
 248         $type$[] res = new $type$[length()];
 249         $type$[] vec1 = this.vec();
 250         $type$[] vec2 = (($abstractvectortype$)o1).vec();
 251         $type$[] vec3 = (($abstractvectortype$)o2).vec();
 252         for (int i = 0; i < res.length; i++) {
 253             res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
 254         }
 255         return vectorFactory(res);
 256     }
 257 
 258     /*package-private*/
 259     abstract
 260     $abstractvectortype$ tOp(Vector<$Boxtype$> o1,
 261                              Vector<$Boxtype$> o2,
 262                              VectorMask<$Boxtype$> m,
 263                              FTriOp f);
 264     @ForceInline
 265     final
 266     $abstractvectortype$ tOpTemplate(Vector<$Boxtype$> o1,
 267                                      Vector<$Boxtype$> o2,
 268                                      VectorMask<$Boxtype$> m,
 269                                      FTriOp f) {
 270         $type$[] res = new $type$[length()];
 271         $type$[] vec1 = this.vec();
 272         $type$[] vec2 = (($abstractvectortype$)o1).vec();
 273         $type$[] vec3 = (($abstractvectortype$)o2).vec();
 274         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
 275         for (int i = 0; i < res.length; i++) {
 276             res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
 277         }
 278         return vectorFactory(res);
 279     }
 280 
 281     // Reduction operator
 282 
 283     /*package-private*/
 284     abstract
 285     $type$ rOp($type$ v, FBinOp f);
 286     @ForceInline
 287     final
 288     $type$ rOpTemplate($type$ v, FBinOp f) {
 289         $type$[] vec = vec();
 290         for (int i = 0; i < vec.length; i++) {
 291             v = f.apply(i, v, vec[i]);
 292         }
 293         return v;
 294     }
 295 
 296     // Memory reference
 297 
 298     /*package-private*/
 299     interface FLdOp<M> {
 300         $type$ apply(M memory, int offset, int i);
 301     }
 302 
 303     /*package-private*/
 304     @ForceInline
 305     final
 306     <M> $abstractvectortype$ ldOp(M memory, int offset,
 307                                   FLdOp<M> f) {
 308         //dummy; no vec = vec();
 309         $type$[] res = new $type$[length()];
 310         for (int i = 0; i < res.length; i++) {
 311             res[i] = f.apply(memory, offset, i);
 312         }
 313         return vectorFactory(res);
 314     }
 315 
 316     /*package-private*/
 317     @ForceInline
 318     final
 319     <M> $abstractvectortype$ ldOp(M memory, int offset,
 320                                   VectorMask<$Boxtype$> m,
 321                                   FLdOp<M> f) {
 322         //$type$[] vec = vec();
 323         $type$[] res = new $type$[length()];
 324         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
 325         for (int i = 0; i < res.length; i++) {
 326             if (mbits[i]) {
 327                 res[i] = f.apply(memory, offset, i);
 328             }
 329         }
 330         return vectorFactory(res);
 331     }
 332 
 333     interface FStOp<M> {
 334         void apply(M memory, int offset, int i, $type$ a);
 335     }
 336 
 337     /*package-private*/
 338     @ForceInline
 339     final
 340     <M> void stOp(M memory, int offset,
 341                   FStOp<M> f) {
 342         $type$[] vec = vec();
 343         for (int i = 0; i < vec.length; i++) {
 344             f.apply(memory, offset, i, vec[i]);
 345         }
 346     }
 347 
 348     /*package-private*/
 349     @ForceInline
 350     final
 351     <M> void stOp(M memory, int offset,
 352                   VectorMask<$Boxtype$> m,
 353                   FStOp<M> f) {
 354         $type$[] vec = vec();
 355         boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
 356         for (int i = 0; i < vec.length; i++) {
 357             if (mbits[i]) {
 358                 f.apply(memory, offset, i, vec[i]);
 359             }
 360         }
 361     }
 362 
 363     // Binary test
 364 
 365     /*package-private*/
 366     interface FBinTest {
 367         boolean apply(int cond, int i, $type$ a, $type$ b);
 368     }
 369 
 370     /*package-private*/
 371     @ForceInline
 372     final
 373     AbstractMask<$Boxtype$> bTest(int cond,
 374                                   Vector<$Boxtype$> o,
 375                                   FBinTest f) {
 376         $type$[] vec1 = vec();
 377         $type$[] vec2 = (($abstractvectortype$)o).vec();
 378         boolean[] bits = new boolean[length()];
 379         for (int i = 0; i < length(); i++){
 380             bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
 381         }
 382         return maskFactory(bits);
 383     }
 384 
 385     /*package-private*/
 386     @ForceInline
 387     static boolean doBinTest(int cond, $type$ a, $type$ b) {
 388         switch (cond) {
 389         case BT_eq:  return a == b;
 390         case BT_ne:  return a != b;
 391         case BT_lt:  return a < b;
 392         case BT_le:  return a <= b;
 393         case BT_gt:  return a > b;
 394         case BT_ge:  return a >= b;
 395         }
 396         throw new AssertionError(Integer.toHexString(cond));
 397     }
 398 
    /**
     * Returns this vector's species, typed as {@code $Type$Species}
     * (an override that narrows the return type).
     */
    /*package-private*/
    @Override
    abstract $Type$Species vspecies();
 402 
 403     /*package-private*/
 404     @ForceInline
 405     static long toBits($type$ e) {
 406         return {#if[FP]? $Type$.$type$To$Bitstype$Bits(e): e};
 407     }
 408 
 409     /*package-private*/
 410     @ForceInline
 411     static $type$ fromBits(long bits) {
 412         return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
 413     }
 414 
 415     // Static factories (other than memory operations)
 416 
 417     // Note: A surprising behavior in javadoc
 418     // sometimes makes a lone /** {@inheritDoc} */
 419     // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<$Boxtype$>
 422     // instead of Vector<E> as originally specified.
 423     // Adding an empty HTML fragment appears to
 424     // nudge javadoc into providing the desired
 425     // inherited documentation.  We use the HTML
 426     // comment <!--workaround--> for this.
 427 
 428     /**
 429      * {@inheritDoc} <!--workaround-->
 430      */
 431     @ForceInline
 432     public static $abstractvectortype$ zero(VectorSpecies<$Boxtype$> species) {
 433         $Type$Species vsp = ($Type$Species) species;
 434 #if[FP]
 435         return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), $type$.class, species.length(),
 436                         toBits(0.0f), vsp,
 437                         ((bits_, s_) -> s_.rvOp(i -> bits_)));
 438 #else[FP]
 439         return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), $type$.class, species.length(),
 440                                 0, vsp,
 441                                 ((bits_, s_) -> s_.rvOp(i -> bits_)));
 442 #end[FP]
 443     }
 444 
    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * <p> The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code $abstractvectortype$.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract $abstractvectortype$ broadcast($type$ e);
 471 
 472     /**
 473      * Returns a vector of the given species
 474      * where all lane elements are set to
 475      * the primitive value {@code e}.
 476      *
 477      * @param species species of the desired vector
 478      * @param e the value to broadcast
 479      * @return a vector where all lane elements are set to
 480      *         the primitive value {@code e}
 481      * @see #broadcast(long)
 482      * @see Vector#broadcast(long)
 483      * @see VectorSpecies#broadcast(long)
 484      */
 485     public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, $type$ e) {
 486         $Type$Species vsp = ($Type$Species) species;
 487         return vsp.broadcast(e);
 488     }
 489 
 490     /*package-private*/
 491     @ForceInline
 492     final $abstractvectortype$ broadcastTemplate($type$ e) {
 493         $Type$Species vsp = vspecies();
 494         return vsp.broadcast(e);
 495     }
 496 
 497 #if[!long]
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code $abstractvectortype$},
     * {@linkplain #broadcast($type$) the more strongly typed method}
     * is typically selected by overload resolution.  It can be
     * explicitly selected using a cast: {@code v.broadcast(($type$)e)}.
     * The two expressions will produce numerically identical results.
     */
    @Override
    public abstract $abstractvectortype$ broadcast(long e);
 509 
 510     /**
 511      * Returns a vector of the given species
 512      * where all lane elements are set to
 513      * the primitive value {@code e}.
 514      *
 515      * The {@code long} value must be accurately representable
 516      * by the {@code ETYPE} of the vector species, so that
 517      * {@code e==(long)(ETYPE)e}.
 518      *
 519      * @param species species of the desired vector
 520      * @param e the value to broadcast
 521      * @return a vector where all lane elements are set to
 522      *         the primitive value {@code e}
 523      * @throws IllegalArgumentException
 524      *         if the given {@code long} value cannot
 525      *         be represented by the vector's {@code ETYPE}
 526      * @see #broadcast(VectorSpecies,$type$)
 527      * @see VectorSpecies#checkValue(long)
 528      */
 529     public static $abstractvectortype$ broadcast(VectorSpecies<$Boxtype$> species, long e) {
 530         $Type$Species vsp = ($Type$Species) species;
 531         return vsp.broadcast(e);
 532     }
 533 
 534     /*package-private*/
 535     @ForceInline
 536     final $abstractvectortype$ broadcastTemplate(long e) {
 537         return vspecies().broadcast(e);
 538     }
 539 #end[!long]
 540 
 541     /**
 542      * Returns a vector where each lane element is set to given
 543      * primitive values.
 544      * <p>
 545      * For each vector lane, where {@code N} is the vector lane index, the
 546      * the primitive value at index {@code N} is placed into the resulting
 547      * vector at lane index {@code N}.
 548      *
 549      * @param species species of the desired vector
 550      * @param es the given primitive values
 551      * @return a vector where each lane element is set to given primitive
 552      * values
 553      * @throws IllegalArgumentException
 554      *         if {@code es.length != species.length()}
 555      */
 556     @ForceInline
 557     @SuppressWarnings("unchecked")
 558     public static $abstractvectortype$ fromValues(VectorSpecies<$Boxtype$> species, $type$... es) {
 559         $Type$Species vsp = ($Type$Species) species;
 560         int vlength = vsp.laneCount();
 561         VectorIntrinsics.requireLength(es.length, vlength);
 562         // Get an unaliased copy and use it directly:
 563         return vsp.vectorFactory(Arrays.copyOf(es, vlength));
 564     }
 565 
 566     /**
 567      * Returns a vector where the first lane element is set to the primtive
 568      * value {@code e}, all other lane elements are set to the default
 569      * value({#if[FP]?positive }zero).
 570      *
 571      * @param species species of the desired vector
 572      * @param e the value
 573      * @return a vector where the first lane element is set to the primitive
 574      * value {@code e}
 575      */
 576     // FIXME: Does this carry its weight?
 577     @ForceInline
 578     public static $abstractvectortype$ single(VectorSpecies<$Boxtype$> species, $type$ e) {
 579         return zero(species).withLane(0, e);
 580     }
 581 
 582     /**
 583      * Returns a vector where each lane element is set to a randomly
 584      * generated primitive value.
 585      *
 586      * The semantics are equivalent to calling
 587 #if[byteOrShort]
 588      * {@code ($type$)}{@link ThreadLocalRandom#nextInt()}
 589 #else[byteOrShort]
 590      * {@link ThreadLocalRandom#next$Type$()}
 591 #end[byteOrShort]
 592      * for each lane, from first to last.
 593      *
 594      * @param species species of the desired vector
 595      * @return a vector where each lane elements is set to a randomly
 596      * generated primitive value
 597      */
 598     public static $abstractvectortype$ random(VectorSpecies<$Boxtype$> species) {
 599         $Type$Species vsp = ($Type$Species) species;
 600         ThreadLocalRandom r = ThreadLocalRandom.current();
 601         return vsp.vOp(i -> nextRandom(r));
 602     }
 603     private static $type$ nextRandom(ThreadLocalRandom r) {
 604 #if[byteOrShort]
 605         return ($type$) r.nextInt();
 606 #else[byteOrShort]
 607         return r.next$Type$();
 608 #end[byteOrShort]
 609     }
 610 
 611     // Unary lanewise support
 612 
    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Unary,VectorMask)
     */
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Unary op);
 618 
 619     @ForceInline
 620     final
 621     $abstractvectortype$ lanewiseTemplate(VectorOperators.Unary op) {
 622         if (opKind(op, VO_SPECIAL)) {
 623             if (op == ZOMO) {
 624                 return blend(broadcast(-1), compare(NE, 0));
 625             }
 626 #if[BITWISE]
 627             if (op == NEG) {
 628                 // FIXME: Support this in the JIT.
 629                 return broadcast(0).lanewiseTemplate(SUB, this);
 630             }
 631 #end[BITWISE]
 632         }
 633         int opc = opCode(op);
 634         return VectorIntrinsics.unaryOp(
 635             opc, getClass(), $type$.class, length(),
 636             this,
 637             UN_IMPL.find(op, opc, (opc_) -> {
 638               switch (opc_) {
 639                 case VECTOR_OP_NEG: return v0 ->
 640                         v0.uOp((i, a) -> ($type$) -a);
 641                 case VECTOR_OP_ABS: return v0 ->
 642                         v0.uOp((i, a) -> ($type$) Math.abs(a));
 643 #if[BITWISE]
 644                 case VECTOR_OP_NOT: return v0 ->
 645                         v0.uOp((i, a) -> ($type$) ~a);
 646 #end[BITWISE]
 647 #if[FP]
 648                 case VECTOR_OP_SIN: return v0 ->
 649                         v0.uOp((i, a) -> ($type$) Math.sin(a));
 650                 case VECTOR_OP_COS: return v0 ->
 651                         v0.uOp((i, a) -> ($type$) Math.cos(a));
 652                 case VECTOR_OP_TAN: return v0 ->
 653                         v0.uOp((i, a) -> ($type$) Math.tan(a));
 654                 case VECTOR_OP_ASIN: return v0 ->
 655                         v0.uOp((i, a) -> ($type$) Math.asin(a));
 656                 case VECTOR_OP_ACOS: return v0 ->
 657                         v0.uOp((i, a) -> ($type$) Math.acos(a));
 658                 case VECTOR_OP_ATAN: return v0 ->
 659                         v0.uOp((i, a) -> ($type$) Math.atan(a));
 660                 case VECTOR_OP_EXP: return v0 ->
 661                         v0.uOp((i, a) -> ($type$) Math.exp(a));
 662                 case VECTOR_OP_LOG: return v0 ->
 663                         v0.uOp((i, a) -> ($type$) Math.log(a));
 664                 case VECTOR_OP_LOG10: return v0 ->
 665                         v0.uOp((i, a) -> ($type$) Math.log10(a));
 666                 case VECTOR_OP_SQRT: return v0 ->
 667                         v0.uOp((i, a) -> ($type$) Math.sqrt(a));
 668                 case VECTOR_OP_CBRT: return v0 ->
 669                         v0.uOp((i, a) -> ($type$) Math.cbrt(a));
 670                 case VECTOR_OP_SINH: return v0 ->
 671                         v0.uOp((i, a) -> ($type$) Math.sinh(a));
 672                 case VECTOR_OP_COSH: return v0 ->
 673                         v0.uOp((i, a) -> ($type$) Math.cosh(a));
 674                 case VECTOR_OP_TANH: return v0 ->
 675                         v0.uOp((i, a) -> ($type$) Math.tanh(a));
 676                 case VECTOR_OP_EXPM1: return v0 ->
 677                         v0.uOp((i, a) -> ($type$) Math.expm1(a));
 678                 case VECTOR_OP_LOG1P: return v0 ->
 679                         v0.uOp((i, a) -> ($type$) Math.log1p(a));
 680 #end[FP]
 681                 default: return null;
 682               }}));
 683     }
 684     private static final
 685     ImplCache<Unary,UnaryOperator<$abstractvectortype$>> UN_IMPL
 686         = new ImplCache<>(Unary.class, $Type$Vector.class);
 687 
 688     /**
 689      * {@inheritDoc} <!--workaround-->
 690      */
 691     @ForceInline
 692     public final
 693     $abstractvectortype$ lanewise(VectorOperators.Unary op,
 694                                   VectorMask<$Boxtype$> m) {
 695         return blend(lanewise(op), m);
 696     }
 697 
 698     // Binary lanewise support
 699 
 700     /**
 701      * {@inheritDoc} <!--workaround-->
 702      * @see #lanewise(VectorOperators.Binary,$type$)
 703      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
 704      */
 705     @Override
 706     public abstract
 707     $abstractvectortype$ lanewise(VectorOperators.Binary op,
 708                                   Vector<$Boxtype$> v);
 709     @ForceInline
 710     final
 711     $abstractvectortype$ lanewiseTemplate(VectorOperators.Binary op,
 712                                           Vector<$Boxtype$> v) {
 713         $abstractvectortype$ that = ($abstractvectortype$) v;
 714         that.check(this);
 715         if (opKind(op, VO_SPECIAL {#if[!FP]? | VO_SHIFT})) {
 716             if (op == FIRST_NONZERO) {
 717                 // FIXME: Support this in the JIT.
 718                 VectorMask<$Boxbitstype$> thisNZ
 719                     = this.viewAsIntegralLanes().compare(NE, ($bitstype$) 0);
 720                 that = that.blend(($type$) 0, thisNZ.cast(vspecies()));
 721                 op = OR_UNCHECKED;
 722 #if[FP]
 723                 // FIXME: Support OR_UNCHECKED on float/double also!
 724                 return this.viewAsIntegralLanes()
 725                     .lanewise(op, that.viewAsIntegralLanes())
 726                     .viewAsFloatingLanes();
 727 #end[FP]
 728             }
 729 #if[BITWISE]
 730 #if[!FP]
 731             if (opKind(op, VO_SHIFT)) {
 732                 // As per shift specification for Java, mask the shift count.
 733                 // This allows the JIT to ignore some ISA details.
 734                 that = that.lanewise(AND, SHIFT_MASK);
 735             }
 736 #end[!FP]
 737             if (op == ROR || op == ROL) {  // FIXME: JIT should do this
 738                 $abstractvectortype$ neg = that.lanewise(NEG);
 739                 $abstractvectortype$ hi = this.lanewise(LSHL, (op == ROR) ? neg : that);
 740                 $abstractvectortype$ lo = this.lanewise(LSHR, (op == ROR) ? that : neg);
 741                 return hi.lanewise(OR, lo);
 742             } else if (op == AND_NOT) {
 743                 // FIXME: Support this in the JIT.
 744                 that = that.lanewise(NOT);
 745                 op = AND;
 746             } else if (op == DIV) {
 747                 VectorMask<$Boxtype$> eqz = that.eq(($type$)0);
 748                 if (eqz.anyTrue()) {
 749                     throw that.divZeroException();
 750                 }
 751             }
 752 #end[BITWISE]
 753         }
 754         int opc = opCode(op);
 755         return VectorIntrinsics.binaryOp(
 756             opc, getClass(), $type$.class, length(),
 757             this, that,
 758             BIN_IMPL.find(op, opc, (opc_) -> {
 759               switch (opc_) {
 760                 case VECTOR_OP_ADD: return (v0, v1) ->
 761                         v0.bOp(v1, (i, a, b) -> ($type$)(a + b));
 762                 case VECTOR_OP_SUB: return (v0, v1) ->
 763                         v0.bOp(v1, (i, a, b) -> ($type$)(a - b));
 764                 case VECTOR_OP_MUL: return (v0, v1) ->
 765                         v0.bOp(v1, (i, a, b) -> ($type$)(a * b));
 766                 case VECTOR_OP_DIV: return (v0, v1) ->
 767                         v0.bOp(v1, (i, a, b) -> ($type$)(a / b));
 768                 case VECTOR_OP_MAX: return (v0, v1) ->
 769                         v0.bOp(v1, (i, a, b) -> ($type$)Math.max(a, b));
 770                 case VECTOR_OP_MIN: return (v0, v1) ->
 771                         v0.bOp(v1, (i, a, b) -> ($type$)Math.min(a, b));
 772                 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) ->
 773                         v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? a : b);
 774 #if[BITWISE]
 775                 case VECTOR_OP_AND: return (v0, v1) ->
 776                         v0.bOp(v1, (i, a, b) -> ($type$)(a & b));
 777                 case VECTOR_OP_OR: return (v0, v1) ->
 778                         v0.bOp(v1, (i, a, b) -> ($type$)(a | b));
 779                 case VECTOR_OP_AND_NOT: return (v0, v1) ->
 780                         v0.bOp(v1, (i, a, b) -> ($type$)(a & ~b));
 781                 case VECTOR_OP_XOR: return (v0, v1) ->
 782                         v0.bOp(v1, (i, a, b) -> ($type$)(a ^ b));
 783                 case VECTOR_OP_LSHIFT: return (v0, v1) ->
 784                         v0.bOp(v1, (i, a, n) -> ($type$)(a << n));
 785                 case VECTOR_OP_RSHIFT: return (v0, v1) ->
 786                         v0.bOp(v1, (i, a, n) -> ($type$)(a >> n));
 787                 case VECTOR_OP_URSHIFT: return (v0, v1) ->
 788                         v0.bOp(v1, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
 789                 case VECTOR_OP_LROTATE: return (v0, v1) ->
 790                         v0.bOp(v1, (i, a, n) -> ($type$)((a << n)|(a >> -n)));
 791                 case VECTOR_OP_RROTATE: return (v0, v1) ->
 792                         v0.bOp(v1, (i, a, n) -> ($type$)((a >> n)|(a << -n)));
 793 #end[BITWISE]
 794 #if[FP]
 795                 case VECTOR_OP_OR: return (v0, v1) ->
 796                         v0.bOp(v1, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
 797                 case VECTOR_OP_ATAN2: return (v0, v1) ->
 798                         v0.bOp(v1, (i, a, b) -> ($type$) Math.atan2(a, b));
 799                 case VECTOR_OP_POW: return (v0, v1) ->
 800                         v0.bOp(v1, (i, a, b) -> ($type$) Math.pow(a, b));
 801                 case VECTOR_OP_HYPOT: return (v0, v1) ->
 802                         v0.bOp(v1, (i, a, b) -> ($type$) Math.hypot(a, b));
 803 #end[FP]
 804                 default: return null;
 805                 }}));
 806     }
    // Per-operator store of BinaryOperator implementations for this
    // vector type, created for the Binary operator class.
    // NOTE(review): presumably consulted through BIN_IMPL.find(...) by
    // the binary lanewiseTemplate to obtain fallback lambdas — confirm
    // against that method.
    private static final
    ImplCache<Binary,BinaryOperator<$abstractvectortype$>> BIN_IMPL
        = new ImplCache<>(Binary.class, $Type$Vector.class);
 810 
 811     /**
 812      * {@inheritDoc} <!--workaround-->
 813      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
 814      */
 815     @ForceInline
 816     public final
 817     $abstractvectortype$ lanewise(VectorOperators.Binary op,
 818                                   Vector<$Boxtype$> v,
 819                                   VectorMask<$Boxtype$> m) {
 820 #if[BITWISE]
 821         $abstractvectortype$ that = ($abstractvectortype$) v;
 822         if (op == DIV) {
 823             // suppress div/0 exceptions in unset lanes
 824             that = that.lanewise(NOT, that.eq(($type$)0));
 825             return blend(lanewise(DIV, that), m);
 826         }
 827 #end[BITWISE]
 828         return blend(lanewise(op, v), m);
 829     }
 830     // FIXME: Maybe all of the public final methods in this file (the
 831     // simple ones that just call lanewise) should be pushed down to
 832     // the X-VectorBits template.  They can't optimize properly at
 833     // this level, and must rely on inlining.  Does it work?
 834     // (If it works, of course keep the code here.)
 835 
 836     /**
 837      * Combines the lane values of this vector
 838      * with the value of a broadcast scalar.
 839      *
 840      * This is a lane-wise binary operation which applies
 841      * the selected operation to each lane.
 842      * The return value will be equal to this expression:
 843      * {@code this.lanewise(op, this.broadcast(e))}.
 844      *
 845      * @param op the operation used to process lane values
 846      * @param e the input scalar
 847      * @return the result of applying the operation lane-wise
 848      *         to the two input vectors
 849      * @throws UnsupportedOperationException if this vector does
 850      *         not support the requested operation
 851      * @see #lanewise(VectorOperators.Binary,Vector)
 852      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
 853      */
 854     @ForceInline
 855     public final
 856     $abstractvectortype$ lanewise(VectorOperators.Binary op,
 857                                   $type$ e) {
 858         int opc = opCode(op);
 859 #if[BITWISE]
 860         if (opKind(op, VO_SHIFT) && ($type$)(int)e == e) {
 861             return lanewiseShift(op, (int) e);
 862         }
 863         if (op == AND_NOT) {
 864             op = AND; e = ($type$) ~e;
 865         }
 866 #end[BITWISE]
 867         return lanewise(op, broadcast(e));
 868     }
 869 
 870     /**
 871      * Combines the lane values of this vector
 872      * with the value of a broadcast scalar,
 873      * with selection of lane elements controlled by a mask.
 874      *
 875      * This is a masked lane-wise binary operation which applies
 876      * the selected operation to each lane.
 877      * The return value will be equal to this expression:
 878      * {@code this.lanewise(op, this.broadcast(e), m)}.
 879      *
 880      * @param op the operation used to process lane values
 881      * @param e the input scalar
 882      * @param m the mask controlling lane selection
 883      * @return the result of applying the operation lane-wise
 884      *         to the input vector and the scalar
 885      * @throws UnsupportedOperationException if this vector does
 886      *         not support the requested operation
 887      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
 888      * @see #lanewise(VectorOperators.Binary,$type$)
 889      */
 890     @ForceInline
 891     public final
 892     $abstractvectortype$ lanewise(VectorOperators.Binary op,
 893                                   $type$ e,
 894                                   VectorMask<$Boxtype$> m) {
 895         return blend(lanewise(op, e), m);
 896     }
 897 
 898 #if[!long]
    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code $abstractvectortype$},
     * {@linkplain #lanewise(VectorOperators.Binary,$type$)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,($type$)e)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  long e) {
        // Narrow the long operand to the lane type; the comparison
        // below detects whether the narrowing changed the value.
        $type$ e1 = ($type$) e;
        if ((long)e1 != e
#if[BITWISE]
            // allow shift ops to clip down their int parameters
            // NOTE(review): e1 has already been narrowed to the lane
            // type, so (int)e1 == e can only hold when (long)e1 == e;
            // as written this exemption appears never to take effect.
            // Possibly (int)e == e was intended — confirm.
            && !(opKind(op, VO_SHIFT) && (int)e1 == e)
#end[BITWISE]
            ) {
            // The value did not survive narrowing; checkValue is
            // invoked only for the exception it is expected to raise.
            vspecies().checkValue(e);  // for exception
        }
        // Continue with the strongly typed scalar overload.
        return lanewise(op, e1);
    }
 924 
 925     /**
 926      * {@inheritDoc} <!--workaround-->
 927      * @apiNote
 928      * When working with vector subtypes like {@code $abstractvectortype$},
 929      * {@linkplain #lanewise(VectorOperators.Binary,$type$,VectorMask)
 930      * the more strongly typed method}
 931      * is typically selected.  It can be explicitly selected
 932      * using a cast: {@code v.lanewise(op,($type$)e,m)}.
 933      * The two expressions will produce numerically identical results.
 934      */
 935     @ForceInline
 936     public final
 937     $abstractvectortype$ lanewise(VectorOperators.Binary op,
 938                                   long e, VectorMask<$Boxtype$> m) {
 939         return blend(lanewise(op, e), m);
 940     }
 941 #end[!long]
 942 
 943 #if[BITWISE]
    /*package-private*/
    // Applies the operator op to every lane using the single int count e.
    // The scalar lanewise overload calls this when opKind(op, VO_SHIFT)
    // holds and the scalar fits in an int.
    // NOTE(review): presumably implemented in each concrete vector class
    // by delegating to lanewiseShiftTemplate — confirm.
    abstract $abstractvectortype$
    lanewiseShift(VectorOperators.Binary op, int e);
 947 
 948     /*package-private*/
 949     @ForceInline
 950     final $abstractvectortype$
 951     lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
 952         // Special handling for these.  FIXME: Refactor?
 953         int opc = opCode(op);
 954         assert(opKind(op, VO_SHIFT));
 955         // As per shift specification for Java, mask the shift count.
 956         e &= SHIFT_MASK;
 957         if (op == ROR || op == ROL) {  // FIXME: JIT should do this
 958             $abstractvectortype$ hi = this.lanewise(LSHL, (op == ROR) ? -e : e);
 959             $abstractvectortype$ lo = this.lanewise(LSHR, (op == ROR) ? e : -e);
 960             return hi.lanewise(OR, lo);
 961         }
 962         return VectorIntrinsics.broadcastInt(
 963             opc, getClass(), $type$.class, length(),
 964             this, e,
 965             BIN_INT_IMPL.find(op, opc, (opc_) -> {
 966               switch (opc_) {
 967                 case VECTOR_OP_LSHIFT: return (v, n) ->
 968                         v.uOp((i, a) -> ($type$)(a << n));
 969                 case VECTOR_OP_RSHIFT: return (v, n) ->
 970                         v.uOp((i, a) -> ($type$)(a >> n));
 971                 case VECTOR_OP_URSHIFT: return (v, n) ->
 972                         v.uOp((i, a) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
 973                 case VECTOR_OP_LROTATE: return (v, n) ->
 974                         v.uOp((i, a) -> ($type$)((a << n)|(a >> -n)));
 975                 case VECTOR_OP_RROTATE: return (v, n) ->
 976                         v.uOp((i, a) -> ($type$)((a >> n)|(a << -n)));
 977                 default: return null;
 978                 }}));
 979     }
    // Per-operator store of VectorBroadcastIntOp implementations for
    // this vector type.  lanewiseShiftTemplate looks up its fallback
    // lambdas here through BIN_INT_IMPL.find(op, opc, ...).
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<$abstractvectortype$>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, $Type$Vector.class);
 983 
    // As per shift specification for Java, mask the shift count.
    // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    // The mask is the lane size in bits minus one, so a masked count
    // always lies between 0 and (lane size - 1) inclusive.
    private static final int SHIFT_MASK = ($Boxtype$.SIZE - 1);
#if[byteOrShort]
    // Also simulate >>> on sub-word variables with a mask.
    // ANDing with this value keeps only the low lane-size bits of the
    // int-promoted lane value, so a following >>> shifts in zeros
    // rather than copies of the sign bit.
    private static final int LSHR_SETUP_MASK = ((1 << $Boxtype$.SIZE) - 1);
#else[byteOrShort]
    // For the remaining lane types the mask is all ones, so ANDing
    // with it leaves the value unchanged before >>> is applied.
    private static final $type$ LSHR_SETUP_MASK = -1;
#end[byteOrShort]
 996 #end[BITWISE]
 997 
 998     // Ternary lanewise support
 999 
1000     // Ternary operators come in eight variations:
1001     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
1002     //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)
1003 
1004     // It is annoying to support all of these variations of masking
1005     // and broadcast, but it would be more surprising not to continue
1006     // the obvious pattern started by unary and binary.
1007 
   /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
     * @see #lanewise(VectorOperators.Ternary,Vector,$type$)
     * @see #lanewise(VectorOperators.Ternary,$type$,Vector)
     */
    // This is the three-vector form; the broadcast and masked overloads
    // in this class are all expressed in terms of it.
    // NOTE(review): presumably each concrete vector class implements this
    // by delegating to lanewiseTemplate(op, v1, v2) — confirm.
    @Override
    public abstract
    $abstractvectortype$ lanewise(VectorOperators.Ternary op,
                                                  Vector<$Boxtype$> v1,
                                                  Vector<$Boxtype$> v2);
    // Shared template for the three-vector lanewise operation.
    // Casts both operands to $abstractvectortype$ and runs check(this)
    // on each (NOTE(review): presumably a compatibility check against
    // this vector — confirm).  BITWISE_BLEND is expanded by hand from
    // XOR and AND; every other operator is handed to
    // VectorIntrinsics.ternaryOp together with a fallback lambda
    // looked up in TERN_IMPL.
    @ForceInline
    final
    $abstractvectortype$ lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<$Boxtype$> v1,
                                          Vector<$Boxtype$> v2) {
        $abstractvectortype$ that = ($abstractvectortype$) v1;
        $abstractvectortype$ tother = ($abstractvectortype$) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        // "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
#if[BITWISE]
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // Computes a ^ ((a ^ b) & c) with a = this, b = that,
            // c = tother: each result bit comes from b where the
            // matching bit of c is 1, and from a where it is 0.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
#end[BITWISE]
        int opc = opCode(op);
        return VectorIntrinsics.ternaryOp(
            opc, getClass(), $type$.class, length(),
            this, that, tother,
            TERN_IMPL.find(op, opc, (opc_) -> {
              // Lane-by-lane fallback implementations, selected by opcode.
              switch (opc_) {
#if[BITWISE]
                case VECTOR_OP_BITWISE_BLEND: return (v0, v1_, v2_) ->
                        v0.tOp(v1_, v2_, (i, a, b, c) -> ($type$)(a^((a^b)&c)));
#end[BITWISE]
#if[FP]
                case VECTOR_OP_FMA: return (v0, v1_, v2_) ->
                        v0.tOp(v1_, v2_, (i, a, b, c) -> Math.fma(a, b, c));
#end[FP]
                // No fallback lambda is supplied for any other opcode.
                default: return null;
                }}));
    }
    // Per-operator store of TernaryOperation implementations for this
    // vector type.  The ternary lanewiseTemplate looks up its fallback
    // lambdas here through TERN_IMPL.find(op, opc, ...).
    private static final
    ImplCache<Ternary,TernaryOperation<$abstractvectortype$>> TERN_IMPL
        = new ImplCache<>(Ternary.class, $Type$Vector.class);
1061 
1062     /**
1063      * {@inheritDoc} <!--workaround-->
1064      * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
1065      * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
1066      * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
1067      */
1068     @ForceInline
1069     public final
1070     $abstractvectortype$ lanewise(VectorOperators.Ternary op,
1071                                   Vector<$Boxtype$> v1,
1072                                   Vector<$Boxtype$> v2,
1073                                   VectorMask<$Boxtype$> m) {
1074         return blend(lanewise(op, v1, v2), m);
1075     }
1076 
1077     /**
1078      * Combines the lane values of this vector
1079      * with the values of two broadcast scalars.
1080      *
1081      * This is a lane-wise ternary operation which applies
1082      * the selected operation to each lane.
1083      * The return value will be equal to this expression:
1084      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
1085      *
1086      * @param op the operation used to combine lane values
1087      * @param e1 the first input scalar
1088      * @param e2 the second input scalar
1089      * @return the result of applying the operation lane-wise
1090      *         to the input vector and the scalars
1091      * @throws UnsupportedOperationException if this vector does
1092      *         not support the requested operation
1093      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1094      * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
1095      */
1096     @ForceInline
1097     public final
1098     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2)
1099                                   $type$ e1,
1100                                   $type$ e2) {
1101         return lanewise(op, broadcast(e1), broadcast(e2));
1102     }
1103 
1104     /**
1105      * Combines the lane values of this vector
1106      * with the values of two broadcast scalars,
1107      * with selection of lane elements controlled by a mask.
1108      *
1109      * This is a masked lane-wise ternary operation which applies
1110      * the selected operation to each lane.
1111      * The return value will be equal to this expression:
1112      * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
1113      *
1114      * @param op the operation used to combine lane values
1115      * @param e1 the first input scalar
1116      * @param e2 the second input scalar
1117      * @param m the mask controlling lane selection
1118      * @return the result of applying the operation lane-wise
1119      *         to the input vector and the scalars
1120      * @throws UnsupportedOperationException if this vector does
1121      *         not support the requested operation
1122      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1123      * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
1124      */
1125     @ForceInline
1126     public final
1127     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
1128                                   $type$ e1,
1129                                   $type$ e2,
1130                                   VectorMask<$Boxtype$> m) {
1131         return blend(lanewise(op, e1, e2), m);
1132     }
1133 
1134     /**
1135      * Combines the lane values of this vector
1136      * with the values of another vector and a broadcast scalar.
1137      *
1138      * This is a lane-wise ternary operation which applies
1139      * the selected operation to each lane.
1140      * The return value will be equal to this expression:
1141      * {@code this.lanewise(op, v1, this.broadcast(e2))}.
1142      *
1143      * @param op the operation used to combine lane values
1144      * @param v1 the other input vector
1145      * @param e2 the input scalar
1146      * @return the result of applying the operation lane-wise
1147      *         to the input vectors and the scalar
1148      * @throws UnsupportedOperationException if this vector does
1149      *         not support the requested operation
1150      * @see #lanewise(VectorOperators.Ternary,$type$,$type$)
1151      * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
1152      */
1153     @ForceInline
1154     public final
1155     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2)
1156                                   Vector<$Boxtype$> v1,
1157                                   $type$ e2) {
1158         return lanewise(op, v1, broadcast(e2));
1159     }
1160 
1161     /**
1162      * Combines the lane values of this vector
1163      * with the values of another vector and a broadcast scalar,
1164      * with selection of lane elements controlled by a mask.
1165      *
1166      * This is a masked lane-wise ternary operation which applies
1167      * the selected operation to each lane.
1168      * The return value will be equal to this expression:
1169      * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
1170      *
1171      * @param op the operation used to combine lane values
1172      * @param v1 the other input vector
1173      * @param e2 the input scalar
1174      * @param m the mask controlling lane selection
1175      * @return the result of applying the operation lane-wise
1176      *         to the input vectors and the scalar
1177      * @throws UnsupportedOperationException if this vector does
1178      *         not support the requested operation
1179      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1180      * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
1181      * @see #lanewise(VectorOperators.Ternary,Vector,$type$)
1182      */
1183     @ForceInline
1184     public final
1185     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
1186                                   Vector<$Boxtype$> v1,
1187                                   $type$ e2,
1188                                   VectorMask<$Boxtype$> m) {
1189         return blend(lanewise(op, v1, e2), m);
1190     }
1191 
1192     /**
1193      * Combines the lane values of this vector
1194      * with the values of another vector and a broadcast scalar.
1195      *
1196      * This is a lane-wise ternary operation which applies
1197      * the selected operation to each lane.
1198      * The return value will be equal to this expression:
1199      * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1200      *
1201      * @param op the operation used to combine lane values
1202      * @param e1 the input scalar
1203      * @param v2 the other input vector
1204      * @return the result of applying the operation lane-wise
1205      *         to the input vectors and the scalar
1206      * @throws UnsupportedOperationException if this vector does
1207      *         not support the requested operation
1208      * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
1209      * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
1210      */
1211     @ForceInline
1212     public final
1213     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2)
1214                                   $type$ e1,
1215                                   Vector<$Boxtype$> v2) {
1216         return lanewise(op, broadcast(e1), v2);
1217     }
1218 
1219     /**
1220      * Combines the lane values of this vector
1221      * with the values of another vector and a broadcast scalar,
1222      * with selection of lane elements controlled by a mask.
1223      *
1224      * This is a masked lane-wise ternary operation which applies
1225      * the selected operation to each lane.
1226      * The return value will be equal to this expression:
1227      * {@code this.lanewise(op, this.broadcast(e1), v2, m)}.
1228      *
1229      * @param op the operation used to combine lane values
1230      * @param e1 the input scalar
1231      * @param v2 the other input vector
1232      * @param m the mask controlling lane selection
1233      * @return the result of applying the operation lane-wise
1234      *         to the input vectors and the scalar
1235      * @throws UnsupportedOperationException if this vector does
1236      *         not support the requested operation
1237      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
1238      * @see #lanewise(VectorOperators.Ternary,$type$,Vector)
1239      */
1240     @ForceInline
1241     public final
1242     $abstractvectortype$ lanewise(VectorOperators.Ternary op, //(op,e1,v2,m)
1243                                   $type$ e1,
1244                                   Vector<$Boxtype$> v2,
1245                                   VectorMask<$Boxtype$> m) {
1246         return blend(lanewise(op, e1, v2), m);
1247     }
1248 
1249     // (Thus endeth the Great and Mighty Ternary Ogdoad.)
1250     // https://en.wikipedia.org/wiki/Ogdoad
1251 
1252     /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV
1253     //
1254     // These include masked and non-masked versions.
1255     // This subclass adds broadcast (masked or not).
1256 
1257     /**
1258      * {@inheritDoc} <!--workaround-->
1259      * @see #add($type$)
1260      */
1261     @Override
1262     @ForceInline
1263     public final $abstractvectortype$ add(Vector<$Boxtype$> v) {
1264         return lanewise(ADD, v);
1265     }
1266 
1267     /**
1268      * Adds this vector to the broadcast of an input scalar.
1269      *
1270      * This is a lane-wise binary operation which applies
1271      * the primitive addition operation ({@code +}) to each lane.
1272      *
1273      * This method is also equivalent to the expression
1274      * {@link #lanewise(VectorOperators.Binary,$type$)
1275      *    lanewise}{@code (}{@link VectorOperators#ADD
1276      *    ADD}{@code , e)}.
1277      *
1278      * @param e the input scalar
1279      * @return the result of adding each lane of this vector to the scalar
1280      * @see #add(Vector)
1281      * @see #broadcast($type$)
1282      * @see #add($type$,VectorMask)
1283      * @see VectorOperators#ADD
1284      * @see #lanewise(VectorOperators.Binary,Vector)
1285      * @see #lanewise(VectorOperators.Binary,$type$)
1286      */
1287     @ForceInline
1288     public final
1289     $abstractvectortype$ add($type$ e) {
1290         return lanewise(ADD, e);
1291     }
1292 
1293     /**
1294      * {@inheritDoc} <!--workaround-->
1295      * @see #add($type$,VectorMask)
1296      */
1297     @Override
1298     @ForceInline
1299     public final $abstractvectortype$ add(Vector<$Boxtype$> v,
1300                                           VectorMask<$Boxtype$> m) {
1301         return lanewise(ADD, v, m);
1302     }
1303 
1304     /**
1305      * Adds this vector to the broadcast of an input scalar,
1306      * selecting lane elements controlled by a mask.
1307      *
1308      * This is a masked lane-wise binary operation which applies
1309      * the primitive addition operation ({@code +}) to each lane.
1310      *
1311      * This method is also equivalent to the expression
1312      * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
1313      *    lanewise}{@code (}{@link VectorOperators#ADD
1314      *    ADD}{@code , s, m)}.
1315      *
1316      * @param e the input scalar
1317      * @param m the mask controlling lane selection
1318      * @return the result of adding each lane of this vector to the scalar
1319      * @see #add(Vector,VectorMask)
1320      * @see #broadcast($type$)
1321      * @see #add($type$)
1322      * @see VectorOperators#ADD
1323      * @see #lanewise(VectorOperators.Binary,Vector)
1324      * @see #lanewise(VectorOperators.Binary,$type$)
1325      */
1326     @ForceInline
1327     public final $abstractvectortype$ add($type$ e,
1328                                           VectorMask<$Boxtype$> m) {
1329         return lanewise(ADD, e, m);
1330     }
1331 
1332     /**
1333      * {@inheritDoc} <!--workaround-->
1334      * @see #sub($type$)
1335      */
1336     @Override
1337     @ForceInline
1338     public final $abstractvectortype$ sub(Vector<$Boxtype$> v) {
1339         return lanewise(SUB, v);
1340     }
1341 
1342     /**
1343      * Subtracts an input scalar from this vector.
1344      *
1345      * This is a masked lane-wise binary operation which applies
1346      * the primitive subtraction operation ({@code -}) to each lane.
1347      *
1348      * This method is also equivalent to the expression
1349      * {@link #lanewise(VectorOperators.Binary,$type$)
1350      *    lanewise}{@code (}{@link VectorOperators#SUB
1351      *    SUB}{@code , e)}.
1352      *
1353      * @param e the input scalar
1354      * @return the result of subtracting the scalar from each lane of this vector
1355      * @see #sub(Vector)
1356      * @see #broadcast($type$)
1357      * @see #sub($type$,VectorMask)
1358      * @see VectorOperators#SUB
1359      * @see #lanewise(VectorOperators.Binary,Vector)
1360      * @see #lanewise(VectorOperators.Binary,$type$)
1361      */
1362     @ForceInline
1363     public final $abstractvectortype$ sub($type$ e) {
1364         return lanewise(SUB, e);
1365     }
1366 
1367     /**
1368      * {@inheritDoc} <!--workaround-->
1369      * @see #sub($type$,VectorMask)
1370      */
1371     @Override
1372     @ForceInline
1373     public final $abstractvectortype$ sub(Vector<$Boxtype$> v,
1374                                           VectorMask<$Boxtype$> m) {
1375         return lanewise(SUB, v, m);
1376     }
1377 
1378     /**
1379      * Subtracts an input scalar from this vector
1380      * under the control of a mask.
1381      *
1382      * This is a masked lane-wise binary operation which applies
1383      * the primitive subtraction operation ({@code -}) to each lane.
1384      *
1385      * This method is also equivalent to the expression
1386      * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
1387      *    lanewise}{@code (}{@link VectorOperators#SUB
1388      *    SUB}{@code , s, m)}.
1389      *
1390      * @param e the input scalar
1391      * @param m the mask controlling lane selection
1392      * @return the result of subtracting the scalar from each lane of this vector
1393      * @see #sub(Vector,VectorMask)
1394      * @see #broadcast($type$)
1395      * @see #sub($type$)
1396      * @see VectorOperators#SUB
1397      * @see #lanewise(VectorOperators.Binary,Vector)
1398      * @see #lanewise(VectorOperators.Binary,$type$)
1399      */
1400     @ForceInline
1401     public final $abstractvectortype$ sub($type$ e,
1402                                           VectorMask<$Boxtype$> m) {
1403         return lanewise(SUB, e, m);
1404     }
1405 
1406     /**
1407      * {@inheritDoc} <!--workaround-->
1408      * @see #mul($type$)
1409      */
1410     @Override
1411     @ForceInline
1412     public final $abstractvectortype$ mul(Vector<$Boxtype$> v) {
1413         return lanewise(MUL, v);
1414     }
1415 
1416     /**
1417      * Multiplies this vector by the broadcast of an input scalar.
1418      *
1419      * This is a lane-wise binary operation which applies
1420      * the primitive multiplication operation ({@code *}) to each lane.
1421      *
1422      * This method is also equivalent to the expression
1423      * {@link #lanewise(VectorOperators.Binary,$type$)
1424      *    lanewise}{@code (}{@link VectorOperators#MUL
1425      *    MUL}{@code , e)}.
1426      *
1427      * @param e the input scalar
1428      * @return the result of multiplying this vector by the given scalar
1429      * @see #mul(Vector)
1430      * @see #broadcast($type$)
1431      * @see #mul($type$,VectorMask)
1432      * @see VectorOperators#MUL
1433      * @see #lanewise(VectorOperators.Binary,Vector)
1434      * @see #lanewise(VectorOperators.Binary,$type$)
1435      */
1436     @ForceInline
1437     public final $abstractvectortype$ mul($type$ e) {
1438         return lanewise(MUL, e);
1439     }
1440 
1441     /**
1442      * {@inheritDoc} <!--workaround-->
1443      * @see #mul($type$,VectorMask)
1444      */
1445     @Override
1446     @ForceInline
1447     public final $abstractvectortype$ mul(Vector<$Boxtype$> v,
1448                                           VectorMask<$Boxtype$> m) {
1449         return lanewise(MUL, v, m);
1450     }
1451 
1452     /**
1453      * Multiplies this vector by the broadcast of an input scalar,
1454      * selecting lane elements controlled by a mask.
1455      *
1456      * This is a masked lane-wise binary operation which applies
1457      * the primitive multiplication operation ({@code *}) to each lane.
1458      *
1459      * This method is also equivalent to the expression
1460      * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
1461      *    lanewise}{@code (}{@link VectorOperators#MUL
1462      *    MUL}{@code , s, m)}.
1463      *
1464      * @param e the input scalar
1465      * @param m the mask controlling lane selection
1466      * @return the result of muling each lane of this vector to the scalar
1467      * @see #mul(Vector,VectorMask)
1468      * @see #broadcast($type$)
1469      * @see #mul($type$)
1470      * @see VectorOperators#MUL
1471      * @see #lanewise(VectorOperators.Binary,Vector)
1472      * @see #lanewise(VectorOperators.Binary,$type$)
1473      */
1474     @ForceInline
1475     public final $abstractvectortype$ mul($type$ e,
1476                                           VectorMask<$Boxtype$> m) {
1477         return lanewise(MUL, e, m);
1478     }
1479 
1480     /**
1481      * {@inheritDoc} <!--workaround-->
1482 #if[FP]
1483      * @apiNote Because the underlying scalar operator is an IEEE
1484      * floating point number, division by zero in fact will
1485      * not throw an exception, but will yield a signed
1486      * infinity or NaN.
1487 #else[FP]
1488      * @apiNote If there is a zero divisor, {@code
1489      * ArithmeticException} will be thrown.
1490      * @see #div($type$)
1491 #end[FP]
1492      */
1493     @Override
1494     @ForceInline
1495     public final $abstractvectortype$ div(Vector<$Boxtype$> v) {
1496         return lanewise(DIV, v);
1497     }
1498 
1499     /**
1500      * Divides this vector by the broadcast of an input scalar.
1501      *
1502      * This is a lane-wise binary operation which applies
1503      * the primitive division operation ({@code /}) to each lane.
1504      *
1505      * This method is also equivalent to the expression
1506      * {@link #lanewise(VectorOperators.Binary,$type$)
1507      *    lanewise}{@code (}{@link VectorOperators#DIV
1508      *    DIV}{@code , e)}.
1509      *
1510 #if[FP]
1511      * @apiNote Because the underlying scalar operator is an IEEE
1512      * floating point number, division by zero in fact will
1513      * not throw an exception, but will yield a signed
1514      * infinity or NaN.
1515 #else[FP]
1516      * @apiNote If there is a zero divisor, {@code
1517      * ArithmeticException} will be thrown.
1518 #end[FP]
1519      * @see #div($type$)
1520 
1521      *
1522      * @param e the input scalar
1523      * @return the result of dividing each lane of this vector by the scalar
1524      * @see #div(Vector)
1525      * @see #broadcast($type$)
1526      * @see #div($type$,VectorMask)
1527      * @see VectorOperators#DIV
1528      * @see #lanewise(VectorOperators.Binary,Vector)
1529      * @see #lanewise(VectorOperators.Binary,$type$)
1530      */
1531     @ForceInline
1532     public final $abstractvectortype$ div($type$ e) {
1533         return lanewise(DIV, e);
1534     }
1535 
1536     /**
1537      * {@inheritDoc} <!--workaround-->
1538      * @see #div($type$,VectorMask)
1539 #if[FP]
1540      * @apiNote Because the underlying scalar operator is an IEEE
1541      * floating point number, division by zero in fact will
1542      * not throw an exception, but will yield a signed
1543      * infinity or NaN.
1544 #else[FP]
1545      * @apiNote If there is a zero divisor, {@code
1546      * ArithmeticException} will be thrown.
1547 #end[FP]
1548      */
1549     @Override
1550     @ForceInline
1551     public final $abstractvectortype$ div(Vector<$Boxtype$> v,
1552                                           VectorMask<$Boxtype$> m) {
             // Delegates to the masked lanewise entry point with the DIV operator.
1553         return lanewise(DIV, v, m);
1554     }
1555 
1556     /**
1557      * Divides this vector by the broadcast of an input scalar,
1558      * selecting lane elements controlled by a mask.
1559      *
1560      * This is a masked lane-wise binary operation which applies
1561      * the primitive division operation ({@code /}) to each lane.
1562      *
1563      * This method is also equivalent to the expression
1564      * {@link #lanewise(VectorOperators.Binary,$type$,VectorMask)
1565      *    lanewise}{@code (}{@link VectorOperators#DIV
1566      *    DIV}{@code , e, m)}.
1567      *
1568 #if[FP]
1569      * @apiNote Because the underlying scalar operator is an IEEE
1570      * floating point number, division by zero in fact will
1571      * not throw an exception, but will yield a signed
1572      * infinity or NaN.
1573 #else[FP]
1574      * @apiNote If there is a zero divisor, {@code
1575      * ArithmeticException} will be thrown.
1576 #end[FP]
1577      *
1578      * @param e the input scalar
1579      * @param m the mask controlling lane selection
1580      * @return the result of dividing each lane of this vector by the scalar
1581      * @see #div(Vector,VectorMask)
1582      * @see #broadcast($type$)
1583      * @see #div($type$)
1584      * @see VectorOperators#DIV
1585      * @see #lanewise(VectorOperators.Binary,Vector)
1586      * @see #lanewise(VectorOperators.Binary,$type$)
1587      */
1588     @ForceInline
1589     public final $abstractvectortype$ div($type$ e,
1590                                           VectorMask<$Boxtype$> m) {
1591         return lanewise(DIV, e, m);
1592     }
1593 
1594     /// END OF FULL-SERVICE BINARY METHODS
1595 
1596     /// SECOND-TIER BINARY METHODS
1597     //
1598     // There are no masked versions.
1599 
1600     /**
1601      * {@inheritDoc} <!--workaround-->
1602 #if[FP]
1603      * @apiNote
1604      * For this method, floating point negative
1605      * zero {@code -0.0} is treated as a value distinct from, and less
1606      * than the default value (positive zero).
1607 #end[FP]
1608      */
1609     @Override
1610     @ForceInline
1611     public final $abstractvectortype$ min(Vector<$Boxtype$> v) {
1612         return lanewise(MIN, v);
1613     }
1614 
1615     // FIXME:  "broadcast of an input scalar" is really wordy.  Reduce?
1616     /**
1617      * Computes the smaller of this vector and the broadcast of an input scalar.
1618      *
1619      * This is a lane-wise binary operation which applies the
1620      * operation {@code Math.min()} to each pair of
1621      * corresponding lane values.
1622      *
1623      * This method is also equivalent to the expression
1624      * {@link #lanewise(VectorOperators.Binary,$type$)
1625      *    lanewise}{@code (}{@link VectorOperators#MIN
1626      *    MIN}{@code , e)}.
1627      *
1628      * @param e the input scalar
1629      * @return the lane-wise minimum of this vector and the given scalar
1630      * @see #min(Vector)
1631      * @see #broadcast($type$)
1632      * @see VectorOperators#MIN
1633      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
1634 #if[FP]
1635      * @apiNote
1636      * For this method, floating point negative
1637      * zero {@code -0.0} is treated as a value distinct from, and less
1638      * than the default value (positive zero).
1639 #end[FP]
1640      */
1641     @ForceInline
1642     public final $abstractvectortype$ min($type$ e) {
1643         return lanewise(MIN, e);
1644     }
1645 
1646     /**
1647      * {@inheritDoc} <!--workaround-->
1648 #if[FP]
1649      * @apiNote
1650      * For this method, negative floating-point zero compares
1651      * less than the default value, positive zero.
1652 #end[FP]
1653      */
1654     @Override
1655     @ForceInline
1656     public final $abstractvectortype$ max(Vector<$Boxtype$> v) {
             // Delegates to the unmasked lanewise entry point with the MAX operator.
1657         return lanewise(MAX, v);
1658     }
1659 
1660     /**
1661      * Computes the larger of this vector and the broadcast of an input scalar.
1662      *
1663      * This is a lane-wise binary operation which applies the
1664      * operation {@code Math.max()} to each pair of
1665      * corresponding lane values.
1666      *
1667      * This method is also equivalent to the expression
1668      * {@link #lanewise(VectorOperators.Binary,$type$)
1669      *    lanewise}{@code (}{@link VectorOperators#MAX
1670      *    MAX}{@code , e)}.
1671      *
1672      * @param e the input scalar
1673      * @return the lane-wise maximum of this vector and the given scalar
1674      * @see #max(Vector)
1675      * @see #broadcast($type$)
1676      * @see VectorOperators#MAX
1677      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
1678 #if[FP]
1679      * @apiNote
1680      * For this method, negative floating-point zero compares
1681      * less than the default value, positive zero.
1682 #end[FP]
1683      */
1684     @ForceInline
1685     public final $abstractvectortype$ max($type$ e) {
1686         return lanewise(MAX, e);
1687     }
1688 
1689 #if[BITWISE]
1690     // common bitwise operators: and, or, not (with scalar versions)
1691     /**
1692      * Computes the bitwise logical conjunction ({@code &})
1693      * of this vector and a second input vector.
1694      *
1695      * This is a lane-wise binary operation which applies the
1696      * primitive bitwise "and" operation ({@code &})
1697      * to each pair of corresponding lane values.
1698      *
1699      * This method is also equivalent to the expression
1700      * {@link #lanewise(VectorOperators.Binary,Vector)
1701      *    lanewise}{@code (}{@link VectorOperators#AND
1702      *    AND}{@code , v)}.
1703      *
1704      * <p>
1705      * This is not a full-service named operation like
1706      * {@link #add(Vector) add}.  A masked version of
1707      * this operation is not directly available
1708      * but may be obtained via the masked version of
1709      * {@code lanewise}.
1710      *
1711      * @param v a second input vector
1712      * @return the bitwise {@code &} of this vector and the second input vector
1713      * @see #and($type$)
1714      * @see #or(Vector)
1715      * @see #not()
1716      * @see VectorOperators#AND
1717      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1718      */
1719     @ForceInline
1720     public final $abstractvectortype$ and(Vector<$Boxtype$> v) {
1721         return lanewise(AND, v);
1722     }
1723 
1724     /**
1725      * Computes the bitwise logical conjunction ({@code &})
1726      * of this vector and a scalar.
1727      *
1728      * This is a lane-wise binary operation which applies the
1729      * primitive bitwise "and" operation ({@code &})
1730      * to each lane value of this vector and the scalar.
1731      *
1732      * This method is also equivalent to the expression
1733      * {@link #lanewise(VectorOperators.Binary,$type$)
1734      *    lanewise}{@code (}{@link VectorOperators#AND
1735      *    AND}{@code , e)}.
1736      *
1737      * @param e an input scalar
1738      * @return the bitwise {@code &} of this vector and scalar
1739      * @see #and(Vector)
1740      * @see VectorOperators#AND
1741      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
1742      */
1743     @ForceInline
1744     public final $abstractvectortype$ and($type$ e) {
1745         return lanewise(AND, e);
1746     }
1747 
1748     /**
1749      * Computes the bitwise logical disjunction ({@code |})
1750      * of this vector and a second input vector.
1751      *
1752      * This is a lane-wise binary operation which applies the
1753      * primitive bitwise "or" operation ({@code |})
1754      * to each pair of corresponding lane values.
1755      *
1756      * This method is also equivalent to the expression
1757      * {@link #lanewise(VectorOperators.Binary,Vector)
1758      *    lanewise}{@code (}{@link VectorOperators#OR
1759      *    OR}{@code , v)}.
1760      *
1761      * <p>
1762      * This is not a full-service named operation like
1763      * {@link #add(Vector) add}.  A masked version of
1764      * this operation is not directly available
1765      * but may be obtained via the masked version of
1766      * {@code lanewise}.
1767      *
1768      * @param v a second input vector
1769      * @return the bitwise {@code |} of this vector and the second input vector
1770      * @see #or($type$)
1771      * @see #and(Vector)
1772      * @see #not()
1773      * @see VectorOperators#OR
1774      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1775      */
1776     @ForceInline
1777     public final $abstractvectortype$ or(Vector<$Boxtype$> v) {
1778         return lanewise(OR, v);
1779     }
1780 
1781     /**
1782      * Computes the bitwise logical disjunction ({@code |})
1783      * of this vector and a scalar.
1784      *
1785      * This is a lane-wise binary operation which applies the
1786      * primitive bitwise "or" operation ({@code |})
1787      * to each lane value of this vector and the scalar.
1788      *
1789      * This method is also equivalent to the expression
1790      * {@link #lanewise(VectorOperators.Binary,$type$)
1791      *    lanewise}{@code (}{@link VectorOperators#OR
1792      *    OR}{@code , e)}.
1793      *
1794      * @param e an input scalar
1795      * @return the bitwise {@code |} of this vector and scalar
1796      * @see #or(Vector)
1797      * @see VectorOperators#OR
1798      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
1799      */
1800     @ForceInline
1801     public final $abstractvectortype$ or($type$ e) {
1802         return lanewise(OR, e);
1803     }
1804 
1805 #end[BITWISE]
1806 
1807 #if[FP]
1808     // common FP operator: pow
1809     /**
1810      * Raises this vector to the power of a second input vector.
1811      *
1812      * This is a lane-wise binary operation which applies the
1813      * method {@code Math.pow()}
1814      * to each pair of corresponding lane values.
1815      *
1816      * This method is also equivalent to the expression
1817      * {@link #lanewise(VectorOperators.Binary,Vector)
1818      *    lanewise}{@code (}{@link VectorOperators#POW
1819      *    POW}{@code , n)}.
1820      *
1821      * <p>
1822      * This is not a full-service named operation like
1823      * {@link #add(Vector) add}.  A masked version of
1824      * this operation is not directly available
1825      * but may be obtained via the masked version of
1826      * {@code lanewise}.
1827      *
1828      * @param n a vector exponent by which to raise this vector
1829      * @return the {@code n}-th power of this vector
1830      * @see #pow($type$)
1831      * @see VectorOperators#POW
1832      * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
1833      */
1834     @ForceInline
1835     public final $abstractvectortype$ pow(Vector<$Boxtype$> n) {
1836         return lanewise(POW, n);
1837     }
1838 
1839     /**
1840      * Raises this vector to a scalar power.
1841      *
1842      * This is a lane-wise binary operation which applies the
1843      * method {@code Math.pow()}
1844      * to each lane value of this vector and the scalar exponent.
1845      *
1846      * This method is also equivalent to the expression
1847      * {@link #lanewise(VectorOperators.Binary,$type$)
1848      *    lanewise}{@code (}{@link VectorOperators#POW
1849      *    POW}{@code , n)}.
1850      *
1851      * @param n a scalar exponent by which to raise this vector
1852      * @return the {@code n}-th power of this vector
1853      * @see #pow(Vector)
1854      * @see VectorOperators#POW
1855      * @see #lanewise(VectorOperators.Binary,$type$,VectorMask)
1856      */
1857     @ForceInline
1858     public final $abstractvectortype$ pow($type$ n) {
1859         return lanewise(POW, n);
1860     }
1861 #end[FP]
1862 
1863     /// UNARY METHODS
1864 
1865     /**
1866      * {@inheritDoc} <!--workaround-->
1867      */
1868     @Override
1869     @ForceInline
1870     public final
1871     $abstractvectortype$ neg() {
             // Delegates to the unary lanewise entry point with the NEG operator.
1872         return lanewise(NEG);
1873     }
1874 
1875     /**
1876      * {@inheritDoc} <!--workaround-->
1877      */
1878     @Override
1879     @ForceInline
1880     public final
1881     $abstractvectortype$ abs() {
             // Delegates to the unary lanewise entry point with the ABS operator.
1882         return lanewise(ABS);
1883     }
1884 
1885 #if[BITWISE]
1886     // not (~)
1887     /**
1888      * Computes the bitwise logical complement ({@code ~})
1889      * of this vector.
1890      *
1891      * This is a lane-wise unary operation which applies the
1892      * primitive bitwise "not" operation ({@code ~})
1893      * to each lane value.
1894      *
1895      * This method is also equivalent to the expression
1896      * {@link #lanewise(VectorOperators.Unary)
1897      *    lanewise}{@code (}{@link VectorOperators#NOT
1898      *    NOT}{@code )}.
1899      *
1900      * <p>
1901      * This is not a full-service named operation like
1902      * {@link #add(Vector) add}.  A masked version of
1903      * this operation is not directly available
1904      * but may be obtained via the masked version of
1905      * {@code lanewise}.
1906      *
1907      * @return the bitwise complement {@code ~} of this vector
1908      * @see #and(Vector)
1909      * @see VectorOperators#NOT
1910      * @see #lanewise(VectorOperators.Unary,VectorMask)
1911      */
1912     @ForceInline
1913     public final $abstractvectortype$ not() {
1914         return lanewise(NOT);
1915     }
1916 #end[BITWISE]
1917 
1918 #if[FP]
1919     // sqrt
1920     /**
1921      * Computes the square root of this vector.
1922      *
1923      * This is a lane-wise unary operation which applies the
1924      * method {@code Math.sqrt()}
1925      * to each lane value.
1926      *
1927      * This method is also equivalent to the expression
1928      * {@link #lanewise(VectorOperators.Unary)
1929      *    lanewise}{@code (}{@link VectorOperators#SQRT
1930      *    SQRT}{@code )}.
1931      *
1932      * @return the square root of this vector
1933      * @see VectorOperators#SQRT
1934      * @see #lanewise(VectorOperators.Unary,VectorMask)
1935      */
1936     @ForceInline
1937     public final $abstractvectortype$ sqrt() {
1938         return lanewise(SQRT);
1939     }
1940 #end[FP]
1941 
1942     /// COMPARISONS
1943 
1944     /**
1945      * {@inheritDoc} <!--workaround-->
1946      */
1947     @Override
1948     @ForceInline
1949     public final
1950     VectorMask<$Boxtype$> eq(Vector<$Boxtype$> v) {
             // Shorthand for the general vector comparison with the EQ operator.
1951         return compare(EQ, v);
1952     }
1953 
1954     /**
1955      * Tests if this vector is equal to an input scalar.
1956      *
1957      * This is a lane-wise binary test operation which applies
1958      * the primitive equals operation ({@code ==}) to each lane.
1959      * The result is the same as {@code compare(VectorOperators.EQ, e)}.
1960      *
1961      * @param e the input scalar
1962      * @return the result mask of testing if this vector
1963      *         is equal to {@code e}
1964      * @see #compare(VectorOperators.Comparison,$type$)
1965      */
1966     @ForceInline
1967     public final
1968     VectorMask<$Boxtype$> eq($type$ e) {
1969         return compare(EQ, e);
1970     }
1971 
1972     /**
1973      * {@inheritDoc} <!--workaround-->
1974      */
1975     @Override
1976     @ForceInline
1977     public final
1978     VectorMask<$Boxtype$> lt(Vector<$Boxtype$> v) {
             // Shorthand for the general vector comparison with the LT operator.
1979         return compare(LT, v);
1980     }
1981 
1982     /**
1983      * Tests if this vector is less than an input scalar.
1984      *
1985      * This is a lane-wise binary test operation which applies
1986      * the primitive less than operation ({@code <}) to each lane.
1987      * The result is the same as {@code compare(VectorOperators.LT, e)}.
1988      *
1989      * @param e the input scalar
1990      * @return the mask result of testing if this vector
1991      *         is less than the input scalar
1992      * @see #compare(VectorOperators.Comparison,$type$)
1993      */
1994     @ForceInline
1995     public final
1996     VectorMask<$Boxtype$> lt($type$ e) {
             // Shorthand for the scalar comparison overload with the LT operator.
1997         return compare(LT, e);
1998     }
1999 
2000     /**
2001      * {@inheritDoc} <!--workaround-->
2002      */
         // Left abstract in this class.  The package-private testTemplate that
         // follows holds the shared logic, presumably for concrete subclasses
         // to delegate to (confirm against the subclasses).
2003     @Override
2004     public abstract
2005     VectorMask<$Boxtype$> test(VectorOperators.Test op);
2006 
2007     /*package-private*/
         // Shared body for test(op), parameterized by the mask class to return.
         // Only operators for which opKind(op, VO_SPECIAL) holds are handled;
         // every other operator ends in the AssertionError at the bottom.
2008     @ForceInline
2009     final
2010     <M extends VectorMask<$Boxtype$>>
2011     M testTemplate(Class<M> maskType, Test op) {
2012         $Type$Species vsp = vspecies();  // not referenced below
2013         if (opKind(op, VO_SPECIAL)) {
                 // The tests below are decided on the lanes viewed as integral values.
2014             $Bitstype$Vector bits = this.viewAsIntegralLanes();
2015             VectorMask<$Boxbitstype$> m;
2016             if (op == IS_DEFAULT) {
                     // Integral view of the lane equals zero.
2017                 m = bits.compare(EQ, ($bitstype$) 0);
2018             } else if (op == IS_NEGATIVE) {
                     // Integral view of the lane compares below zero.
2019                 m = bits.compare(LT, ($bitstype$) 0);
2020             }
2021 #if[FP]
2022             else if (op == IS_FINITE ||
2023                      op == IS_NAN ||
2024                      op == IS_INFINITE) {
2025                 // first kill the sign:
2026                 bits = bits.and($Boxbitstype$.MAX_VALUE);
2027                 // next find the bit pattern for infinity:
2028                 $bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
2029                 // now compare:
                     // with the sign cleared, values below the infinity pattern are
                     // reported finite, values above it NaN, and an exact match infinite.
2030                 if (op == IS_FINITE) {
2031                     m = bits.compare(LT, infbits);
2032                 } else if (op == IS_NAN) {
2033                     m = bits.compare(GT, infbits);
2034                 } else {
2035                     m = bits.compare(EQ, infbits);
2036                 }
2037             }
2038 #end[FP]
2039             else {
2040                 throw new AssertionError(op);
2041             }
                 // In the FP expansion the integral-lane mask is first cast to this
                 // vector's species (template conditional), then narrowed to maskType.
2042             return maskType.cast(m{#if[FP]?.cast(this.vspecies())});
2043         }
             // NOTE(review): opc is never read; opCode(op) may be kept for its
             // argument checking -- confirm before removing.
2044         int opc = opCode(op);
2045         throw new AssertionError(op);
2046     }
2047 
2048     /**
2049      * {@inheritDoc} <!--workaround-->
2050      */
2051     @Override
2052     @ForceInline
2053     public final
2054     VectorMask<$Boxtype$> test(VectorOperators.Test op,
2055                                   VectorMask<$Boxtype$> m) {
             // Run the unmasked test, then AND the resulting mask with m.
2056         return test(op).and(m);
2057     }
2058 
2059     /**
2060      * {@inheritDoc} <!--workaround-->
2061      */
         // Left abstract in this class.  compareTemplate(Class, Comparison, Vector)
         // below holds the shared logic, presumably for concrete subclasses to
         // delegate to (confirm against the subclasses).
2062     @Override
2063     public abstract
2064     VectorMask<$Boxtype$> compare(VectorOperators.Comparison op, Vector<$Boxtype$> v);
2065 
2066     /*package-private*/
         // Shared body for the vector-to-vector compare, parameterized by the mask
         // class to return.  Hands the operation to VectorIntrinsics.compare,
         // supplying a lane-by-lane Java lambda as its last argument.
2067     @ForceInline
2068     final
2069     <M extends VectorMask<$Boxtype$>>
2070     M compareTemplate(Class<M> maskType, Comparison op, Vector<$Boxtype$> v) {
2071         Objects.requireNonNull(v);
2072         $Type$Species vsp = vspecies();  // not referenced below
2073         $abstractvectortype$ that = ($abstractvectortype$) v;
             // NOTE(review): check(this) presumably verifies that both vectors are
             // compatible (same species) -- its definition is outside this section.
2074         that.check(this);
2075         int opc = opCode(op);
2076         return VectorIntrinsics.compare(
2077             opc, getClass(), maskType, $type$.class, length(),
2078             this, that,
2079             (cond, v0, v1) -> {
                     // The inner lambda ignores its own cond_ and lane index i and
                     // uses the cond captured from the enclosing lambda.
2080                 AbstractMask<$Boxtype$> m
2081                     = v0.bTest(cond, v1, (cond_, i, a, b)
2082                                -> compareWithOp(cond, a, b));
2083                 @SuppressWarnings("unchecked")
2084                 M m2 = (M) m;
2085                 return m2;
2086             });
2087     }
2088 
2089     @ForceInline
2090     private static
2091     boolean compareWithOp(int cond, $type$ a, $type$ b) {
2092         switch (cond) {
2093         case VectorIntrinsics.BT_eq:  return a == b;
2094         case VectorIntrinsics.BT_ne:  return a != b;
2095         case VectorIntrinsics.BT_lt:  return a <  b;
2096         case VectorIntrinsics.BT_le:  return a <= b;
2097         case VectorIntrinsics.BT_gt:  return a >  b;
2098         case VectorIntrinsics.BT_ge:  return a >= b;
2099         }
2100         throw new AssertionError();
2101     }
2102 
2103     /**
2104      * {@inheritDoc} <!--workaround-->
2105      */
2106     @Override
2107     @ForceInline
2108     public final
2109     VectorMask<$Boxtype$> compare(VectorOperators.Comparison op,
2110                                   Vector<$Boxtype$> v,
2111                                   VectorMask<$Boxtype$> m) {
             // Run the unmasked comparison, then AND the resulting mask with m.
2112         return compare(op, v).and(m);
2113     }
2114 
2115     /**
2116      * Tests this vector by comparing it with an input scalar,
2117      * according to the given comparison operation.
2118      *
2119      * This is a lane-wise binary test operation which applies
2120      * the comparison operation to each lane.
2121      * <p>
2122      * The result is the same as
2123      * {@code compare(op, broadcast(species(), e))}.
2124      * That is, the scalar may be regarded as broadcast to
2125      * a vector of the same species, and then compared
2126      * against the original vector, using the selected
2127      * comparison operation.
2128      *
2129      * @param op the operation used to compare lane values
2130      * @param e the input scalar
2131      * @return the mask result of testing lane-wise if this vector
2132      *         compares to the input, according to the selected
2133      *         comparison operator
2134      * @see $abstractvectortype$#compare(VectorOperators.Comparison,Vector)
2135      * @see #eq($type$)
2136      * @see #lt($type$)
2137      */
         // Left abstract in this class.  compareTemplate(Class, Comparison, $type$)
         // below broadcasts the scalar and reuses the vector comparison template.
2138     public abstract
2139     VectorMask<$Boxtype$> compare(Comparison op, $type$ e);
2140 
2141     /*package-private*/
         // Scalar form of the comparison template: broadcast e to a vector and
         // reuse the vector-to-vector template.
2142     @ForceInline
2143     final
2144     <M extends VectorMask<$Boxtype$>>
2145     M compareTemplate(Class<M> maskType, Comparison op, $type$ e) {
2146         return compareTemplate(maskType, op, broadcast(e));
2147     }
2148 
2149     /**
2150      * Tests this vector by comparing it with an input scalar,
2151      * according to the given comparison operation,
2152      * in lanes selected by a mask.
2153      *
2154      * This is a masked lane-wise binary test operation which applies
2155      * the comparison operation to each pair of corresponding lane values.
2156      *
2157      * The returned result is equal to the expression
2158      * {@code compare(op,e).and(m)}.
2159      *
2160      * @param op the operation used to compare lane values
2161      * @param e the input scalar
2162      * @param m the mask controlling lane selection
2163      * @return the mask result of testing lane-wise if this vector
2164      *         compares to the input, according to the selected
2165      *         comparison operator,
2166      *         and only in the lanes selected by the mask
2167      * @see $abstractvectortype$#compare(VectorOperators.Comparison,Vector,VectorMask)
2168      */
2169     @ForceInline
2170     public final VectorMask<$Boxtype$> compare(VectorOperators.Comparison op,
2171                                                $type$ e,
2172                                                VectorMask<$Boxtype$> m) {
2173         return compare(op, e).and(m);
2174     }
2175 
2176 #if[!long]
2177     /**
2178      * {@inheritDoc} <!--workaround-->
2179      */
         // Left abstract in this class.  compareTemplate(Class, Comparison, long)
         // below broadcasts the long and reuses the vector comparison template.
2180     @Override
2181     public abstract
2182     VectorMask<$Boxtype$> compare(Comparison op, long e);
2183 
2184     /*package-private*/
         // long-scalar form of the comparison template: e is a long, so the
         // broadcast call takes a long argument before the vector template is reused.
2185     @ForceInline
2186     final
2187     <M extends VectorMask<$Boxtype$>>
2188     M compareTemplate(Class<M> maskType, Comparison op, long e) {
2189         return compareTemplate(maskType, op, broadcast(e));
2190     }
2191 
2192     /**
2193      * {@inheritDoc} <!--workaround-->
2194      */
2195     @Override
2196     @ForceInline
2197     public final
2198     VectorMask<$Boxtype$> compare(Comparison op, long e, VectorMask<$Boxtype$> m) {
             // Broadcast the long scalar, then use the masked vector comparison.
2199         return compare(op, broadcast(e), m);
2200     }
2201 
2202 
2203 #end[!long]
2204 
2205     /**
2206      * {@inheritDoc} <!--workaround-->
2207      */
         // Left abstract in this class.  blendTemplate below holds the shared
         // logic, presumably for concrete subclasses to delegate to.
2208     @Override public abstract
2209     $abstractvectortype$ blend(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m);
2210 
2211     /*package-private*/
         // Shared body for blend(v, m), parameterized by the mask class.  Hands the
         // operation to VectorIntrinsics.blend, supplying a Java lambda as its
         // last argument.
2212     @ForceInline
2213     final
2214     <M extends VectorMask<$Boxtype$>>
2215     $abstractvectortype$
2216     blendTemplate(Class<M> maskType, $abstractvectortype$ v, M m) {
2217         v.check(this);
2218         return VectorIntrinsics.blend(
2219             getClass(), maskType, $type$.class, length(),
2220             this, v, m,
                 // Masked binary op whose lane function returns its second operand,
                 // i.e. the lane taken from v.  NOTE(review): lanes left unselected by
                 // m_ presumably keep v0's value -- bOp is defined outside this section.
2221             (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
2222     }
2223 
2224     /**
2225      * {@inheritDoc} <!--workaround-->
2226      */
         // Left abstract in this class.  addIndexTemplate below holds the shared
         // logic, presumably for concrete subclasses to delegate to.
2227     @Override public abstract $abstractvectortype$ addIndex(int scale);
2228 
2229     /*package-private*/
         // Shared body for addIndex(scale).  Hands the operation to
         // VectorIntrinsics.indexVector; the supplied lambda adds iota * scale to
         // the vector, skipping the multiply when the scale is 1.
2230     @ForceInline
2231     final $abstractvectortype$ addIndexTemplate(int scale) {
2232         $Type$Species vsp = vspecies();
2233         // make sure VLENGTH*scale doesn't overflow:
2234         vsp.checkScale(scale);
2235         return VectorIntrinsics.indexVector(
2236             getClass(), $type$.class, length(),
2237             this, scale, vsp,
2238             (v, scale_, s)
2239             -> {
2240                 // If the platform doesn't support an INDEX
2241                 // instruction directly, load IOTA from memory
2242                 // and multiply.
2243                 $abstractvectortype$ iota = s.iota();
                     // The int scale is cast to the lane type before use.
2244                 $type$ sc = ($type$) scale_;
2245                 return v.add(sc == 1 ? iota : iota.mul(sc));
2246             });
2247     }
2248 
2249     /**
2250      * Replaces selected lanes of this vector with
2251      * a scalar value
2252      * under the control of a mask.
2253      *
2254      * This is a masked lane-wise binary operation which
2255      * selects each lane value from one or the other input.
2256      *
2257      * The returned result is equal to the expression
2258      * {@code blend(broadcast(e),m)}.
2259      *
2260      * @param e the input scalar, containing the replacement lane value
2261      * @param m the mask controlling lane selection of the scalar
2262      * @return the result of blending the lane elements of this vector with
2263      *         the scalar value
2264      */
2265     @ForceInline
2266     public final $abstractvectortype$ blend($type$ e,
2267                                             VectorMask<$Boxtype$> m) {
             // Broadcast the scalar to a full vector, then use the vector blend.
2268         return blend(broadcast(e), m);
2269     }
2270 
2271 #if[!long]
2272     /**
2273      * Replaces selected lanes of this vector with
2274      * a scalar value
2275      * under the control of a mask.
2276      *
2277      * This is a masked lane-wise binary operation which
2278      * selects each lane value from one or the other input.
2279      *
2280      * The returned result is equal to the expression
2281      * {@code blend(broadcast(e),m)}.
2282      *
2283      * @param e the input scalar, containing the replacement lane value
2284      * @param m the mask controlling lane selection of the scalar
2285      * @return the result of blending the lane elements of this vector with
2286      *         the scalar value
2287      */
2288     @ForceInline
2289     public final $abstractvectortype$ blend(long e,
2290                                             VectorMask<$Boxtype$> m) {
             // e is a long here, so the broadcast call takes a long argument
             // before the vector blend is applied.
2291         return blend(broadcast(e), m);
2292     }
2293 #end[!long]
2294 
2295     /**
2296      * {@inheritDoc} <!--workaround-->
2297      */
         // Left abstract in this class.  sliceTemplate below holds the shared
         // array-based logic, presumably for concrete subclasses to delegate to.
2298     @Override
2299     public abstract
2300     $abstractvectortype$ slice(int origin, Vector<$Boxtype$> v1);
2301 
2302     /*package-private*/
2303     final
2304     @ForceInline
2305     $abstractvectortype$ sliceTemplate(int origin, Vector<$Boxtype$> v1) {
             // Result = lanes [origin, VLENGTH) of this vector, followed by the
             // first `origin` lanes of v1.
2306         $abstractvectortype$ other = ($abstractvectortype$) v1;
2307         other.check(this);
2308         $type$[] head = this.vec();
2309         $type$[] tail = other.vec();
2310         int laneCount = head.length;
2311         $type$[] lanes = new $type$[laneCount];
2312         int kept = laneCount - origin;
2313         System.arraycopy(head, origin, lanes, 0, kept);
2314         System.arraycopy(tail, 0, lanes, kept, origin);
2315         return vectorFactory(lanes);
2316     }
2317 
2318     /**
2319      * {@inheritDoc} <!--workaround-->
2320      */
2321     @Override
2322     @ForceInline
2323     public final
2324     $abstractvectortype$ slice(int origin,
2325                                Vector<$Boxtype$> w,
2326                                VectorMask<$Boxtype$> m) {
             // Build the unmasked slice, then blend it over an all-zero vector
             // under m.  The zero is cast to the lane type so that the
             // broadcast($type$) overload is chosen directly, as the masked
             // rearrangeTemplate already does, rather than whichever overload
             // an int literal happens to resolve to.
2327         return broadcast(($type$)0).blend(slice(origin, w), m);
2328     }
2329 
2330     /**
2331      * {@inheritDoc} <!--workaround-->
2332      */
         // Single-vector form of slice; declared abstract here, with no shared
         // template for it in this part of the file.
2333     @Override
2334     public abstract
2335     $abstractvectortype$ slice(int origin);
2336 
2337     /**
2338      * {@inheritDoc} <!--workaround-->
2339      */
         // Left abstract in this class.  unsliceTemplate(int, Vector, int) below
         // holds the shared array-based logic, presumably for concrete
         // subclasses to delegate to.
2340     @Override
2341     public abstract
2342     $abstractvectortype$ unslice(int origin, Vector<$Boxtype$> w, int part);
2343 
2344     /*package-private*/
2345     final
2346     @ForceInline
2347     $abstractvectortype$
2348     unsliceTemplate(int origin, Vector<$Boxtype$> w, int part) {
             // Starts from a copy of w's lanes and overwrites one region with
             // lanes of this vector: part 0 writes this vector's leading lanes
             // at [origin, VLENGTH); part 1 writes its trailing lanes at
             // [0, origin).  Any other part number is rejected.
2349         $abstractvectortype$ background = ($abstractvectortype$) w;
2350         background.check(this);
2351         $type$[] src = this.vec();
2352         $type$[] dst = background.vec().clone();
2353         int split = dst.length - origin;
2354         if (part == 0) {
2355             System.arraycopy(src, 0, dst, origin, split);
2356         } else if (part == 1) {
2357             System.arraycopy(src, split, dst, 0, origin);
2358         } else {
2359             throw wrongPartForSlice(part);
2360         }
2361         return vectorFactory(dst);
2362     }
2367 
2368     /*package-private*/
         // Masked form of the unslice template, built from the slice, blend and
         // unmasked unslice templates in that order.
2369     final
2370     @ForceInline
2371     <M extends VectorMask<$Boxtype$>>
2372     $abstractvectortype$
2373     unsliceTemplate(Class<M> maskType, int origin, Vector<$Boxtype$> w, int part, M m) {
2374         $abstractvectortype$ that = ($abstractvectortype$) w;
2375         that.check(this);
             // Slice w against itself at the same origin ...
2376         $abstractvectortype$ slice = that.sliceTemplate(origin, that);
             // ... blend this vector into that slice under m ...
2377         slice = slice.blendTemplate(maskType, this, m);
             // ... and unslice the blended lanes back into w at the requested part.
2378         return slice.unsliceTemplate(origin, w, part);
2379     }
2380 
2381     /**
2382      * {@inheritDoc} <!--workaround-->
2383      */
         // Left abstract in this class.  The masked unsliceTemplate defined just
         // above holds the shared logic, presumably for concrete subclasses to
         // delegate to.
2384     @Override
2385     public abstract
2386     $abstractvectortype$ unslice(int origin, Vector<$Boxtype$> w, int part, VectorMask<$Boxtype$> m);
2387 
2388     /**
2389      * {@inheritDoc} <!--workaround-->
2390      */
         // Single-vector form of unslice; declared abstract here, with no shared
         // template for it in this part of the file.
2391     @Override
2392     public abstract
2393     $abstractvectortype$ unslice(int origin); 
2394 
2395     private ArrayIndexOutOfBoundsException
2396     wrongPartForSlice(int part) {
             // Builds (does not throw) the exception for an unsupported part
             // number; the caller is expected to throw it.
2397         return new ArrayIndexOutOfBoundsException(
2398             String.format("bad part number %d for slice operation", part));
2399     }
2401 
2402     /**
2403      * {@inheritDoc} <!--workaround-->
2404      */
         // Left abstract in this class.  rearrangeTemplate(Class, S) below holds
         // the shared logic, presumably for concrete subclasses to delegate to.
2405     @Override
2406     public abstract
2407     $abstractvectortype$ rearrange(VectorShuffle<$Boxtype$> m);
2408 
2409     /*package-private*/
         // Shared body for the unmasked rearrange, parameterized by the shuffle
         // class.  Hands the operation to VectorIntrinsics.rearrangeOp, supplying
         // a lane-by-lane Java lambda as its last argument.
2410     @ForceInline
2411     final
2412     <S extends VectorShuffle<$Boxtype$>>
2413     $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype, S shuffle) {
             // The shuffle is checked before any lane is moved.  NOTE(review):
             // checkIndexes presumably throws on out-of-range source indexes --
             // its definition is outside this section.
2414         shuffle.checkIndexes();
2415         return VectorIntrinsics.rearrangeOp(
2416             getClass(), shuffletype, $type$.class, length(),
2417             this, shuffle,
2418             (v1, s_) -> v1.uOp((i, a) -> {
                     // Output lane i takes the input lane named by the shuffle.
2419                 int ei = s_.laneSource(i);
2420                 return v1.lane(ei);
2421             }));
2422     }
2423 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Masked, single-vector rearrange; abstract here, body supplied by
    // the concrete subclasses.
    @Override
    public abstract
    $abstractvectortype$ rearrange(VectorShuffle<$Boxtype$> s,
                                   VectorMask<$Boxtype$> m);
2431 
2432     /*package-private*/
2433     @ForceInline
2434     final
2435     <S extends VectorShuffle<$Boxtype$>>
2436     $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype,
2437                                            S shuffle,
2438                                            VectorMask<$Boxtype$> m) {
2439         $abstractvectortype$ unmasked =
2440             VectorIntrinsics.rearrangeOp(
2441                 getClass(), shuffletype, $type$.class, length(),
2442                 this, shuffle,
2443                 (v1, s_) -> v1.uOp((i, a) -> {
2444                     int ei = s_.laneSource(i);
2445                     return ei < 0 ? 0 : v1.lane(ei);
2446                 }));
2447         VectorMask<$Boxtype$> valid = shuffle.laneIsValid();
2448         if (m.andNot(valid).anyTrue()) {
2449             shuffle.checkIndexes();
2450             throw new AssertionError();
2451         }
2452         return broadcast(($type$)0).blend(unmasked, valid);
2453     }
2454 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Two-vector rearrange (this vector plus a second input v); abstract
    // here, body supplied by the concrete subclasses.
    @Override
    public abstract
    $abstractvectortype$ rearrange(VectorShuffle<$Boxtype$> s,
                                   Vector<$Boxtype$> v);
2462 
2463     /*package-private*/
2464     @ForceInline
2465     final
2466     <S extends VectorShuffle<$Boxtype$>>
2467     $abstractvectortype$ rearrangeTemplate(Class<S> shuffletype,
2468                                            S shuffle,
2469                                            $abstractvectortype$ v) {
2470         VectorMask<$Boxtype$> valid = shuffle.laneIsValid();
2471         S ws = shuffletype.cast(shuffle.wrapIndexes());
2472         $abstractvectortype$ r0 =
2473             VectorIntrinsics.rearrangeOp(
2474                 getClass(), shuffletype, $type$.class, length(),
2475                 this, ws,
2476                 (v0, s_) -> v0.uOp((i, a) -> {
2477                     int ei = s_.laneSource(i);
2478                     return v0.lane(ei);
2479                 }));
2480         $abstractvectortype$ r1 =
2481             VectorIntrinsics.rearrangeOp(
2482                 getClass(), shuffletype, $type$.class, length(),
2483                 v, ws,
2484                 (v1, s_) -> v1.uOp((i, a) -> {
2485                     int ei = s_.laneSource(i);
2486                     return v1.lane(ei);
2487                 }));
2488         return r1.blend(r0, valid);
2489     }
2490 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Unmasked selectFrom; abstract here, body supplied by the concrete
    // subclasses.
    @Override
    public abstract
    $abstractvectortype$ selectFrom(Vector<$Boxtype$> v);
2497 
2498     /*package-private*/
2499     @ForceInline
2500     final $abstractvectortype$ selectFromTemplate($abstractvectortype$ v) {
2501         return v.rearrange(this.toShuffle());
2502     }
2503 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    // Masked selectFrom; abstract here, body supplied by the concrete
    // subclasses.  The first parameter (named "s") is a vector, not a
    // shuffle.
    @Override
    public abstract
    $abstractvectortype$ selectFrom(Vector<$Boxtype$> s, VectorMask<$Boxtype$> m);
2510 
2511     /*package-private*/
2512     @ForceInline
2513     final $abstractvectortype$ selectFromTemplate($abstractvectortype$ v,
2514                                                   AbstractMask<$Boxtype$> m) {
2515         return v.rearrange(this.toShuffle(), m);
2516     }
2517 
2518     /// Ternary operations
2519 
2520 #if[BITWISE]
2521     /**
2522      * Blends together the bits of two vectors under
2523      * the control of a third, which supplies mask bits.
2524      *
2525      *
2526      * This is a lane-wise ternary operation which performs
2527      * a bitwise blending operation {@code (a&~c)|(b&c)}
2528      * to each lane.
2529      *
2530      * This method is also equivalent to the expression
2531      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2532      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2533      *    BITWISE_BLEND}{@code , bits, mask)}.
2534      *
2535      * @param bits input bits to blend into the current vector
2536      * @param mask a bitwise mask to enable blending of the input bits
2537      * @return the bitwise blend of the given bits into the current vector,
2538      *         under control of the bitwise mask
2539      * @see #bitwiseBlend($type$,$type$)
2540      * @see #bitwiseBlend($type$,Vector)
2541      * @see #bitwiseBlend(Vector,$type$)
2542      * @see VectorOperators#BITWISE_BLEND
2543      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2544      */
2545     @ForceInline
2546     public final
2547     $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, Vector<$Boxtype$> mask) {
2548         return lanewise(BITWISE_BLEND, bits, mask);
2549     }
2550 
2551     /**
2552      * Blends together the bits of a vector and a scalar under
2553      * the control of another scalar, which supplies mask bits.
2554      *
2555      *
2556      * This is a lane-wise ternary operation which performs
2557      * a bitwise blending operation {@code (a&~c)|(b&c)}
2558      * to each lane.
2559      *
2560      * This method is also equivalent to the expression
2561      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2562      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2563      *    BITWISE_BLEND}{@code , bits, mask)}.
2564      *
2565      * @param bits input bits to blend into the current vector
2566      * @param mask a bitwise mask to enable blending of the input bits
2567      * @return the bitwise blend of the given bits into the current vector,
2568      *         under control of the bitwise mask
2569      * @see #bitwiseBlend(Vector,Vector)
2570      * @see VectorOperators#BITWISE_BLEND
2571      * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
2572      */
2573     @ForceInline
2574     public final
2575     $abstractvectortype$ bitwiseBlend($type$ bits, $type$ mask) {
2576         return lanewise(BITWISE_BLEND, bits, mask);
2577     }
2578 
2579     /**
2580      * Blends together the bits of a vector and a scalar under
2581      * the control of another vector, which supplies mask bits.
2582      *
2583      *
2584      * This is a lane-wise ternary operation which performs
2585      * a bitwise blending operation {@code (a&~c)|(b&c)}
2586      * to each lane.
2587      *
2588      * This method is also equivalent to the expression
2589      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2590      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2591      *    BITWISE_BLEND}{@code , bits, mask)}.
2592      *
2593      * @param bits input bits to blend into the current vector
2594      * @param mask a bitwise mask to enable blending of the input bits
2595      * @return the bitwise blend of the given bits into the current vector,
2596      *         under control of the bitwise mask
2597      * @see #bitwiseBlend(Vector,Vector)
2598      * @see VectorOperators#BITWISE_BLEND
2599      * @see #lanewise(VectorOperators.Ternary,$type$,Vector,VectorMask)
2600      */
2601     @ForceInline
2602     public final
2603     $abstractvectortype$ bitwiseBlend($type$ bits, Vector<$Boxtype$> mask) {
2604         return lanewise(BITWISE_BLEND, bits, mask);
2605     }
2606 
2607     /**
2608      * Blends together the bits of two vectors under
2609      * the control of a scalar, which supplies mask bits.
2610      *
2611      *
2612      * This is a lane-wise ternary operation which performs
2613      * a bitwise blending operation {@code (a&~c)|(b&c)}
2614      * to each lane.
2615      *
2616      * This method is also equivalent to the expression
2617      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2618      *    lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND
2619      *    BITWISE_BLEND}{@code , bits, mask)}.
2620      *
2621      * @param bits input bits to blend into the current vector
2622      * @param mask a bitwise mask to enable blending of the input bits
2623      * @return the bitwise blend of the given bits into the current vector,
2624      *         under control of the bitwise mask
2625      * @see #bitwiseBlend(Vector,Vector)
2626      * @see VectorOperators#BITWISE_BLEND
2627      * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask)
2628      */
2629     @ForceInline
2630     public final
2631     $abstractvectortype$ bitwiseBlend(Vector<$Boxtype$> bits, $type$ mask) {
2632         return lanewise(BITWISE_BLEND, bits, mask);
2633     }
2634 #end[BITWISE]
2635 
2636 #if[FP]
2637     /**
2638      * Multiplies this vector by a second input vector, and sums
2639      * the result with a third.
2640      *
2641      * Extended precision is used for the intermediate result,
2642      * avoiding possible loss of precision from rounding once
2643      * for each of the two operations.
2644      * The result is numerically close to {@code this.mul(b).add(c)},
2645      * and is typically closer to the true mathematical result.
2646      *
2647      * This is a lane-wise ternary operation which applies the
2648      * {@link Math#fma($type$,$type$,$type$) Math#fma(a,b,c)}
2649      * operation to each lane.
2650      *
2651      * This method is also equivalent to the expression
2652      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2653      *    lanewise}{@code (}{@link VectorOperators#FMA
2654      *    FMA}{@code , b, c)}.
2655      *
2656      * @param b the second input vector, supplying multiplier values
2657      * @param c the third input vector, supplying addend values
2658      * @return the product of this vector and the second input vector
2659      *         summed with the third input vector, using extended precision
2660      *         for the intermediate result
2661      * @see #fma($type$,$type$)
2662      * @see VectorOperators#FMA
2663      * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
2664      */
2665     @ForceInline
2666     public final
2667     $abstractvectortype$ fma(Vector<$Boxtype$> b, Vector<$Boxtype$> c) {
2668         return lanewise(FMA, b, c);
2669     }
2670 
2671     /**
2672      * Multiplies this vector by a scalar multiplier, and sums
2673      * the result with a scalar addend.
2674      *
2675      * Extended precision is used for the intermediate result,
2676      * avoiding possible loss of precision from rounding once
2677      * for each of the two operations.
2678      * The result is numerically close to {@code this.mul(b).add(c)},
2679      * and is typically closer to the true mathematical result.
2680      *
2681      * This is a lane-wise ternary operation which applies the
2682      * {@link Math#fma($type$,$type$,$type$) Math#fma(a,b,c)}
2683      * operation to each lane.
2684      *
2685      * This method is also equivalent to the expression
2686      * {@link #lanewise(VectorOperators.Ternary,Vector,Vector)
2687      *    lanewise}{@code (}{@link VectorOperators#FMA
2688      *    FMA}{@code , b, c)}.
2689      *
2690      * @param b the scalar multiplier
2691      * @param c the scalar addend
2692      * @return the product of this vector and the scalar multiplier
2693      *         summed with scalar addend, using extended precision
2694      *         for the intermediate result
2695      * @see #fma(Vector,Vector)
2696      * @see VectorOperators#FMA
2697      * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask)
2698      */
2699     @ForceInline
2700     public final
2701     $abstractvectortype$ fma($type$ b, $type$ c) {
2702         return lanewise(FMA, b, c);
2703     }
2704 
2705     // Don't bother with (Vector,$type$) and ($type$,Vector) overloadings.
2706 #end[FP]
2707 
2708     // Type specific horizontal reductions
2709 
    /**
     * Returns a value accumulated from all the lanes of this vector.
     * <p>
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to all the lane elements.
     *
     * <p>
     * A few reduction operations do not support arbitrary reordering
     * of their operands, yet are included here because of their
     * usefulness.
     *
     * <ul>
     * <li>
     * In the case of {@code FIRST_NONZERO}, the reduction returns
     * the value from the lowest-numbered non-zero lane.
     *
#if[FP]
     * (As with {@code MAX} and {@code MIN}, floating point negative
     * zero {@code -0.0} is treated as a value distinct from
     * the default value, positive zero. So a first-nonzero lane reduction
     * might return {@code -0.0} even in the presence of non-zero
     * lane values.)
#end[FP]
     *
     * <li>
     * In the case of floating point addition and multiplication, the
     * precise result will reflect the choice of an arbitrary order
     * of operations, which may even vary over time.
     *
     * <li>
     * All other reduction operations are fully commutative and
     * associative.  The implementation can choose any order of
     * processing, yet it will always produce the same result.
     *
     * </ul>
     *
#if[FP]
     * @implNote
     * The value of a floating-point reduction may be a function
     * both of the input values as well as the order of scalar
     * operations which combine those values, specifically in the
     * case of {@code ADD} and {@code MUL} operations, where
     * details of rounding depend on operand order.
     * In those cases, the order of operations of this method is
     * intentionally not defined.  This allows the JVM to generate
     * optimal machine code for the underlying platform at runtime. If
     * the platform supports a vector instruction to add or multiply
     * all values in the vector, or if there is some other efficient
     * machine code sequence, then the JVM has the option of
     * generating this machine code. Otherwise, the default
     * implementation is applied, which adds vector elements
     * sequentially from beginning to end.  For this reason, the
     * output of this method may vary for the same input values,
     * if the selected operator is {@code ADD} or {@code MUL}.
     *
#end[FP]
     *
     * @param op the operation used to combine lane values
     * @return the accumulated result
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative,VectorMask)
     * @see #add(Vector)
     * @see #mul(Vector)
     * @see #min(Vector)
     * @see #max(Vector)
#if[BITWISE]
     * @see #and(Vector)
     * @see #or(Vector)
     * @see VectorOperators#XOR
#end[BITWISE]
     * @see VectorOperators#FIRST_NONZERO
     */
    public abstract $type$ reduceLanes(VectorOperators.Associative op);
2784 
    /**
     * Returns a value accumulated from selected lanes of this vector,
     * controlled by a mask.
     * <p>
     * This is an associative cross-lane reduction operation which
     * applies the specified operation to the selected lane elements.
     * <p>
     * If no elements are selected, an operation-specific identity
     * value is returned.
     * <ul>
     * <li>
     * If the operation is
#if[BITWISE]
     *  {@code ADD}, {@code XOR}, {@code OR},
#else[BITWISE]
     *  {@code ADD}
#end[BITWISE]
     * or {@code FIRST_NONZERO},
     * then the identity value is {#if[FP]?positive }zero, the default {@code $type$} value.
     * <li>
     * If the operation is {@code MUL},
     * then the identity value is one.
#if[BITWISE]
     * <li>
     * If the operation is {@code AND},
     * then the identity value is minus one (all bits set).
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code $Boxtype$.MIN_VALUE}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code $Boxtype$.MAX_VALUE}.
#end[BITWISE]
#if[FP]
     * <li>
     * If the operation is {@code MAX},
     * then the identity value is {@code $Boxtype$.NEGATIVE_INFINITY}.
     * <li>
     * If the operation is {@code MIN},
     * then the identity value is {@code $Boxtype$.POSITIVE_INFINITY}.
#end[FP]
     * </ul>
#if[FP]
     *
     * @implNote
     * The value of a floating-point reduction may be a function
     * both of the input values as well as the order of scalar
     * operations which combine those values, specifically in the
     * case of {@code ADD} and {@code MUL} operations, where
     * details of rounding depend on operand order.
     * See {@linkplain #reduceLanes(VectorOperators.Associative)
     * the unmasked version of this method}
     * for a discussion.
     *
#end[FP]
     *
     * @param op the operation used to combine lane values
     * @param m the mask controlling lane selection
     * @return the reduced result accumulated from the selected lane values
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #reduceLanes(VectorOperators.Associative)
     */
    public abstract $type$ reduceLanes(VectorOperators.Associative op,
                                       VectorMask<$Boxtype$> m);
2850 
2851     /*package-private*/
2852     @ForceInline
2853     final
2854     $type$ reduceLanesTemplate(VectorOperators.Associative op,
2855                                VectorMask<$Boxtype$> m) {
2856         $abstractvectortype$ v = reduceIdentityVector(op).blend(this, m);
2857         return v.reduceLanesTemplate(op);
2858     }
2859 
2860     /*package-private*/
2861     @ForceInline
2862     final
2863     $type$ reduceLanesTemplate(VectorOperators.Associative op) {
2864         if (op == FIRST_NONZERO) {
2865             // FIXME:  The JIT should handle this, and other scan ops alos.
2866             VectorMask<$Boxbitstype$> thisNZ
2867                 = this.viewAsIntegralLanes().compare(NE, ($bitstype$) 0);
2868             return this.lane(thisNZ.firstTrue());
2869         }
2870         int opc = opCode(op);
2871         return fromBits(VectorIntrinsics.reductionCoerced(
2872             opc, getClass(), $type$.class, length(),
2873             this,
2874             REDUCE_IMPL.find(op, opc, (opc_) -> {
2875               switch (opc_) {
2876               case VECTOR_OP_ADD: return v ->
2877                       toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a + b)));
2878               case VECTOR_OP_MUL: return v ->
2879                       toBits(v.rOp(($type$)1, (i, a, b) -> ($type$)(a * b)));
2880               case VECTOR_OP_MIN: return v ->
2881                       toBits(v.rOp(MAX_OR_INF, (i, a, b) -> ($type$) Math.min(a, b)));
2882               case VECTOR_OP_MAX: return v ->
2883                       toBits(v.rOp(MIN_OR_INF, (i, a, b) -> ($type$) Math.max(a, b)));
2884               case VECTOR_OP_FIRST_NONZERO: return v ->
2885                       toBits(v.rOp(($type$)0, (i, a, b) -> toBits(a) != 0 ? a : b));
2886 #if[BITWISE]
2887               case VECTOR_OP_AND: return v ->
2888                       toBits(v.rOp(($type$)-1, (i, a, b) -> ($type$)(a & b)));
2889               case VECTOR_OP_OR: return v ->
2890                       toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a | b)));
2891               case VECTOR_OP_XOR: return v ->
2892                       toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a ^ b)));
2893 #end[BITWISE]
2894 #if[FP]
2895               case VECTOR_OP_OR: return v ->
2896                       toBits(v.rOp(($type$)0, (i, a, b) -> fromBits(toBits(a) | toBits(b))));
2897 #end[FP]
2898               default: return null;
2899               }})));
2900     }
    // Per-operator lookup of the scalar reduction implementations that
    // reduceLanesTemplate obtains through REDUCE_IMPL.find(op, opc, ...).
    // NOTE(review): presumably ImplCache memoizes the function produced
    // for each operator -- confirm against ImplCache.
    private static final
    ImplCache<Associative,Function<$abstractvectortype$,Long>> REDUCE_IMPL
        = new ImplCache<>(Associative.class, $Type$Vector.class);
2904 
2905     private
2906     @ForceInline
2907     $abstractvectortype$ reduceIdentityVector(VectorOperators.Associative op) {
2908         int opc = opCode(op);
2909         UnaryOperator<$abstractvectortype$> fn
2910             = REDUCE_ID_IMPL.find(op, opc, (opc_) -> {
2911                 switch (opc_) {
2912                 case VECTOR_OP_ADD:
2913                 case VECTOR_OP_OR:
2914                 case VECTOR_OP_XOR:
2915                 case VECTOR_OP_FIRST_NONZERO:
2916                     return v -> v.broadcast(0);
2917                 case VECTOR_OP_MUL:
2918                     return v -> v.broadcast(1);
2919                 case VECTOR_OP_AND:
2920                     return v -> v.broadcast(-1);
2921                 case VECTOR_OP_MIN:
2922                     return v -> v.broadcast(MAX_OR_INF);
2923                 case VECTOR_OP_MAX:
2924                     return v -> v.broadcast(MIN_OR_INF);
2925                 default: return null;
2926                 }
2927             });
2928         return fn.apply(this);
2929     }
    // Per-operator lookup of the identity-vector factories that
    // reduceIdentityVector obtains through REDUCE_ID_IMPL.find(op, opc, ...).
    // NOTE(review): presumably ImplCache memoizes the factory produced
    // for each operator -- confirm against ImplCache.
    private static final
    ImplCache<Associative,UnaryOperator<$abstractvectortype$>> REDUCE_ID_IMPL
        = new ImplCache<>(Associative.class, $Type$Vector.class);
2933 
#if[FP]
    // Starting values for MAX and MIN reductions on floating-point lanes:
    // MIN_OR_INF seeds MAX, and MAX_OR_INF seeds MIN.
    private static final $type$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY;
    private static final $type$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY;
#else[FP]
    // Starting values for MAX and MIN reductions on integral lanes:
    // MIN_OR_INF seeds MAX, and MAX_OR_INF seeds MIN.
    private static final $type$ MIN_OR_INF = $Boxtype$.MIN_VALUE;
    private static final $type$ MAX_OR_INF = $Boxtype$.MAX_VALUE;
#end[FP]
2941 
    // Long-valued reductions, unmasked and masked.  They carry no Javadoc
    // of their own and are declared abstract here, so the bodies come
    // from the concrete subclasses.
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op);
    public @Override abstract long reduceLanesToLong(VectorOperators.Associative op,
                                                     VectorMask<$Boxtype$> m);
2945 
2946     // Type specific accessors
2947 
    /**
     * Gets the lane element at lane index {@code i}.
     *
     * @param i the lane index
     * @return the lane element at lane index {@code i}
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract $type$ lane(int i);
2957 
    /**
     * Replaces the lane element of this vector at lane index {@code i} with
     * value {@code e}.
     * <p>
     * This is a cross-lane operation and behaves as if it returns the result
     * of blending this vector with an input vector that is the result of
     * broadcasting {@code e} and a mask that has only one lane set at lane
     * index {@code i}.
     *
     * @param i the lane index of the lane element to be replaced
     * @param e the value to be placed
     * @return the result of replacing the lane element of this vector at lane
     * index {@code i} with value {@code e}.
     * @throws IllegalArgumentException if the index is out of range
     * ({@code < 0 || >= length()})
     */
    public abstract $abstractvectortype$ withLane(int i, $type$ e);
2975 
2976     // Memory load operations
2977 
2978     /**
2979      * Returns an array of type {@code $type$[]}
2980      * containing all the lane values.
2981      * The array length is the same as the vector length.
2982      * The array elements are stored in lane order.
2983      * <p>
2984      * This method behaves as if it stores
2985      * this vector into an allocated array
2986      * (using {@link #intoArray($type$[], int) intoArray})
2987      * and returns the array as follows:
2988      * <pre>{@code
2989      *   $type$[] a = new $type$[this.length()];
2990      *   this.intoArray(a, 0);
2991      *   return a;
2992      * }</pre>
2993      *
2994      * @return an array containing the lane values of this vector
2995      */
2996     @ForceInline
2997     @Override
2998     public final $type$[] toArray() {
2999         $type$[] a = new $type$[vspecies().laneCount()];
3000         intoArray(a, 0);
3001         return a;
3002     }
3003 
#if[int]
    /**
     * {@inheritDoc} <!--workaround-->
     * This is an alias for {@link #toArray()}.
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of range or precision.
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        // Lanes are already ints, so the lane array is the answer.
        return this.toArray();
    }
#else[int]
    /** {@inheritDoc} <!--workaround-->
#if[!FP]
#if[!long]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision or range,
     * and so no {@code IllegalArgumentException} will
     * be thrown.
#end[!long]
#end[!FP]
     */
    @ForceInline
    @Override
    public final int[] toIntArray() {
        $type$[] lanes = toArray();
        int[] ints = new int[lanes.length];
        // Convert lane by lane through the species' checked conversion,
        // then narrow its result to int.
        for (int i = 0; i < ints.length; i++) {
            ints[i] = (int) $Type$Species.toIntegralChecked(lanes[i], true);
        }
        return ints;
    }
#end[int]
3042 
#if[long]
    /**
     * {@inheritDoc} <!--workaround-->
     * This is an alias for {@link #toArray()}.
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of range or precision.
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        // Lanes are already longs, so the lane array is the answer.
        return this.toArray();
    }
#else[long]
    /** {@inheritDoc} <!--workaround-->
#if[!FP]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision or range,
     * and so no {@code IllegalArgumentException} will
     * be thrown.
#end[!FP]
     */
    @ForceInline
    @Override
    public final long[] toLongArray() {
        $type$[] lanes = toArray();
        long[] longs = new long[lanes.length];
        // Convert lane by lane through the species' checked conversion.
        for (int i = 0; i < longs.length; i++) {
            longs[i] = $Type$Species.toIntegralChecked(lanes[i], false);
        }
        return longs;
    }
#end[long]
3079 
#if[double]
    /** {@inheritDoc} <!--workaround-->
     * @implNote
     * This is an alias for {@link #toArray()}.
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision.
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        // Lanes are already doubles, so the lane array is the answer.
        return this.toArray();
    }
#else[double]
    /** {@inheritDoc} <!--workaround-->
#if[long]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * up to nine bits of precision may be lost
     * for lane values of large magnitude.
#else[long]
     * @implNote
     * When this method is used on vectors
     * of type {@code $abstractvectortype$},
     * there will be no loss of precision.
#end[long]
     */
    @ForceInline
    @Override
    public final double[] toDoubleArray() {
        $type$[] lanes = toArray();
        double[] doubles = new double[lanes.length];
        // Plain primitive cast of each lane value to double.
        for (int i = 0; i < doubles.length; i++) {
            doubles[i] = (double) lanes[i];
        }
        return doubles;
    }
#end[double]
3119 
3120     /**
3121      * Loads a vector from a byte array starting at an offset.
3122      * Bytes are composed into primitive lane elements according
3123      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
3124      * The vector is arranged into lanes according to
3125      * <a href="Vector.html#lane-order">memory ordering</a>.
3126      * <p>
3127      * This method behaves as if it returns the result of calling
3128      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3129      * fromByteBuffer()} as follows:
3130      * <pre>{@code
3131      * var bb = ByteBuffer.wrap(a);
3132      * var bo = ByteOrder.LITTLE_ENDIAN;
3133      * var m = species.maskAll(true);
3134      * return fromByteBuffer(species, bb, offset, m, bo);
3135      * }</pre>
3136      *
3137      * @param species species of desired vector
3138      * @param a the byte array
3139      * @param offset the offset into the array
3140      * @return a vector loaded from a byte array
3141      * @throws IndexOutOfBoundsException
3142      *         if {@code offset+N*ESIZE < 0}
3143      *         or {@code offset+(N+1)*ESIZE > a.length}
3144      *         for any lane {@code N} in the vector
3145      */
3146     @ForceInline
3147     public static
3148     $abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
3149                                        byte[] a, int offset) {
3150         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN);
3151     }
3152 
3153     /**
3154      * Loads a vector from a byte array starting at an offset.
3155      * Bytes are composed into primitive lane elements according
3156      * to the specified byte order.
3157      * The vector is arranged into lanes according to
3158      * <a href="Vector.html#lane-order">memory ordering</a>.
3159      * <p>
3160      * This method behaves as if it returns the result of calling
3161      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3162      * fromByteBuffer()} as follows:
3163      * <pre>{@code
3164      * var bb = ByteBuffer.wrap(a);
3165      * var m = species.maskAll(true);
3166      * return fromByteBuffer(species, bb, offset, m, bo);
3167      * }</pre>
3168      *
3169      * @param species species of desired vector
3170      * @param a the byte array
3171      * @param offset the offset into the array
3172      * @param bo the intended byte order
3173      * @return a vector loaded from a byte array
3174      * @throws IndexOutOfBoundsException
3175      *         if {@code offset+N*ESIZE < 0}
3176      *         or {@code offset+(N+1)*ESIZE > a.length}
3177      *         for any lane {@code N} in the vector
3178      */
3179     @ForceInline
3180     public static
3181     $abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
3182                                        byte[] a, int offset,
3183                                        ByteOrder bo) {
3184         $Type$Species vsp = ($Type$Species) species;
3185         offset = checkFromIndexSize(offset,
3186                                     vsp.vectorBitSize() / Byte.SIZE,
3187                                     a.length);
3188         return vsp.dummyVector()
3189             .fromByteArray0(a, offset).maybeSwap(bo);
3190     }
3191 
3192     /**
3193      * Loads a vector from a byte array starting at an offset
3194      * and using a mask.
3195      * Lanes where the mask is unset are filled with the default
3196      * value of {@code $type$} ({#if[FP]?positive }zero).
3197      * Bytes are composed into primitive lane elements according
3198      * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering.
3199      * The vector is arranged into lanes according to
3200      * <a href="Vector.html#lane-order">memory ordering</a>.
3201      * <p>
3202      * This method behaves as if it returns the result of calling
3203      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3204      * fromByteBuffer()} as follows:
3205      * <pre>{@code
3206      * var bb = ByteBuffer.wrap(a);
3207      * var bo = ByteOrder.LITTLE_ENDIAN;
3208      * return fromByteBuffer(species, bb, offset, bo, m);
3209      * }</pre>
3210      *
3211      * @param species species of desired vector
3212      * @param a the byte array
3213      * @param offset the offset into the array
3214      * @param m the mask controlling lane selection
3215      * @return a vector loaded from a byte array
3216      * @throws IndexOutOfBoundsException
3217      *         if {@code offset+N*ESIZE < 0}
3218      *         or {@code offset+(N+1)*ESIZE > a.length}
3219      *         for any lane {@code N} in the vector where
3220      *         the mask is set
3221      */
3222     @ForceInline
3223     public static
3224     $abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
3225                                        byte[] a, int offset,
3226                                        VectorMask<$Boxtype$> m) {
3227         return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m);
3228     }
3229 
    /**
     * Loads a vector from a byte array starting at an offset
     * and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code $type$} ({#if[FP]?positive }zero).
     * Bytes are composed into primitive lane elements according
     * to the specified byte order.
     * The vector is arranged into lanes according to
     * <a href="Vector.html#lane-order">memory ordering</a>.
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows:
     * <pre>{@code
     * var bb = ByteBuffer.wrap(a);
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param a the byte array
     * @param offset the offset into the array
     * @param bo the intended byte order
     * @param m the mask controlling lane selection
     * @return a vector loaded from a byte array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*ESIZE < 0}
     *         or {@code offset+(N+1)*ESIZE > a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public static
    $abstractvectortype$ fromByteArray(VectorSpecies<$Boxtype$> species,
                                       byte[] a, int offset,
                                       ByteOrder bo,
                                       VectorMask<$Boxtype$> m) {
        $Type$Species vsp = ($Type$Species) species;
        $abstractvectortype$ zero = vsp.zero();

        // Fast path: every lane of the vector lies inside the array, so load
        // the whole vector, adjust the byte order, and blend against zero
        // under the mask.
        if (offset >= 0 && offset <= (a.length - vsp.length() * $sizeInBytes$)) {
            $abstractvectortype$ v = zero.fromByteArray0(a, offset);
            return zero.blend(v.maybeSwap(bo), m);
        }
        // Slow path: part of the vector would fall outside the array.
        // Check lane by lane (each lane spans $sizeInBytes$ bytes) before
        // reading anything.
        $abstractvectortype$ iota = zero.addIndex(1);
        ((AbstractMask<$Boxtype$>)m)
            .checkIndexByLane(offset, a.length, iota, $sizeInBytes$);
        // NOTE(review): wrapper() presumably returns a buffer view of the
        // array that starts at offset and uses byte order bo -- confirm.
        $Type$Buffer tb = wrapper(a, offset, bo);
        // Read lane by lane through the view, under control of the mask.
        return vsp.ldOp(tb, 0, (AbstractMask<$Boxtype$>)m,
                   (tb_, __, i)  -> tb_.get(i));
    }
3280 
3281     /**
3282      * Loads a vector from an array of type {@code $type$[]}
3283      * starting at an offset.
3284      * For each vector lane, where {@code N} is the vector lane index, the
3285      * array element at index {@code offset + N} is placed into the
3286      * resulting vector at lane index {@code N}.
3287      *
3288      * @param species species of desired vector
3289      * @param a the array
3290      * @param offset the offset into the array
3291      * @return the vector loaded from an array
3292      * @throws IndexOutOfBoundsException
3293      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3294      *         for any lane {@code N} in the vector
3295      */
3296     @ForceInline
3297     public static
3298     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
3299                                    $type$[] a, int offset) {
3300         $Type$Species vsp = ($Type$Species) species;
3301         offset = checkFromIndexSize(offset,
3302                                     vsp.laneCount(),
3303                                     a.length);
3304         return vsp.dummyVector().fromArray0(a, offset);
3305     }
3306 
3307     /**
3308      * Loads a vector from an array of type {@code $type$[]}
3309      * starting at an offset and using a mask.
3310      * Lanes where the mask is unset are filled with the default
3311      * value of {@code $type$} ({#if[FP]?positive }zero).
3312      * For each vector lane, where {@code N} is the vector lane index,
3313      * if the mask lane at index {@code N} is set then the array element at
3314      * index {@code offset + N} is placed into the resulting vector at lane index
3315      * {@code N}, otherwise the default element value is placed into the
3316      * resulting vector at lane index {@code N}.
3317      *
3318      * @param species species of desired vector
3319      * @param a the array
3320      * @param offset the offset into the array
3321      * @param m the mask controlling lane selection
3322      * @return the vector loaded from an array
3323      * @throws IndexOutOfBoundsException
3324      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3325      *         for any lane {@code N} in the vector
3326      *         where the mask is set
3327      */
3328     @ForceInline
3329     public static
3330     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
3331                                    $type$[] a, int offset,
3332                                    VectorMask<$Boxtype$> m) {
3333         $Type$Species vsp = ($Type$Species) species;
3334         if (offset >= 0 && offset <= (a.length - species.length())) {
3335             $abstractvectortype$ zero = vsp.zero();
3336             return zero.blend(zero.fromArray0(a, offset), m);
3337         }
3338         $abstractvectortype$ iota = vsp.iota();
3339         ((AbstractMask<$Boxtype$>)m)
3340             .checkIndexByLane(offset, a.length, iota, 1);
3341         return vsp.vOp(m, i -> a[offset + i]);
3342     }
3343 
    /**
     * Gathers a new vector composed of elements from an array of type
     * {@code $type$[]},
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane is loaded from the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param species species of desired vector
     * @param a the array
     * @param offset the offset into the array, may be negative if relative
     * indexes in the index map compensate to produce a value within the
     * array bounds
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @return the vector loaded from the indexed elements of the array
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
#if[byteOrShort]
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        $Type$Species vsp = ($Type$Species) species;
        // Lane-by-lane gather; the array accesses perform the bounds checks.
        return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]);
    }
#else[byteOrShort]
    @ForceInline
    public static
    $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
                                   $type$[] a, int offset,
                                   int[] indexMap, int mapOffset) {
        $Type$Species vsp = ($Type$Species) species;
        Objects.requireNonNull(a);
        Objects.requireNonNull(indexMap);
        Class<? extends $abstractvectortype$> vectorType = vsp.vectorType();

#if[longOrDouble]
        // A single-lane vector needs no gather: load the one mapped element
        // with the ordinary, bounds-checked array load.
        if (vsp.laneCount() == 1) {
          return $abstractvectortype$.fromArray(vsp, a, offset + indexMap[mapOffset]);
        }
#end[longOrDouble]

        // Index vector: vix[0:n] = k -> offset + indexMap[mapOffset + k]
        IntVector vix = IntVector.fromArray(IntVector.species(vsp.indexShape()), indexMap, mapOffset).add(offset);

        // Validate every computed index against the array length before
        // handing the index vector to the intrinsic.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // NOTE(review): the trailing lambda computes the same gather lane by
        // lane; presumably it is the fallback used when the intrinsic is
        // not applied -- confirm against VectorIntrinsics.loadWithMap.
        return VectorIntrinsics.loadWithMap(
            vectorType, $type$.class, vsp.laneCount(),
            IntVector.species(vsp.indexShape()).vectorType(),
            a, ARRAY_BASE, vix,
            a, offset, indexMap, mapOffset, vsp,
            ($type$[] c, int idx, int[] iMap, int idy, $Type$Species s) ->
            s.vOp(n -> c[idx + iMap[idy+n]]));
        }
#end[byteOrShort]
3415 
3416     /**
3417      * Gathers a new vector composed of elements from an array of type
3418      * {@code $type$[]},
3419      * under the control of a mask, and
3420      * using indexes obtained by adding a fixed {@code offset} to a
3421      * series of secondary offsets from an <em>index map</em>.
3422      * The index map is a contiguous sequence of {@code VLENGTH}
3423      * elements in a second array of {@code int}s, starting at a given
3424      * {@code mapOffset}.
3425      * <p>
3426      * For each vector lane, where {@code N} is the vector lane index,
3427      * if the lane is set in the mask,
3428      * the lane is loaded from the array
3429      * element {@code a[f(N)]}, where {@code f(N)} is the
3430      * index mapping expression
3431      * {@code offset + indexMap[mapOffset + N]]}.
3432      * Unset lanes in the resulting vector are set to zero.
3433      *
3434      * @param species species of desired vector
3435      * @param a the array
3436      * @param offset the offset into the array, may be negative if relative
3437      * indexes in the index map compensate to produce a value within the
3438      * array bounds
3439      * @param indexMap the index map
3440      * @param mapOffset the offset into the index map
3441      * @param m the mask controlling lane selection
3442      * @return the vector loaded from the indexed elements of the array
3443      * @throws IndexOutOfBoundsException
3444      *         if {@code mapOffset+N < 0}
3445      *         or if {@code mapOffset+N >= indexMap.length},
3446      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3447      *         is an invalid index into {@code a},
3448      *         for any lane {@code N} in the vector
3449      *         where the mask is set
3450      * @see $abstractvectortype$#toIntArray()
3451      */
3452     @ForceInline
3453     public static
3454     $abstractvectortype$ fromArray(VectorSpecies<$Boxtype$> species,
3455                                    $type$[] a, int offset,
3456                                    int[] indexMap, int mapOffset,
3457                                    VectorMask<$Boxtype$> m) {
3458         $Type$Species vsp = ($Type$Species) species;
3459 
3460 #if[byteOrShort]
3461         // Do it the slow way.
3462         return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]);
3463 
3464 #else[byteOrShort]
3465         // FIXME This can result in out of bounds errors for unset mask lanes
3466         // FIX = Use a scatter instruction which routes the unwanted lanes
3467         // into a bit-bucket variable (private to implementation).
3468         // This requires a 2-D scatter in order to set a second base address.
3469         // See notes in https://bugs.openjdk.java.net/browse/JDK-8223367
3470         assert(m.allTrue());
3471         return ($abstractvectortype$)
3472             zero(species).blend(fromArray(species, a, offset, indexMap, mapOffset), m);
3473 
3474 #end[byteOrShort]
3475     }
3476 
3477     /**
3478      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3479      * starting at an offset into the byte buffer.
3480 #if[!byte]
3481      * <p>
3482      * Bytes are composed into primitive lane elements according to
3483      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
3484      * To avoid errors, the
3485      * {@linkplain ByteBuffer#order() intrinsic byte order}
3486      * of the buffer must be little-endian.
3487 #end[!byte]
3488      * <p>
3489      * This method behaves as if it returns the result of calling
3490      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3491      * fromByteBuffer()} as follows:
3492      * <pre>{@code
3493      * var bb = ByteBuffer.wrap(a);
3494      * var bo = ByteOrder.LITTLE_ENDIAN;
3495      * var m = species.maskAll(true);
3496      * return fromByteBuffer(species, bb, offset, m, bo);
3497      * }</pre>
3498      *
3499      * @param species species of desired vector
3500      * @param bb the byte buffer
3501      * @param offset the offset into the byte buffer
3502      * @param bo the intended byte order
3503      * @return a vector loaded from a byte buffer
3504 #if[!byte]
3505      * @throws IllegalArgumentException if byte order of bb
3506      *         is not {@link ByteOrder#LITTLE_ENDIAN}
3507 #end[!byte]
3508      * @throws IndexOutOfBoundsException
3509      *         if {@code offset+N*$sizeInBytes$ < 0}
3510      *         or {@code offset+N*$sizeInBytes$ >= bb.limit()}
3511      *         for any lane {@code N} in the vector
3512      */
3513     @ForceInline
3514     public static
3515     $abstractvectortype$ fromByteBuffer(VectorSpecies<$Boxtype$> species,
3516                                         ByteBuffer bb, int offset,
3517                                         ByteOrder bo) {
3518         $Type$Species vsp = ($Type$Species) species;
3519         offset = checkFromIndexSize(offset,
3520                                     vsp.laneCount(),
3521                                     bb.limit());
3522         return vsp.dummyVector()
3523             .fromByteBuffer0(bb, offset).maybeSwap(bo);
3524     }
3525 
3526     /**
3527      * Loads a vector from a {@linkplain ByteBuffer byte buffer}
3528      * starting at an offset into the byte buffer
3529      * and using a mask.
3530 #if[!byte]
3531      * <p>
3532      * Bytes are composed into primitive lane elements according to
3533      * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order.
3534      * To avoid errors, the
3535      * {@linkplain ByteBuffer#order() intrinsic byte order}
3536      * of the buffer must be little-endian.
3537 #end[!byte]
3538      * <p>
3539      * This method behaves as if it returns the result of calling
3540      * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
3541      * fromByteBuffer()} as follows:
3542      * <pre>{@code
3543      * var bb = ByteBuffer.wrap(a);
3544      * var bo = ByteOrder.LITTLE_ENDIAN;
3545      * var m = species.maskAll(true);
3546      * return fromByteBuffer(species, bb, offset, m, bo);
3547      * }</pre>
3548      *
3549      * @param species species of desired vector
3550      * @param bb the byte buffer
3551      * @param offset the offset into the byte buffer
3552      * @param bo the intended byte order
3553      * @param m the mask controlling lane selection
3554      * @return a vector loaded from a byte buffer
3555 #if[!byte]
3556      * @throws IllegalArgumentException if byte order of bb
3557      *         is not {@link ByteOrder#LITTLE_ENDIAN}
3558 #end[!byte]
3559      * @throws IndexOutOfBoundsException
3560      *         if {@code offset+N*$sizeInBytes$ < 0}
3561      *         or {@code offset+N*$sizeInBytes$ >= bb.limit()}
3562      *         for any lane {@code N} in the vector
3563      *         where the mask is set
3564      */
3565     @ForceInline
3566     public static
3567     $abstractvectortype$ fromByteBuffer(VectorSpecies<$Boxtype$> species,
3568                                         ByteBuffer bb, int offset,
3569                                         ByteOrder bo,
3570                                         VectorMask<$Boxtype$> m) {
3571         if (m.allTrue()) {
3572             return fromByteBuffer(species, bb, offset, bo);
3573         }
3574         $Type$Species vsp = ($Type$Species) species;
3575         checkMaskFromIndexSize(offset,
3576                                vsp, m, 1,
3577                                bb.limit());
3578         $abstractvectortype$ zero = zero(vsp);
3579         $abstractvectortype$ v = zero.fromByteBuffer0(bb, offset);
3580         return zero.blend(v.maybeSwap(bo), m);
3581     }
3582 
3583     // Memory store operations
3584 
3585     /**
3586      * Stores this vector into an array of type {@code $type$[]}
3587      * starting at an offset.
3588      * <p>
3589      * For each vector lane, where {@code N} is the vector lane index,
3590      * the lane element at index {@code N} is stored into the array
3591      * element {@code a[offset+N]}.
3592      *
3593      * @param a the array, of type {@code $type$[]}
3594      * @param offset the offset into the array
3595      * @throws IndexOutOfBoundsException
3596      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3597      *         for any lane {@code N} in the vector
3598      */
3599     @ForceInline
3600     public final
3601     void intoArray($type$[] a, int offset) {
3602         $Type$Species vsp = vspecies();
3603         offset = checkFromIndexSize(offset,
3604                                     vsp.laneCount(),
3605                                     a.length);
3606         VectorIntrinsics.store(
3607             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3608             a, arrayAddress(a, offset),
3609             this,
3610             a, offset,
3611             (arr, off, v)
3612             -> v.stOp(arr, off,
3613                       (arr_, off_, i, e) -> arr_[off_ + i] = e));
3614     }
3615 
3616     /**
3617      * Stores this vector into an array of {@code $type$}
3618      * starting at offset and using a mask.
3619      * <p>
3620      * For each vector lane, where {@code N} is the vector lane index,
3621      * the lane element at index {@code N} is stored into the array
3622      * element {@code a[offset+N]}.
3623      * If the mask lane at {@code N} is unset then the corresponding
3624      * array element {@code a[offset+N]} is left unchanged.
3625      * <p>
3626      * Array range checking is done for lanes where the mask is set.
3627      * Lanes where the mask is unset are not stored and do not need
3628      * to correspond to legitimate elements of {@code a}.
3629      * That is, unset lanes may correspond to array indexes less than
3630      * zero or beyond the end of the array.
3631      *
3632      * @param a the array, of type {@code $type$[]}
3633      * @param offset the offset into the array
3634      * @param m the mask controlling lane storage
3635      * @throws IndexOutOfBoundsException
3636      *         if {@code offset+N < 0} or {@code offset+N >= a.length}
3637      *         for any lane {@code N} in the vector
3638      *         where the mask is set
3639      */
3640     @ForceInline
3641     public final
3642     void intoArray($type$[] a, int offset,
3643                    VectorMask<$Boxtype$> m) {
3644         if (m.allTrue()) {
3645             intoArray(a, offset);
3646         } else {
3647             // FIXME: Cannot vectorize yet, if there's a mask.
3648             stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
3649         }
3650     }
3651 
    /**
     * Scatters this vector into an array of type {@code $type$[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see $abstractvectortype$#toIntArray()
     */
    @ForceInline
    public final
    void intoArray($type$[] a, int offset,
                   int[] indexMap, int mapOffset) {
        $Type$Species vsp = vspecies();
        // A single-lane vector needs no scatter: store its one element at
        // the mapped index with the ordinary, bounds-checked array store.
        if (length() == 1) {
            intoArray(a, offset + indexMap[mapOffset]);
            return;
        }
        IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
        // When the index species does not have one lane per lane of this
        // vector, an index vector cannot be formed; store lane by lane.
        if (isp.laneCount() != vsp.laneCount()) {
            stOp(a, offset,
                 (arr, off, i, e) -> {
                     int j = indexMap[mapOffset + i];
                     arr[off + j] = e;
                 });
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // Validate every computed index against the array length before
        // handing the index vector to the intrinsic.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // NOTE(review): the trailing lambda performs the same scatter lane
        // by lane; presumably it is the fallback used when the intrinsic is
        // not applied -- confirm against VectorIntrinsics.storeWithMap.
        VectorIntrinsics.storeWithMap(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this,
            a, offset, indexMap, mapOffset,
            (arr, off, v, map, mo)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          arr[off + j] = e;
                      }));
    }
3719 
3720     /**
3721      * Scatters this vector into an array of type {@code $type$[]},
3722      * under the control of a mask, and
3723      * using indexes obtained by adding a fixed {@code offset} to a
3724      * series of secondary offsets from an <em>index map</em>.
3725      * The index map is a contiguous sequence of {@code VLENGTH}
3726      * elements in a second array of {@code int}s, starting at a given
3727      * {@code mapOffset}.
3728      * <p>
3729      * For each vector lane, where {@code N} is the vector lane index,
3730      * if the mask lane at index {@code N} is set then
3731      * the lane element at index {@code N} is stored into the array
3732      * element {@code a[f(N)]}, where {@code f(N)} is the
3733      * index mapping expression
3734      * {@code offset + indexMap[mapOffset + N]]}.
3735      *
3736      * @param a the array
3737      * @param offset an offset to combine with the index map offsets
3738      * @param indexMap the index map
3739      * @param mapOffset the offset into the index map
3740      * @param m the mask
3741      * @returns a vector of the values {@code m ? a[f(N)] : 0},
3742      *          {@code f(N) = offset + indexMap[mapOffset + N]]}.
3743      * @throws IndexOutOfBoundsException
3744      *         if {@code mapOffset+N < 0}
3745      *         or if {@code mapOffset+N >= indexMap.length},
3746      *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
3747      *         is an invalid index into {@code a},
3748      *         for any lane {@code N} in the vector
3749      *         where the mask is set
3750      * @see $abstractvectortype$#toIntArray()
3751      */
3752     @ForceInline
3753     public final
3754     void intoArray($type$[] a, int offset,
3755                    int[] indexMap, int mapOffset,
3756                    VectorMask<$Boxtype$> m) {
3757         $Type$Species vsp = vspecies();
3758         if (m.allTrue()) {
3759             intoArray(a, offset, indexMap, mapOffset);
3760             return;
3761         }
3762         throw new AssertionError("fixme");
3763     }
3764 
3765     /**
3766      * {@inheritDoc} <!--workaround-->
3767      */
3768     @Override
3769     @ForceInline
3770     public final
3771     void intoByteArray(byte[] a, int offset) {
3772         offset = checkFromIndexSize(offset,
3773                                     bitSize() / Byte.SIZE,
3774                                     a.length);
3775         this.maybeSwap(ByteOrder.LITTLE_ENDIAN)
3776             .intoByteArray0(a, offset);
3777     }
3778 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteArray(byte[] a, int offset,
                       VectorMask<$Boxtype$> m) {
        // With every lane selected this is the plain (checked) store.
        if (m.allTrue()) {
            intoByteArray(a, offset);
            return;
        }
        $Type$Species vsp = vspecies();
        // NOTE(review): both branches below store this vector as-is, without
        // the maybeSwap(ByteOrder.LITTLE_ENDIAN) step that the unmasked
        // overload applies -- confirm this is intended on platforms whose
        // native order is not little-endian.
        if (offset >= 0 && offset <= (a.length - vsp.length() * $sizeInBytes$)) {
            // The whole vector lies inside the array: read the current
            // contents, replace the lanes selected by the mask, and write
            // the result back.
            var oldVal = fromByteArray0(a, offset);
            var newVal = oldVal.blend(this, m);
            newVal.intoByteArray0(a, offset);
        } else {
            // Part of the vector would fall outside the array: check the
            // lanes selected by the mask ($sizeInBytes$ bytes each), then
            // store those lanes one at a time through a buffer view.
            checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, a.length);
            $Type$Buffer tb = wrapper(a, offset, NATIVE_ENDIAN);
            this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e));
        }
    }
3802 
3803     /**
3804      * {@inheritDoc} <!--workaround-->
3805      */
3806     @Override
3807     @ForceInline
3808     public final
3809     void intoByteArray(byte[] a, int offset,
3810                        ByteOrder bo,
3811                        VectorMask<$Boxtype$> m) {
3812         maybeSwap(bo).intoByteArray(a, offset, m);
3813     }
3814 
3815     /**
3816      * {@inheritDoc} <!--workaround-->
3817      */
3818     @Override
3819     @ForceInline
3820     public final
3821     void intoByteBuffer(ByteBuffer bb, int offset,
3822                         ByteOrder bo) {
3823         maybeSwap(bo).intoByteBuffer0(bb, offset);
3824     }
3825 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteBuffer(ByteBuffer bb, int offset,
                        ByteOrder bo,
                        VectorMask<$Boxtype$> m) {
        // With every lane selected this is the plain store.
        if (m.allTrue()) {
            intoByteBuffer(bb, offset, bo);
            return;
        }
        $Type$Species vsp = vspecies();
        // Check the lanes selected by the mask; each spans $sizeInBytes$ bytes.
        checkMaskFromIndexSize(offset, vsp, m, $sizeInBytes$, bb.limit());
        // NOTE(review): judging by its name, conditionalStoreNYI presumably
        // rejects stores in which some (unset) lanes lie outside the buffer,
        // since the read-blend-write below touches the whole vector --
        // confirm against its definition.
        conditionalStoreNYI(offset, vsp, m, $sizeInBytes$, bb.limit());
        // Read the current contents, replace the lanes selected by the mask
        // with this vector's (byte-order adjusted) lanes, and write back.
        var oldVal = fromByteBuffer0(bb, offset);
        var newVal = oldVal.blend(this.maybeSwap(bo), m);
        newVal.intoByteBuffer0(bb, offset);
    }
3846 
3847     // ================================================
3848 
3849     // Low-level memory operations.
3850     //
3851     // Note that all of these operations *must* inline into a context
3852     // where the exact species of the involved vector is a
3853     // compile-time constant.  Otherwise, the intrinsic generation
3854     // will fail and performance will suffer.
3855     //
3856     // In many cases this is achieved by re-deriving a version of the
3857     // method in each concrete subclass (per species).  The re-derived
3858     // method simply calls one of these generic methods, with exact
3859     // parameters for the controlling metadata, which is either a
3860     // typed vector or constant species instance.
3861 
3862     // Unchecked loading operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
3864     // byte swapping.
3865 
    // Unchecked load of a full vector from a[offset..]; declared abstract
    // here so that each concrete vector class supplies its own version.
    /*package-private*/
    abstract
    $abstractvectortype$ fromArray0($type$[] a, int offset);
    // Shared body for fromArray0: passes this vector's species metadata to
    // VectorIntrinsics.load together with a lane-by-lane lambda that reads
    // arr[off + i].  No index check is made here.
    @ForceInline
    final
    $abstractvectortype$ fromArray0Template($type$[] a, int offset) {
        $Type$Species vsp = vspecies();
        return VectorIntrinsics.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }
3880 
    // Unchecked load of a full vector from the bytes at a[offset..]; the
    // concrete vector classes supply the implementation.
    @Override
    abstract
    $abstractvectortype$ fromByteArray0(byte[] a, int offset);
    // Shared body for fromByteArray0: passes this vector's species metadata
    // to VectorIntrinsics.load together with a lambda that reads the lanes
    // one at a time through a NATIVE_ENDIAN buffer view of the array.
    // No index check or byte swapping is done here.
    @ForceInline
    final
    $abstractvectortype$ fromByteArray0Template(byte[] a, int offset) {
        $Type$Species vsp = vspecies();
        return VectorIntrinsics.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                $Type$Buffer tb = wrapper(arr, off, NATIVE_ENDIAN);
                return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
            });
    }
3897 
    // Unchecked load of a full vector from the buffer bytes starting at
    // offset; the concrete vector classes supply the implementation.
    abstract
    $abstractvectortype$ fromByteBuffer0(ByteBuffer bb, int offset);
    // Shared body for fromByteBuffer0: passes this vector's species metadata
    // to VectorIntrinsics.load together with a lambda that reads the lanes
    // one at a time through a NATIVE_ENDIAN view of the buffer.
    // No index check or byte swapping is done here.
    @ForceInline
    final
    $abstractvectortype$ fromByteBuffer0Template(ByteBuffer bb, int offset) {
        $Type$Species vsp = vspecies();
        return VectorIntrinsics.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            bufferBase(bb), bufferAddress(bb, offset),
            bb, offset, vsp,
            (buf, off, s) -> {
                $Type$Buffer tb = wrapper(buf, off, NATIVE_ENDIAN);
                return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
           });
    }
3913 
3914     // Unchecked storing operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
3916     // byte swapping.
3917 
3918     abstract
3919     void intoArray0($type$[] a, int offset);
3920     @ForceInline
3921     final
3922     void intoArray0Template($type$[] a, int offset) {
3923         $Type$Species vsp = vspecies();
3924         VectorIntrinsics.store(
3925             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3926             a, arrayAddress(a, offset),
3927             this, a, offset,
3928             (arr, off, v)
3929             -> v.stOp(arr, off,
3930                       (arr_, off_, i, e) -> arr_[off_+i] = e));
3931     }
3932 
3933     abstract
3934     void intoByteArray0(byte[] a, int offset);
3935     @ForceInline
3936     final
3937     void intoByteArray0Template(byte[] a, int offset) {
3938         $Type$Species vsp = vspecies();
3939         VectorIntrinsics.store(
3940             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3941             a, byteArrayAddress(a, offset),
3942             this, a, offset,
3943             (arr, off, v) -> {
3944                 $Type$Buffer tb = wrapper(arr, off, NATIVE_ENDIAN);
3945                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3946             });
3947     }
3948 
3949     @ForceInline
3950     final
3951     void intoByteBuffer0(ByteBuffer bb, int offset) {
3952         $Type$Species vsp = vspecies();
3953         VectorIntrinsics.store(
3954             vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
3955             bufferBase(bb), bufferAddress(bb, offset),
3956             this, bb, offset,
3957             (buf, off, v) -> {
3958                 $Type$Buffer tb = wrapper(buf, off, NATIVE_ENDIAN);
3959                 v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
3960             });
3961     }
3962 
3963     // End of low-level memory operations.
3964 
3965     private static
3966     void checkMaskFromIndexSize(int offset,
3967                                 $Type$Species vsp,
3968                                 VectorMask<$Boxtype$> m,
3969                                 int scale,
3970                                 int limit) {
3971         ((AbstractMask<$Boxtype$>)m)
3972             .checkIndexByLane(offset, limit, vsp.iota(), scale);
3973     }
3974 
3975     @ForceInline
3976     private void conditionalStoreNYI(int offset,
3977                                      $Type$Species vsp,
3978                                      VectorMask<$Boxtype$> m,
3979                                      int scale,
3980                                      int limit) {
3981         if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
3982             String msg =
3983                 String.format("unimplemented: store @%d in [0..%d), %s in %s",
3984                               offset, limit, m, vsp);
3985             throw new AssertionError(msg);
3986         }
3987     }
3988 
3989     /*package-private*/
3990     @Override
3991     @ForceInline
3992     final
3993     $abstractvectortype$ maybeSwap(ByteOrder bo) {
3994 #if[!byte]
3995         if (bo != NATIVE_ENDIAN) {
3996             return this.reinterpretAsBytes()
3997                 .rearrange(swapBytesShuffle())
3998                 .reinterpretAs$Type$s();
3999         }
4000 #end[!byte]
4001         return this;
4002     }
4003 
    // Shift amount that converts an element index into a byte distance:
    // 31 minus the number of leading zeros of the element index scale,
    // i.e. the position of the scale's highest set bit.
    // NOTE(review): this equals log2(scale) only if the scale is a
    // power of two -- presumably always the case; confirm.
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_$TYPE$_INDEX_SCALE);
    // Base offset of a $type$[] array, as reported by Unsafe.
    static final long ARRAY_BASE =
        Unsafe.ARRAY_$TYPE$_BASE_OFFSET;
4008 
4009     @ForceInline
4010     static long arrayAddress($type$[] a, int index) {
4011         return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
4012     }
4013 
4014     @ForceInline
4015     static long byteArrayAddress(byte[] a, int index) {
4016         return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
4017     }
4018 
4019     // Byte buffer wrappers.
4020     private static $Type$Buffer wrapper(ByteBuffer bb, int offset,
4021                                         ByteOrder bo) {
4022         return bb.duplicate().position(offset).slice()
4023             .order(bo){#if[byte]?;:.as$Type$Buffer();}
4024     }
4025     private static $Type$Buffer wrapper(byte[] a, int offset,
4026                                         ByteOrder bo) {
4027         return ByteBuffer.wrap(a, offset, a.length - offset)
4028             .order(bo){#if[byte]?;:.as$Type$Buffer();}
4029     }
4030 
4031     // ================================================
4032 
4033     /// Reinterpreting view methods:
4034     //   lanewise reinterpret: viewAsXVector()
4035     //   keep shape, redraw lanes: reinterpretAsEs()
4036 
    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final ByteVector reinterpretAsBytes() {
#if[byte]
        // Already a byte vector: nothing to reinterpret.
        return this;
#else[byte]
         // Going to ByteVector, pay close attention to byte order.
         // The raw conversion below is used as-is, which is only
         // valid under the little-endian register order asserted here.
         assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
         return asByteVectorRaw();
         // Alternative that swaps the bytes, currently disabled:
         //return asByteVectorRaw().rearrange(swapBytesShuffle());
#end[byte]
    }
4052 
4053     /**
4054      * {@inheritDoc} <!--workaround-->
4055      */
4056     @ForceInline
4057     @Override
4058     public final $Bitstype$Vector viewAsIntegralLanes() {
4059 #if[BITWISE]
4060         return this;
4061 #else[BITWISE]
4062         LaneType ilt = LaneType.$TYPE$.asIntegral();
4063         return ($Bitstype$Vector) asVectorRaw(ilt);
4064 #end[BITWISE]
4065     }
4066 
    /**
     * {@inheritDoc} <!--workaround-->
#if[byteOrShort]
     *
     * @implNote This method always throws
     * {@code IllegalArgumentException}, because there is no floating
     * point type of the same size as {@code $type$}.  The return type
     * of this method is arbitrarily designated as
     * {@code Vector<?>}.  Future versions of this API may change the return
     * type if additional floating point types become available.
#end[byteOrShort]
     */
    @ForceInline
    @Override
    public final
    {#if[byteOrShort]?Vector<?>:$Fptype$Vector}
    viewAsFloatingLanes() {
#if[FP]
        // Lanes are already floating point.
        return this;
#else[FP]
        // For lane types with no floating point counterpart this
        // lookup is expected to throw, so the code after it is
        // not reached.
        LaneType flt = LaneType.$TYPE$.asFloating();
#if[!byteOrShort]
        // Reinterpret the lanes as the floating lane type.
        return ($Fptype$Vector) asVectorRaw(flt);
#else[!byteOrShort]
        throw new AssertionError();  // should already throw IAE
#end[byteOrShort]
#end[FP]
    }
4095 
4096     // ================================================
4097 
4098     /// Object methods: toString, equals, hashCode
4099     //
    // Object methods are defined as if the corresponding Arrays
    // method (Arrays.toString, etc.) were applied to the array of
    // elements.  Two equal vectors are required to have equal species
    // and equal lane values.
4103 
4104     /**
4105      * Returns a string representation of this vector, of the form
4106      * {@code "[0,1,2...]"}, reporting the lane values of this vector,
4107      * in lane order.
4108      *
4109      * The string is produced as if by a call to {@link
4110      * java.util.Arrays#toString($type$[]) Arrays.toString()},
4111      * as appropriate to the {@code $type$} array returned by
4112      * {@link #toArray this.toArray()}.
4113      *
4114      * @return a string of the form {@code "[0,1,2...]"}
4115      * reporting the lane values of this vector
4116      */
4117     @Override
4118     @ForceInline
4119     public final
4120     String toString() {
4121         // now that toArray is strongly typed, we can define this
4122         return Arrays.toString(toArray());
4123     }
4124 
4125     /**
4126      * {@inheritDoc} <!--workaround-->
4127      */
4128     @Override
4129     @ForceInline
4130     public final
4131     boolean equals(Object obj) {
4132         if (obj instanceof Vector) {
4133             Vector<?> that = (Vector<?>) obj;
4134             if (this.species().equals(that.species())) {
4135                 return this.eq(that.check(this.species())).allTrue();
4136             }
4137         }
4138         return false;
4139     }
4140 
4141     /**
4142      * {@inheritDoc} <!--workaround-->
4143      */
4144     @Override
4145     @ForceInline
4146     public final
4147     int hashCode() {
4148         // now that toArray is strongly typed, we can define this
4149         return Objects.hash(species(), Arrays.hashCode(toArray()));
4150     }
4151 
4152     // ================================================
4153 
4154     // Species
4155 
    /**
     * Class representing {@link $abstractvectortype$}'s of the same {@link VectorShape VectorShape}.
     * In addition to the species metadata it provides factory and
     * helper methods, most of which delegate to the species'
     * dummy vector.
     */
    /*package-private*/
    static final class $Type$Species extends AbstractSpecies<$Boxtype$> {
        // Creates a species for the given shape; the lane type is
        // always the one for $type$.  The remaining arguments are
        // passed through to AbstractSpecies.
        private $Type$Species(VectorShape shape,
                Class<? extends $abstractvectortype$> vectorType,
                Class<? extends AbstractMask<$Boxtype$>> maskType,
                Function<Object, $abstractvectortype$> vectorFactory) {
            super(shape, LaneType.of($type$.class),
                  vectorType, maskType,
                  vectorFactory);
            // Sanity check: the lane size must match the size of $type$.
            assert(this.elementSize() == $Boxtype$.SIZE);
        }

        // Specializing overrides:

        // Returns the primitive lane class.
        @Override
        @ForceInline
        public final Class<$Boxtype$> elementType() {
            return $type$.class;
        }

        // Returns the box class for the lane type.
        @Override
        @ForceInline
        public final Class<$Boxtype$> genericElementType() {
            return $Boxtype$.class;
        }

        // Returns the array class for the lane type.
        @Override
        @ForceInline
        public final Class<$type$[]> arrayType() {
            return $type$[].class;
        }

        // Returns the vector class of this species, narrowed to a
        // subclass of $Type$Vector by an unchecked cast.
        @SuppressWarnings("unchecked")
        @Override
        @ForceInline
        public final Class<? extends $Type$Vector> vectorType() {
            return (Class<? extends $Type$Vector>) vectorType;
        }

        // Validates that the long value is representable as a lane
        // value and returns it unchanged.
        @Override
        @ForceInline
        public final long checkValue(long e) {
            longToElementBits(e);  // only for exception
            return e;
        }

        /*package-private*/
        // Creates a vector whose lanes all hold the given bits.  The
        // lambda builds the same result lane by lane via rvOp.
        @Override
        @ForceInline
        final $abstractvectortype$ broadcastBits(long bits) {
            return ($abstractvectortype$)
                VectorIntrinsics.broadcastCoerced(
                    vectorType, $type$.class, laneCount,
                    bits, this,
                    (bits_, s_) -> s_.rvOp(i -> bits_));
        }

        // Broadcasts a lane value by converting it to bits first.
        /*package-private*/
        @ForceInline
        {#if[long]?public}
        final $abstractvectortype$ broadcast($type$ e) {
            return broadcastBits(toBits(e));
        }

#if[!long]
        // Broadcasts a long value after checking that it is
        // representable as a lane value.
        @Override
        @ForceInline
        public final $abstractvectortype$ broadcast(long e) {
            return broadcastBits(longToElementBits(e));
        }
#end[!long]

        // Converts a long value to lane bits, failing with the
        // exception produced by badElementBits if the value does not
        // survive the round trip through the lane type.
        /*package-private*/
        final @Override
        @ForceInline
        long longToElementBits(long value) {
#if[long]
            // In this case, the conversion can never fail.
            return value;
#else[long]
            // Do the conversion, and then test it for failure.
            $type$ e = ($type$) value;
            if ((long) e != value) {
                throw badElementBits(value, e);
            }
            return toBits(e);
#end[long]
        }

        // Converts a lane value to an int or long (returned as a
        // long), failing with the exception produced by badArrayBits
        // if converting the result back does not reproduce the value.
        /*package-private*/
        @ForceInline
        static long toIntegralChecked($type$ e, boolean convertToInt) {
            long value = convertToInt ? (int) e : (long) e;
            if (($type$) value != e) {
                throw badArrayBits(e, convertToInt, value);
            }
            return value;
        }

        // Creates a vector from long values, one per lane.  The
        // number of values must match the lane count, and every value
        // must be exactly representable as a lane value.
        @Override
        @ForceInline
        public final $abstractvectortype$ fromValues(long... values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            $type$[] va = new $type$[laneCount()];
            for (int i = 0; i < va.length; i++) {
                long lv = values[i];
                $type$ v = ($type$) lv;
                va[i] = v;
                // Reject values changed by the narrowing conversion.
                if ((long)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        /* this non-public one is for internal conversions */
        // Same as fromValues, but for int input values.
        @Override
        @ForceInline
        final $abstractvectortype$ fromIntValues(int[] values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            $type$[] va = new $type$[laneCount()];
            for (int i = 0; i < va.length; i++) {
                int lv = values[i];
                $type$ v = ($type$) lv;
                va[i] = v;
                // Reject values changed by the narrowing conversion.
                if ((int)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        // Virtual constructors

        // Loads a vector of this species from an untyped array
        // reference, which is cast to $type$[] before delegating to
        // the static fromArray method.
        @ForceInline
        @Override final
        public $abstractvectortype$ fromArray(Object a, int offset) {
            // User entry point:  Be careful with inputs.
            return $abstractvectortype$
                .fromArray(this, ($type$[]) a, offset);
        }

        // Returns the inherited dummy vector with its static type
        // narrowed to $abstractvectortype$.
        @Override final
        $abstractvectortype$ dummyVector() {
            return ($abstractvectortype$) super.dummyVector();
        }

        // Creates a vector of this species from the given lane array.
        final
        $abstractvectortype$ vectorFactory($type$[] vec) {
            // Species delegates all factory requests to its dummy
            // vector.  The dummy knows all about it.
            return dummyVector().vectorFactory(vec);
        }

        // Builds a vector lane by lane from raw bits: lane i receives
        // the bits returned by f.apply(i), converted via fromBits.
        /*package-private*/
        final @Override
        @ForceInline
        $abstractvectortype$ rvOp(RVOp f) {
            $type$[] res = new $type$[laneCount()];
            for (int i = 0; i < res.length; i++) {
                $bitstype$ bits = {#if[!long]?($bitstype$)} f.apply(i);
                res[i] = fromBits(bits);
            }
            return dummyVector().vectorFactory(res);
        }

        // Builds a vector lane by lane: lane i receives f.apply(i).
        $Type$Vector vOp(FVOp f) {
            $type$[] res = new $type$[laneCount()];
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i);
            }
            return dummyVector().vectorFactory(res);
        }

        // Masked variant of vOp: f is applied only for lanes whose
        // mask bit is set; the other lanes keep the default value of
        // a newly allocated array element.
        $Type$Vector vOp(VectorMask<$Boxtype$> m, FVOp f) {
            $type$[] res = new $type$[laneCount()];
            boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
            for (int i = 0; i < res.length; i++) {
                if (mbits[i]) {
                    res[i] = f.apply(i);
                }
            }
            return dummyVector().vectorFactory(res);
        }

        // The load and store helpers below all delegate to the
        // corresponding method of the dummy vector.

        /*package-private*/
        @ForceInline
        <M> $abstractvectortype$ ldOp(M memory, int offset,
                                      FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> $abstractvectortype$ ldOp(M memory, int offset,
                                      AbstractMask<$Boxtype$> m,
                                      FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, m, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset, FStOp<M> f) {
            dummyVector().stOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset,
                      AbstractMask<$Boxtype$> m,
                      FStOp<M> f) {
            dummyVector().stOp(memory, offset, m, f);
        }

        // N.B. Make sure these constant vectors and
        // masks load up correctly into registers.
        //
        // Also, see if we can avoid all that switching.
        // Could we cache both vectors and both masks in
        // this species object?

        // The three accessors below share one selection scheme: the
        // max-shaped vector class is recognized first by its type,
        // and the fixed shapes are then selected by vector bit size.
        // Any other bit size results in an AssertionError.

        // Zero and iota vector access
        @Override
        @ForceInline
        public final $abstractvectortype$ zero() {
            if ((Class<?>) vectorType() == $Type$MaxVector.class)
                return $Type$MaxVector.ZERO;
            switch (vectorBitSize()) {
                case 64: return $Type$64Vector.ZERO;
                case 128: return $Type$128Vector.ZERO;
                case 256: return $Type$256Vector.ZERO;
                case 512: return $Type$512Vector.ZERO;
            }
            throw new AssertionError();
        }

        @Override
        @ForceInline
        public final $abstractvectortype$ iota() {
            if ((Class<?>) vectorType() == $Type$MaxVector.class)
                return $Type$MaxVector.IOTA;
            switch (vectorBitSize()) {
                case 64: return $Type$64Vector.IOTA;
                case 128: return $Type$128Vector.IOTA;
                case 256: return $Type$256Vector.IOTA;
                case 512: return $Type$512Vector.IOTA;
            }
            throw new AssertionError();
        }

        // Mask access
        @Override
        @ForceInline
        public final VectorMask<$Boxtype$> maskAll(boolean bit) {
            if ((Class<?>) vectorType() == $Type$MaxVector.class)
                return $Type$MaxVector.$Type$MaxMask.maskAll(bit);
            switch (vectorBitSize()) {
                case 64: return $Type$64Vector.$Type$64Mask.maskAll(bit);
                case 128: return $Type$128Vector.$Type$128Mask.maskAll(bit);
                case 256: return $Type$256Vector.$Type$256Mask.maskAll(bit);
                case 512: return $Type$512Vector.$Type$512Mask.maskAll(bit);
            }
            throw new AssertionError();
        }
    }
4424 
4425     /**
4426      * Finds a species for an element type of {@code $type$} and shape.
4427      *
4428      * @param s the shape
4429      * @return a species for an element type of {@code $type$} and shape
4430      * @throws IllegalArgumentException if no such species exists for the shape
4431      */
4432     static $Type$Species species(VectorShape s) {
4433         Objects.requireNonNull(s);
4434         switch (s) {
4435             case S_64_BIT: return ($Type$Species) SPECIES_64;
4436             case S_128_BIT: return ($Type$Species) SPECIES_128;
4437             case S_256_BIT: return ($Type$Species) SPECIES_256;
4438             case S_512_BIT: return ($Type$Species) SPECIES_512;
4439             case S_Max_BIT: return ($Type$Species) SPECIES_MAX;
4440             default: throw new IllegalArgumentException("Bad shape: " + s);
4441         }
4442     }
4443 
    // The species constants below are declared with the public type
    // VectorSpecies but are created as $Type$Species instances;
    // species(VectorShape) casts them back to $Type$Species.

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_64
        = new $Type$Species(VectorShape.S_64_BIT,
                            $Type$64Vector.class,
                            $Type$64Vector.$Type$64Mask.class,
                            $Type$64Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_128
        = new $Type$Species(VectorShape.S_128_BIT,
                            $Type$128Vector.class,
                            $Type$128Vector.$Type$128Mask.class,
                            $Type$128Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_256
        = new $Type$Species(VectorShape.S_256_BIT,
                            $Type$256Vector.class,
                            $Type$256Vector.$Type$256Mask.class,
                            $Type$256Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_512
        = new $Type$Species(VectorShape.S_512_BIT,
                            $Type$512Vector.class,
                            $Type$512Vector.$Type$512Mask.class,
                            $Type$512Vector::new);

    /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<$Boxtype$> SPECIES_MAX
        = new $Type$Species(VectorShape.S_Max_BIT,
                            $Type$MaxVector.class,
                            $Type$MaxVector.$Type$MaxMask.class,
                            $Type$MaxVector::new);

    /**
     * Preferred species for {@link $Type$Vector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     * The value is obtained from {@code VectorSpecies.ofPreferred} for
     * the element type {@code $type$}.
     */
    public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
        = ($Type$Species) VectorSpecies.ofPreferred($type$.class);
4485 }