1 /* 2 * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have 23 * questions. 24 */ 25 package jdk.incubator.vector; 26 27 import java.nio.ByteBuffer; 28 import java.nio.ShortBuffer; 29 import java.nio.ByteOrder; 30 import java.util.Arrays; 31 import java.util.Objects; 32 import java.util.function.BinaryOperator; 33 import java.util.function.IntUnaryOperator; 34 import java.util.function.Function; 35 import java.util.function.UnaryOperator; 36 import java.util.concurrent.ThreadLocalRandom; 37 38 import jdk.internal.misc.Unsafe; 39 import jdk.internal.vm.annotation.ForceInline; 40 41 import static jdk.incubator.vector.VectorIntrinsics.*; 42 import static jdk.incubator.vector.VectorOperators.*; 43 44 // -- This file was mechanically generated: Do not edit! 
-- //

/**
 * A specialized {@link Vector} representing an ordered immutable sequence of
 * {@code short} values.
 */
@SuppressWarnings("cast")  // warning: redundant cast
public abstract class ShortVector extends AbstractVector<Short> {

    ShortVector() {}

    // Operator kinds that can never apply to an integral lane type:
    // floating-point-only opcodes are rejected up front.
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;

    // Map a symbolic operator token to its intrinsic opcode,
    // rejecting operators invalid for short lanes.
    @ForceInline
    static int opCode(Operator op) {
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
    // Same, but with extra kind bits the caller requires.
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        requireKind |= VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }

    // Virtualized factories and operators,
    // coded with portable definitions.
    // These are all @ForceInline in case
    // they need to be used performantly.
    // The various shape-specific subclasses
    // also specialize them by wrapping
    // them in a call like this:
    //    return (Byte128Vector)
    //       super.bOp((Byte128Vector) o);
    // The purpose of that is to forcibly inline
    // the generic definition from this file
    // into a sharply type- and size-specific
    // wrapper in the subclass file, so that
    // the JIT can specialize the code.
    // The code is only inlined and expanded
    // if it gets hot.  Think of it as a cheap
    // and lazy version of C++ templates.

    // Virtualized getter

    /*package-private*/
    abstract short[] getElements();

    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    abstract ShortVector vectorFactory(short[] vec);

    /**
     * Build a mask directly using my species.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Short> maskFactory(boolean[] bits) {
        return vspecies().maskFactory(bits);
    }

    // Constant loader (takes dummy as vector arg)
    interface FVOp {
        short apply(int i);
    }

    // Build a vector by evaluating f at each lane index.
    /*package-private*/
    @ForceInline
    final
    ShortVector vOp(FVOp f) {
        short[] res = new short[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i);
        }
        return vectorFactory(res);
    }

    // Masked variant: unset lanes are left at the default (zero).
    @ForceInline
    final
    ShortVector vOp(VectorMask<Short> m, FVOp f) {
        short[] res = new short[length()];
        boolean[] mbits = ((AbstractMask<Short>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(i);
            }
        }
        return vectorFactory(res);
    }

    // Unary operator

    /*package-private*/
    interface FUnOp {
        short apply(int i, short a);
    }

    /*package-private*/
    abstract
    ShortVector uOp(FUnOp f);
    // Portable scalar fallback for a lane-wise unary operation.
    @ForceInline
    final
    ShortVector uOpTemplate(FUnOp f) {
        short[] vec = getElements();
        short[] res = new short[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ShortVector uOp(VectorMask<Short> m,
                    FUnOp f);
    @ForceInline
    final
    ShortVector uOpTemplate(VectorMask<Short> m,
                            FUnOp f) {
        short[] vec = getElements();
        short[] res = new short[length()];
        boolean[] mbits = ((AbstractMask<Short>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes pass the original lane value through.
            res[i] = mbits[i] ?
                f.apply(i, vec[i]) : vec[i];
        }
        return vectorFactory(res);
    }

    // Binary operator

    /*package-private*/
    interface FBinOp {
        short apply(int i, short a, short b);
    }

    /*package-private*/
    abstract
    ShortVector bOp(Vector<Short> o,
                    FBinOp f);
    // Portable scalar fallback for a lane-wise binary operation.
    @ForceInline
    final
    ShortVector bOpTemplate(Vector<Short> o,
                            FBinOp f) {
        short[] res = new short[length()];
        short[] vec1 = this.getElements();
        short[] vec2 = ((ShortVector)o).getElements();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ShortVector bOp(Vector<Short> o,
                    VectorMask<Short> m,
                    FBinOp f);
    @ForceInline
    final
    ShortVector bOpTemplate(Vector<Short> o,
                            VectorMask<Short> m,
                            FBinOp f) {
        short[] res = new short[length()];
        short[] vec1 = this.getElements();
        short[] vec2 = ((ShortVector)o).getElements();
        boolean[] mbits = ((AbstractMask<Short>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep this vector's lane value.
            res[i] = mbits[i] ?
                f.apply(i, vec1[i], vec2[i]) : vec1[i];
        }
        return vectorFactory(res);
    }

    // Ternary operator

    /*package-private*/
    interface FTriOp {
        short apply(int i, short a, short b, short c);
    }

    /*package-private*/
    abstract
    ShortVector tOp(Vector<Short> o1,
                    Vector<Short> o2,
                    FTriOp f);
    // Portable scalar fallback for a lane-wise ternary operation.
    @ForceInline
    final
    ShortVector tOpTemplate(Vector<Short> o1,
                            Vector<Short> o2,
                            FTriOp f) {
        short[] res = new short[length()];
        short[] vec1 = this.getElements();
        short[] vec2 = ((ShortVector)o1).getElements();
        short[] vec3 = ((ShortVector)o2).getElements();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ShortVector tOp(Vector<Short> o1,
                    Vector<Short> o2,
                    VectorMask<Short> m,
                    FTriOp f);
    @ForceInline
    final
    ShortVector tOpTemplate(Vector<Short> o1,
                            Vector<Short> o2,
                            VectorMask<Short> m,
                            FTriOp f) {
        short[] res = new short[length()];
        short[] vec1 = this.getElements();
        short[] vec2 = ((ShortVector)o1).getElements();
        short[] vec3 = ((ShortVector)o2).getElements();
        boolean[] mbits = ((AbstractMask<Short>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            // Unset lanes keep this vector's lane value.
            res[i] = mbits[i] ?
                f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
        }
        return vectorFactory(res);
    }

    // Reduction operator

    /*package-private*/
    abstract
    short rOp(short v, FBinOp f);
    // Fold the lanes left-to-right, seeded with identity value v.
    @ForceInline
    final
    short rOpTemplate(short v, FBinOp f) {
        short[] vec = getElements();
        for (int i = 0; i < vec.length; i++) {
            v = f.apply(i, v, vec[i]);
        }
        return v;
    }

    // Memory reference

    /*package-private*/
    interface FLdOp<M> {
        short apply(M memory, int offset, int i);
    }

    // Load a vector by evaluating f against memory, one lane at a time.
    /*package-private*/
    @ForceInline
    final
    <M> ShortVector ldOp(M memory, int offset,
                         FLdOp<M> f) {
        //dummy; no vec = getElements();
        short[] res = new short[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(memory, offset, i);
        }
        return vectorFactory(res);
    }

    // Masked load: unset lanes are left at the default (zero).
    /*package-private*/
    @ForceInline
    final
    <M> ShortVector ldOp(M memory, int offset,
                         VectorMask<Short> m,
                         FLdOp<M> f) {
        //short[] vec = getElements();
        short[] res = new short[length()];
        boolean[] mbits = ((AbstractMask<Short>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(memory, offset, i);
            }
        }
        return vectorFactory(res);
    }

    interface FStOp<M> {
        void apply(M memory, int offset, int i, short a);
    }

    // Store all lanes into memory via f.
    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  FStOp<M> f) {
        short[] vec = getElements();
        for (int i = 0; i < vec.length; i++) {
            f.apply(memory, offset, i, vec[i]);
        }
    }

    // Masked store: unset lanes are not written.
    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  VectorMask<Short> m,
                  FStOp<M> f) {
        short[] vec = getElements();
        boolean[] mbits = ((AbstractMask<Short>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            if (mbits[i]) {
                f.apply(memory, offset, i, vec[i]);
            }
        }
    }

    // Binary test
    /*package-private*/
    interface FBinTest {
        boolean apply(int cond, int i, short a, short b);
    }

    // Compare this vector with another, lane by lane, producing a mask.
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Short> bTest(int cond,
                              Vector<Short> o,
                              FBinTest f) {
        short[] vec1 = getElements();
        short[] vec2 = ((ShortVector)o).getElements();
        boolean[] bits = new boolean[length()];
        for (int i = 0; i < length(); i++){
            bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
        }
        return maskFactory(bits);
    }

    // Scalar fallback for the comparison condition codes (BT_*).
    /*package-private*/
    @ForceInline
    static boolean doBinTest(int cond, short a, short b) {
        switch (cond) {
        case BT_eq:  return a == b;
        case BT_ne:  return a != b;
        case BT_lt:  return a < b;
        case BT_le:  return a <= b;
        case BT_gt:  return a > b;
        case BT_ge:  return a >= b;
        }
        throw new AssertionError(Integer.toHexString(cond));
    }

    /*package-private*/
    @Override
    abstract ShortSpecies vspecies();

    // Widen a short lane value to its 64-bit bit pattern (sign-extended).
    /*package-private*/
    @ForceInline
    static long toBits(short e) {
        return e;
    }

    // Narrow a 64-bit bit pattern back to a short lane value.
    /*package-private*/
    @ForceInline
    static short fromBits(long bits) {
        return ((short)bits);
    }

    // Static factories (other than memory operations)

    // Note: A surprising behavior in javadoc
    // sometimes makes a lone /** {@inheritDoc} */
    // comment drop the method altogether,
    // apparently if the method mentions a
    // parameter or return type of Vector<Short>
    // instead of Vector<E> as originally specified.
    // Adding an empty HTML fragment appears to
    // nudge javadoc into providing the desired
    // inherited documentation.  We use the HTML
    // comment <!--workaround--> for this.

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    public static ShortVector zero(VectorSpecies<Short> species) {
        ShortSpecies vsp = (ShortSpecies) species;
        // Broadcast the zero bit pattern via the JIT intrinsic;
        // the trailing lambda is the portable scalar fallback.
        return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), short.class, species.length(),
                                0, vsp,
                                ((bits_, s_) -> s_.rvOp(i -> bits_)));
    }

    /**
     * Returns a vector of the same species as this one
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The contents of the current vector are discarded;
     * only the species is relevant to this operation.
     *
     * <p> This method returns the value of this expression:
     * {@code ShortVector.broadcast(this.species(), e)}.
     *
     * @apiNote
     * Unlike the similar method named {@code broadcast()}
     * in the supertype {@code Vector}, this method does not
     * need to validate its argument, and cannot throw
     * {@code IllegalArgumentException}.  This method is
     * therefore preferable to the supertype method.
     *
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(VectorSpecies,long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public abstract ShortVector broadcast(short e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @see #broadcast(long)
     * @see Vector#broadcast(long)
     * @see VectorSpecies#broadcast(long)
     */
    public static ShortVector broadcast(VectorSpecies<Short> species, short e) {
        ShortSpecies vsp = (ShortSpecies) species;
        return vsp.broadcast(e);
    }

    // Instance-side helper: broadcast into this vector's own species.
    /*package-private*/
    @ForceInline
    final ShortVector broadcastTemplate(short e) {
        ShortSpecies vsp = vspecies();
        return vsp.broadcast(e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #broadcast(short) the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.broadcast((short)e)}.
     * The two expressions will produce numerically identical results.
     */
    @Override
    public abstract ShortVector broadcast(long e);

    /**
     * Returns a vector of the given species
     * where all lane elements are set to
     * the primitive value {@code e}.
     *
     * The {@code long} value must be accurately representable
     * by the {@code ETYPE} of the vector species, so that
     * {@code e==(long)(ETYPE)e}.
     *
     * @param species species of the desired vector
     * @param e the value to broadcast
     * @return a vector where all lane elements are set to
     *         the primitive value {@code e}
     * @throws IllegalArgumentException
     *         if the given {@code long} value cannot
     *         be represented by the vector's {@code ETYPE}
     * @see #broadcast(VectorSpecies,short)
     * @see VectorSpecies#checkValue(long)
     */
    public static ShortVector broadcast(VectorSpecies<Short> species, long e) {
        ShortSpecies vsp = (ShortSpecies) species;
        return vsp.broadcast(e);
    }

    /*package-private*/
    @ForceInline
    final ShortVector broadcastTemplate(long e) {
        return vspecies().broadcast(e);
    }

    /**
     * Returns a vector where each lane element is set to given
     * primitive values.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index, the
     * primitive value at index {@code N} is placed into the resulting
     * vector at lane index {@code N}.
     *
     * @param species species of the desired vector
     * @param es the given primitive values
     * @return a vector where each lane element is set to given primitive
     *         values
     * @throws IllegalArgumentException
     *         if {@code es.length != species.length()}
     */
    @ForceInline
    @SuppressWarnings("unchecked")
    public static ShortVector fromValues(VectorSpecies<Short> species, short... es) {
        ShortSpecies vsp = (ShortSpecies) species;
        int vlength = vsp.laneCount();
        VectorIntrinsics.requireLength(es.length, vlength);
        // Get an unaliased copy and use it directly:
        return vsp.vectorFactory(Arrays.copyOf(es, vlength));
    }

    /**
     * Returns a vector where the first lane element is set to the primitive
     * value {@code e}, all other lane elements are set to the default
     * value (zero).
     *
     * @param species species of the desired vector
     * @param e the value
     * @return a vector where the first lane element is set to the primitive
     *         value {@code e}
     */
    // FIXME: Does this carry its weight?
    @ForceInline
    public static ShortVector single(VectorSpecies<Short> species, short e) {
        return zero(species).withLane(0, e);
    }

    /**
     * Returns a vector where each lane element is set to a randomly
     * generated primitive value.
     *
     * The semantics are equivalent to calling
     * {@code (short)}{@link ThreadLocalRandom#nextInt()}
     * for each lane, from first to last.
     *
     * @param species species of the desired vector
     * @return a vector where each lane element is set to a randomly
     *         generated primitive value
     */
    public static ShortVector random(VectorSpecies<Short> species) {
        ShortSpecies vsp = (ShortSpecies) species;
        ThreadLocalRandom r = ThreadLocalRandom.current();
        return vsp.vOp(i -> nextRandom(r));
    }
    private static short nextRandom(ThreadLocalRandom r) {
        // Truncate a full int of randomness down to a short lane value.
        return (short) r.nextInt();
    }

    // Unary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     */
    public abstract
    ShortVector lanewise(VectorOperators.Unary op);

    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Unary op) {
        if (opKind(op, VO_SPECIAL)) {
            if (op == ZOMO) {
                // ZOMO: zero lanes stay zero, nonzero lanes become -1.
                return blend(broadcast(-1), compare(NE, 0));
            }
            if (op == NEG) {
                // FIXME: Support this in the JIT.
                // NEG(x) == 0 - x.
                return broadcast(0).lanewiseTemplate(SUB, this);
            }
        }
        int opc = opCode(op);
        return VectorIntrinsics.unaryOp(
            opc, getClass(), short.class, length(),
            this,
            // Scalar fallbacks, cached per opcode; used when the
            // intrinsic is unavailable.
            UN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_NEG: return v0 ->
                        v0.uOp((i, a) -> (short) -a);
                case VECTOR_OP_ABS: return v0 ->
                        v0.uOp((i, a) -> (short) Math.abs(a));
                case VECTOR_OP_NOT: return v0 ->
                        v0.uOp((i, a) -> (short) ~a);
                default: return null;
              }}));
    }
    private static final
    ImplCache<Unary,UnaryOperator<ShortVector>> UN_IMPL
        = new ImplCache<>(Unary.class, ShortVector.class);

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Unary op,
                         VectorMask<Short> m) {
        // Masked form: compute unmasked, then blend under the mask.
        return blend(lanewise(op), m);
    }

    // Binary lanewise support

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short)
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Binary op,
                         Vector<Short> v);
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Binary op,
                                 Vector<Short> v) {
        ShortVector that = (ShortVector) v;
        that.check(this);
        if (opKind(op, VO_SPECIAL | VO_SHIFT)) {
            if (op == FIRST_NONZERO) {
                // FIXME: Support this in the JIT.
                // Rewrite as OR after zeroing that's lanes wherever
                // this vector is already nonzero.
                VectorMask<Short> thisNZ
                    = this.viewAsIntegralLanes().compare(NE, (short) 0);
                that = that.blend((short) 0, thisNZ.cast(vspecies()));
                op = OR_UNCHECKED;
            }
            if (opKind(op, VO_SHIFT)) {
                // As per shift specification for Java, mask the shift count.
                // This allows the JIT to ignore some ISA details.
                that = that.lanewise(AND, SHIFT_MASK);
            }
            if (op == ROR || op == ROL) {  // FIXME: JIT should do this
                // Rotation is two opposite shifts OR-ed together.
                ShortVector neg = that.lanewise(NEG);
                ShortVector hi = this.lanewise(LSHL, (op == ROR) ?
                        neg : that);
                ShortVector lo = this.lanewise(LSHR, (op == ROR) ? that : neg);
                return hi.lanewise(OR, lo);
            } else if (op == AND_NOT) {
                // FIXME: Support this in the JIT.
                // AND_NOT(a, b) == AND(a, NOT(b)).
                that = that.lanewise(NOT);
                op = AND;
            } else if (op == DIV) {
                // Check divisors eagerly so the intrinsic never traps.
                VectorMask<Short> eqz = that.eq((short)0);
                if (eqz.anyTrue()) {
                    throw that.divZeroException();
                }
            }
        }
        int opc = opCode(op);
        return VectorIntrinsics.binaryOp(
            opc, getClass(), short.class, length(),
            this, that,
            // Scalar fallbacks, cached per opcode.
            BIN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_ADD: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a + b));
                case VECTOR_OP_SUB: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a - b));
                case VECTOR_OP_MUL: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a * b));
                case VECTOR_OP_DIV: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a / b));
                case VECTOR_OP_MAX: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)Math.max(a, b));
                case VECTOR_OP_MIN: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)Math.min(a, b));
                case VECTOR_OP_FIRST_NONZERO: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ?
                                a : b);
                case VECTOR_OP_AND: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a & b));
                case VECTOR_OP_OR: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a | b));
                case VECTOR_OP_AND_NOT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a & ~b));
                case VECTOR_OP_XOR: return (v0, v1) ->
                        v0.bOp(v1, (i, a, b) -> (short)(a ^ b));
                case VECTOR_OP_LSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)(a << n));
                case VECTOR_OP_RSHIFT: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)(a >> n));
                case VECTOR_OP_URSHIFT: return (v0, v1) ->
                        // Simulate >>> on a sub-word type by masking first.
                        v0.bOp(v1, (i, a, n) -> (short)((a & LSHR_SETUP_MASK) >>> n));
                case VECTOR_OP_LROTATE: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)((a << n)|(a >> -n)));
                case VECTOR_OP_RROTATE: return (v0, v1) ->
                        v0.bOp(v1, (i, a, n) -> (short)((a >> n)|(a << -n)));
                default: return null;
              }}));
    }
    private static final
    ImplCache<Binary,BinaryOperator<ShortVector>> BIN_IMPL
        = new ImplCache<>(Binary.class, ShortVector.class);

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                         Vector<Short> v,
                         VectorMask<Short> m) {
        ShortVector that = (ShortVector) v;
        if (op == DIV) {
            // suppress div/0 exceptions in unset lanes:
            // flip zero divisors (to all-ones) before dividing,
            // then blend so unset lanes keep this vector's values.
            that = that.lanewise(NOT, that.eq((short)0));
            return blend(lanewise(DIV, that), m);
        }
        return blend(lanewise(op, v), m);
    }
    // FIXME: Maybe all of the public final methods in this file (the
    // simple ones that just call lanewise) should be pushed down to
    // the X-VectorBits template.  They can't optimize properly at
    // this level, and must rely on inlining.  Does it work?
    // (If it works, of course keep the code here.)

    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar.
     *
     * This is a lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e))}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @return the result of applying the operation lane-wise
     *         to the two input vectors
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector)
     * @see #lanewise(VectorOperators.Binary,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                         short e) {
        // opc is unused below; calling opCode(op) validates the
        // operator eagerly on every path.
        int opc = opCode(op);
        if (opKind(op, VO_SHIFT) && (short)(int)e == e) {
            // In-range shift counts take the scalar-shift fast path.
            return lanewiseShift(op, (int) e);
        }
        if (op == AND_NOT) {
            // AND_NOT(a, e) == AND(a, ~e); fold the NOT into the scalar.
            op = AND; e = (short) ~e;
        }
        return lanewise(op, broadcast(e));
    }

    /**
     * Combines the lane values of this vector
     * with the value of a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise binary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e), m)}.
     *
     * @param op the operation used to process lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Binary,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Binary,short)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                         short e,
                         VectorMask<Short> m) {
        return blend(lanewise(op, e), m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #lanewise(VectorOperators.Binary,short)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,(short)e)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                         long e) {
        short e1 = (short) e;
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e1 == e)
            ) {
            vspecies().checkValue(e);  // for exception
        }
        return lanewise(op, e1);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     * @apiNote
     * When working with vector subtypes like {@code ShortVector},
     * {@linkplain #lanewise(VectorOperators.Binary,short,VectorMask)
     * the more strongly typed method}
     * is typically selected.  It can be explicitly selected
     * using a cast: {@code v.lanewise(op,(short)e,m)}.
     * The two expressions will produce numerically identical results.
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Binary op,
                         long e, VectorMask<Short> m) {
        return blend(lanewise(op, e), m);
    }

    /*package-private*/
    abstract ShortVector
    lanewiseShift(VectorOperators.Binary op, int e);

    // Shift-by-scalar fast path: the count is broadcast as an int
    // rather than materialized as a whole vector.
    /*package-private*/
    @ForceInline
    final ShortVector
    lanewiseShiftTemplate(VectorOperators.Binary op, int e) {
        // Special handling for these.  FIXME: Refactor?
        int opc = opCode(op);
        assert(opKind(op, VO_SHIFT));
        // As per shift specification for Java, mask the shift count.
        e &= SHIFT_MASK;
        if (op == ROR || op == ROL) {  // FIXME: JIT should do this
            // Rotation is two opposite shifts OR-ed together.
            ShortVector hi = this.lanewise(LSHL, (op == ROR) ? -e : e);
            ShortVector lo = this.lanewise(LSHR, (op == ROR) ? e : -e);
            return hi.lanewise(OR, lo);
        }
        return VectorIntrinsics.broadcastInt(
            opc, getClass(), short.class, length(),
            this, e,
            // Scalar fallbacks, cached per opcode.
            BIN_INT_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_LSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (short)(a << n));
                case VECTOR_OP_RSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (short)(a >> n));
                case VECTOR_OP_URSHIFT: return (v, n) ->
                        v.uOp((i, a) -> (short)((a & LSHR_SETUP_MASK) >>> n));
                case VECTOR_OP_LROTATE: return (v, n) ->
                        v.uOp((i, a) -> (short)((a << n)|(a >> -n)));
                case VECTOR_OP_RROTATE: return (v, n) ->
                        v.uOp((i, a) -> (short)((a >> n)|(a << -n)));
                default: return null;
              }}));
    }
    private static final
    ImplCache<Binary,VectorBroadcastIntOp<ShortVector>> BIN_INT_IMPL
        = new ImplCache<>(Binary.class, ShortVector.class);

    // As per shift specification for Java, mask the shift count.
    // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte).
    // The latter two maskings go beyond the JLS, but seem reasonable
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = (Short.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask.
    private static final int LSHR_SETUP_MASK = ((1 << Short.SIZE) - 1);

    // Ternary lanewise support

    // Ternary operators come in eight variations:
    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)

    // It is annoying to support all of these variations of masking
    // and broadcast, but it would be more surprising not to continue
    // the obvious pattern started by unary and binary.

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,short,short)
     * @see #lanewise(VectorOperators.Ternary,Vector,short)
     * @see #lanewise(VectorOperators.Ternary,short,Vector)
     */
    @Override
    public abstract
    ShortVector lanewise(VectorOperators.Ternary op,
                         Vector<Short> v1,
                         Vector<Short> v2);
    @ForceInline
    final
    ShortVector lanewiseTemplate(VectorOperators.Ternary op,
                                 Vector<Short> v1,
                                 Vector<Short> v2) {
        ShortVector that = (ShortVector) v1;
        ShortVector tother = (ShortVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        //   "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // BITWISE_BLEND(a,b,c) == a^((a^b)&c): two XORs and an AND.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
        int opc = opCode(op);
        return VectorIntrinsics.ternaryOp(
            opc, getClass(), short.class, length(),
            this, that, tother,
            // Scalar fallback, cached per opcode.
            TERN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_BITWISE_BLEND: return (v0, v1_, v2_) ->
                        v0.tOp(v1_, v2_, (i, a, b, c) -> (short)(a^((a^b)&c)));
                default: return null;
              }}));
    }
    private static final
    ImplCache<Ternary,TernaryOperation<ShortVector>> TERN_IMPL
        = new ImplCache<>(Ternary.class, ShortVector.class);

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Ternary op,
                         Vector<Short> v1,
                         Vector<Short> v2,
                         VectorMask<Short> m) {
        return blend(lanewise(op, v1, v2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of two broadcast scalars.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the first input scalar
     * @param e2 the second input scalar
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalars
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
                         short e1,
                         short e2) {
        return lanewise(op, broadcast(e1), broadcast(e2));
    }

    /**
     * Combines the lane values of this vector
     * with the values of two broadcast scalars,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the first input scalar
     * @param e2 the second input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalars
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,short,short)
     */
    @ForceInline
    public final
    ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
                         short e1,
                         short e2,
                         VectorMask<Short> m) {
        return blend(lanewise(op, e1, e2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar.
1033 * 1034 * This is a lane-wise ternary operation which applies 1035 * the selected operation to each lane. 1036 * The return value will be equal to this expression: 1037 * {@code this.lanewise(op, v1, this.broadcast(e2))}. 1038 * 1039 * @param op the operation used to combine lane values 1040 * @param v1 the other input vector 1041 * @param e2 the input scalar 1042 * @return the result of applying the operation lane-wise 1043 * to the input vectors and the scalar 1044 * @throws UnsupportedOperationException if this vector does 1045 * not support the requested operation 1046 * @see #lanewise(VectorOperators.Ternary,short,short) 1047 * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) 1048 */ 1049 @ForceInline 1050 public final 1051 ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2) 1052 Vector<Short> v1, 1053 short e2) { 1054 return lanewise(op, v1, broadcast(e2)); 1055 } 1056 1057 /** 1058 * Combines the lane values of this vector 1059 * with the values of another vector and a broadcast scalar, 1060 * with selection of lane elements controlled by a mask. 1061 * 1062 * This is a masked lane-wise ternary operation which applies 1063 * the selected operation to each lane. 1064 * The return value will be equal to this expression: 1065 * {@code this.lanewise(op, v1, this.broadcast(e2), m)}. 
1066 * 1067 * @param op the operation used to combine lane values 1068 * @param v1 the other input vector 1069 * @param e2 the input scalar 1070 * @param m the mask controlling lane selection 1071 * @return the result of applying the operation lane-wise 1072 * to the input vectors and the scalar 1073 * @throws UnsupportedOperationException if this vector does 1074 * not support the requested operation 1075 * @see #lanewise(VectorOperators.Ternary,Vector,Vector) 1076 * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) 1077 * @see #lanewise(VectorOperators.Ternary,Vector,short) 1078 */ 1079 @ForceInline 1080 public final 1081 ShortVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m) 1082 Vector<Short> v1, 1083 short e2, 1084 VectorMask<Short> m) { 1085 return blend(lanewise(op, v1, e2), m); 1086 } 1087 1088 /** 1089 * Combines the lane values of this vector 1090 * with the values of another vector and a broadcast scalar. 1091 * 1092 * This is a lane-wise ternary operation which applies 1093 * the selected operation to each lane. 1094 * The return value will be equal to this expression: 1095 * {@code this.lanewise(op, this.broadcast(e1), v2)}. 
1096 * 1097 * @param op the operation used to combine lane values 1098 * @param e1 the input scalar 1099 * @param v2 the other input vector 1100 * @return the result of applying the operation lane-wise 1101 * to the input vectors and the scalar 1102 * @throws UnsupportedOperationException if this vector does 1103 * not support the requested operation 1104 * @see #lanewise(VectorOperators.Ternary,Vector,Vector) 1105 * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask) 1106 */ 1107 @ForceInline 1108 public final 1109 ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2) 1110 short e1, 1111 Vector<Short> v2) { 1112 return lanewise(op, broadcast(e1), v2); 1113 } 1114 1115 /** 1116 * Combines the lane values of this vector 1117 * with the values of another vector and a broadcast scalar, 1118 * with selection of lane elements controlled by a mask. 1119 * 1120 * This is a masked lane-wise ternary operation which applies 1121 * the selected operation to each lane. 1122 * The return value will be equal to this expression: 1123 * {@code this.lanewise(op, this.broadcast(e1), v2, m)}. 1124 * 1125 * @param op the operation used to combine lane values 1126 * @param e1 the input scalar 1127 * @param v2 the other input vector 1128 * @param m the mask controlling lane selection 1129 * @return the result of applying the operation lane-wise 1130 * to the input vectors and the scalar 1131 * @throws UnsupportedOperationException if this vector does 1132 * not support the requested operation 1133 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 1134 * @see #lanewise(VectorOperators.Ternary,short,Vector) 1135 */ 1136 @ForceInline 1137 public final 1138 ShortVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m) 1139 short e1, 1140 Vector<Short> v2, 1141 VectorMask<Short> m) { 1142 return blend(lanewise(op, e1, v2), m); 1143 } 1144 1145 // (Thus endeth the Great and Mighty Ternary Ogdoad.) 
1146 // https://en.wikipedia.org/wiki/Ogdoad 1147 1148 /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV 1149 // 1150 // These include masked and non-masked versions. 1151 // This subclass adds broadcast (masked or not). 1152 1153 /** 1154 * {@inheritDoc} <!--workaround--> 1155 * @see #add(short) 1156 */ 1157 @Override 1158 @ForceInline 1159 public final ShortVector add(Vector<Short> v) { 1160 return lanewise(ADD, v); 1161 } 1162 1163 /** 1164 * Adds this vector to the broadcast of an input scalar. 1165 * 1166 * This is a lane-wise binary operation which applies 1167 * the primitive addition operation ({@code +}) to each lane. 1168 * 1169 * This method is also equivalent to the expression 1170 * {@link #lanewise(VectorOperators.Binary,short) 1171 * lanewise}{@code (}{@link VectorOperators#ADD 1172 * ADD}{@code , e)}. 1173 * 1174 * @param e the input scalar 1175 * @return the result of adding each lane of this vector to the scalar 1176 * @see #add(Vector) 1177 * @see #broadcast(short) 1178 * @see #add(short,VectorMask) 1179 * @see VectorOperators#ADD 1180 * @see #lanewise(VectorOperators.Binary,Vector) 1181 * @see #lanewise(VectorOperators.Binary,short) 1182 */ 1183 @ForceInline 1184 public final 1185 ShortVector add(short e) { 1186 return lanewise(ADD, e); 1187 } 1188 1189 /** 1190 * {@inheritDoc} <!--workaround--> 1191 * @see #add(short,VectorMask) 1192 */ 1193 @Override 1194 @ForceInline 1195 public final ShortVector add(Vector<Short> v, 1196 VectorMask<Short> m) { 1197 return lanewise(ADD, v, m); 1198 } 1199 1200 /** 1201 * Adds this vector to the broadcast of an input scalar, 1202 * selecting lane elements controlled by a mask. 1203 * 1204 * This is a masked lane-wise binary operation which applies 1205 * the primitive addition operation ({@code +}) to each lane. 
1206 * 1207 * This method is also equivalent to the expression 1208 * {@link #lanewise(VectorOperators.Binary,short,VectorMask) 1209 * lanewise}{@code (}{@link VectorOperators#ADD 1210 * ADD}{@code , s, m)}. 1211 * 1212 * @param e the input scalar 1213 * @param m the mask controlling lane selection 1214 * @return the result of adding each lane of this vector to the scalar 1215 * @see #add(Vector,VectorMask) 1216 * @see #broadcast(short) 1217 * @see #add(short) 1218 * @see VectorOperators#ADD 1219 * @see #lanewise(VectorOperators.Binary,Vector) 1220 * @see #lanewise(VectorOperators.Binary,short) 1221 */ 1222 @ForceInline 1223 public final ShortVector add(short e, 1224 VectorMask<Short> m) { 1225 return lanewise(ADD, e, m); 1226 } 1227 1228 /** 1229 * {@inheritDoc} <!--workaround--> 1230 * @see #sub(short) 1231 */ 1232 @Override 1233 @ForceInline 1234 public final ShortVector sub(Vector<Short> v) { 1235 return lanewise(SUB, v); 1236 } 1237 1238 /** 1239 * Subtracts an input scalar from this vector. 1240 * 1241 * This is a masked lane-wise binary operation which applies 1242 * the primitive subtraction operation ({@code -}) to each lane. 1243 * 1244 * This method is also equivalent to the expression 1245 * {@link #lanewise(VectorOperators.Binary,short) 1246 * lanewise}{@code (}{@link VectorOperators#SUB 1247 * SUB}{@code , e)}. 
1248 * 1249 * @param e the input scalar 1250 * @return the result of subtracting the scalar from each lane of this vector 1251 * @see #sub(Vector) 1252 * @see #broadcast(short) 1253 * @see #sub(short,VectorMask) 1254 * @see VectorOperators#SUB 1255 * @see #lanewise(VectorOperators.Binary,Vector) 1256 * @see #lanewise(VectorOperators.Binary,short) 1257 */ 1258 @ForceInline 1259 public final ShortVector sub(short e) { 1260 return lanewise(SUB, e); 1261 } 1262 1263 /** 1264 * {@inheritDoc} <!--workaround--> 1265 * @see #sub(short,VectorMask) 1266 */ 1267 @Override 1268 @ForceInline 1269 public final ShortVector sub(Vector<Short> v, 1270 VectorMask<Short> m) { 1271 return lanewise(SUB, v, m); 1272 } 1273 1274 /** 1275 * Subtracts an input scalar from this vector 1276 * under the control of a mask. 1277 * 1278 * This is a masked lane-wise binary operation which applies 1279 * the primitive subtraction operation ({@code -}) to each lane. 1280 * 1281 * This method is also equivalent to the expression 1282 * {@link #lanewise(VectorOperators.Binary,short,VectorMask) 1283 * lanewise}{@code (}{@link VectorOperators#SUB 1284 * SUB}{@code , s, m)}. 1285 * 1286 * @param e the input scalar 1287 * @param m the mask controlling lane selection 1288 * @return the result of subtracting the scalar from each lane of this vector 1289 * @see #sub(Vector,VectorMask) 1290 * @see #broadcast(short) 1291 * @see #sub(short) 1292 * @see VectorOperators#SUB 1293 * @see #lanewise(VectorOperators.Binary,Vector) 1294 * @see #lanewise(VectorOperators.Binary,short) 1295 */ 1296 @ForceInline 1297 public final ShortVector sub(short e, 1298 VectorMask<Short> m) { 1299 return lanewise(SUB, e, m); 1300 } 1301 1302 /** 1303 * {@inheritDoc} <!--workaround--> 1304 * @see #mul(short) 1305 */ 1306 @Override 1307 @ForceInline 1308 public final ShortVector mul(Vector<Short> v) { 1309 return lanewise(MUL, v); 1310 } 1311 1312 /** 1313 * Multiplies this vector by the broadcast of an input scalar. 
1314 * 1315 * This is a lane-wise binary operation which applies 1316 * the primitive multiplication operation ({@code *}) to each lane. 1317 * 1318 * This method is also equivalent to the expression 1319 * {@link #lanewise(VectorOperators.Binary,short) 1320 * lanewise}{@code (}{@link VectorOperators#MUL 1321 * MUL}{@code , e)}. 1322 * 1323 * @param e the input scalar 1324 * @return the result of multiplying this vector by the given scalar 1325 * @see #mul(Vector) 1326 * @see #broadcast(short) 1327 * @see #mul(short,VectorMask) 1328 * @see VectorOperators#MUL 1329 * @see #lanewise(VectorOperators.Binary,Vector) 1330 * @see #lanewise(VectorOperators.Binary,short) 1331 */ 1332 @ForceInline 1333 public final ShortVector mul(short e) { 1334 return lanewise(MUL, e); 1335 } 1336 1337 /** 1338 * {@inheritDoc} <!--workaround--> 1339 * @see #mul(short,VectorMask) 1340 */ 1341 @Override 1342 @ForceInline 1343 public final ShortVector mul(Vector<Short> v, 1344 VectorMask<Short> m) { 1345 return lanewise(MUL, v, m); 1346 } 1347 1348 /** 1349 * Multiplies this vector by the broadcast of an input scalar, 1350 * selecting lane elements controlled by a mask. 1351 * 1352 * This is a masked lane-wise binary operation which applies 1353 * the primitive multiplication operation ({@code *}) to each lane. 1354 * 1355 * This method is also equivalent to the expression 1356 * {@link #lanewise(VectorOperators.Binary,short,VectorMask) 1357 * lanewise}{@code (}{@link VectorOperators#MUL 1358 * MUL}{@code , s, m)}. 
1359 * 1360 * @param e the input scalar 1361 * @param m the mask controlling lane selection 1362 * @return the result of muling each lane of this vector to the scalar 1363 * @see #mul(Vector,VectorMask) 1364 * @see #broadcast(short) 1365 * @see #mul(short) 1366 * @see VectorOperators#MUL 1367 * @see #lanewise(VectorOperators.Binary,Vector) 1368 * @see #lanewise(VectorOperators.Binary,short) 1369 */ 1370 @ForceInline 1371 public final ShortVector mul(short e, 1372 VectorMask<Short> m) { 1373 return lanewise(MUL, e, m); 1374 } 1375 1376 /** 1377 * {@inheritDoc} <!--workaround--> 1378 * @apiNote If there is a zero divisor, {@code 1379 * ArithmeticException} will be thrown. 1380 * @see #div(short) 1381 */ 1382 @Override 1383 @ForceInline 1384 public final ShortVector div(Vector<Short> v) { 1385 return lanewise(DIV, v); 1386 } 1387 1388 /** 1389 * Divides this vector by the broadcast of an input scalar. 1390 * 1391 * This is a lane-wise binary operation which applies 1392 * the primitive division operation ({@code /}) to each lane. 1393 * 1394 * This method is also equivalent to the expression 1395 * {@link #lanewise(VectorOperators.Binary,short) 1396 * lanewise}{@code (}{@link VectorOperators#DIV 1397 * DIV}{@code , e)}. 1398 * 1399 * @apiNote If there is a zero divisor, {@code 1400 * ArithmeticException} will be thrown. 
1401 *
1445 * 1446 * @param e the input scalar 1447 * @param m the mask controlling lane selection 1448 * @return the result of dividing each lane of this vector by the scalar 1449 * @see #div(Vector,VectorMask) 1450 * @see #broadcast(short) 1451 * @see #div(short) 1452 * @see VectorOperators#DIV 1453 * @see #lanewise(VectorOperators.Binary,Vector) 1454 * @see #lanewise(VectorOperators.Binary,short) 1455 */ 1456 @ForceInline 1457 public final ShortVector div(short e, 1458 VectorMask<Short> m) { 1459 return lanewise(DIV, e, m); 1460 } 1461 1462 /// END OF FULL-SERVICE BINARY METHODS 1463 1464 /// SECOND-TIER BINARY METHODS 1465 // 1466 // There are no masked versions. 1467 1468 /** 1469 * {@inheritDoc} <!--workaround--> 1470 */ 1471 @Override 1472 @ForceInline 1473 public final ShortVector min(Vector<Short> v) { 1474 return lanewise(MIN, v); 1475 } 1476 1477 // FIXME: "broadcast of an input scalar" is really wordy. Reduce? 1478 /** 1479 * Computes the smaller of this vector and the broadcast of an input scalar. 1480 * 1481 * This is a lane-wise binary operation which applies the 1482 * operation {@code Math.min()} to each pair of 1483 * corresponding lane values. 1484 * 1485 * This method is also equivalent to the expression 1486 * {@link #lanewise(VectorOperators.Binary,short) 1487 * lanewise}{@code (}{@link VectorOperators#MIN 1488 * MIN}{@code , e)}. 
1489 * 1490 * @param e the input scalar 1491 * @return the result of multiplying this vector by the given scalar 1492 * @see #min(Vector) 1493 * @see #broadcast(short) 1494 * @see VectorOperators#MIN 1495 * @see #lanewise(VectorOperators.Binary,short,VectorMask) 1496 */ 1497 @ForceInline 1498 public final ShortVector min(short e) { 1499 return lanewise(MIN, e); 1500 } 1501 1502 /** 1503 * {@inheritDoc} <!--workaround--> 1504 */ 1505 @Override 1506 @ForceInline 1507 public final ShortVector max(Vector<Short> v) { 1508 return lanewise(MAX, v); 1509 } 1510 1511 /** 1512 * Computes the larger of this vector and the broadcast of an input scalar. 1513 * 1514 * This is a lane-wise binary operation which applies the 1515 * operation {@code Math.max()} to each pair of 1516 * corresponding lane values. 1517 * 1518 * This method is also equivalent to the expression 1519 * {@link #lanewise(VectorOperators.Binary,short) 1520 * lanewise}{@code (}{@link VectorOperators#MAX 1521 * MAX}{@code , e)}. 1522 * 1523 * @param e the input scalar 1524 * @return the result of multiplying this vector by the given scalar 1525 * @see #max(Vector) 1526 * @see #broadcast(short) 1527 * @see VectorOperators#MAX 1528 * @see #lanewise(VectorOperators.Binary,short,VectorMask) 1529 */ 1530 @ForceInline 1531 public final ShortVector max(short e) { 1532 return lanewise(MAX, e); 1533 } 1534 1535 // common bitwise operators: and, or, not (with scalar versions) 1536 /** 1537 * Computes the bitwise logical conjunction ({@code &}) 1538 * of this vector and a second input vector. 1539 * 1540 * This is a lane-wise binary operation which applies the 1541 * the primitive bitwise "and" operation ({@code &}) 1542 * to each pair of corresponding lane values. 1543 * 1544 * This method is also equivalent to the expression 1545 * {@link #lanewise(VectorOperators.Binary,Vector) 1546 * lanewise}{@code (}{@link VectorOperators#AND 1547 * AND}{@code , v)}. 
1548 * 1549 * <p> 1550 * This is not a full-service named operation like 1551 * {@link #add(Vector) add}. A masked version of 1552 * version of this operation is not directly available 1553 * but may be obtained via the masked version of 1554 * {@code lanewise}. 1555 * 1556 * @param v a second input vector 1557 * @return the bitwise {@code &} of this vector and the second input vector 1558 * @see #and(short) 1559 * @see #or(Vector) 1560 * @see #not() 1561 * @see VectorOperators#AND 1562 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1563 */ 1564 @ForceInline 1565 public final ShortVector and(Vector<Short> v) { 1566 return lanewise(AND, v); 1567 } 1568 1569 /** 1570 * Computes the bitwise logical conjunction ({@code &}) 1571 * of this vector and a scalar. 1572 * 1573 * This is a lane-wise binary operation which applies the 1574 * the primitive bitwise "and" operation ({@code &}) 1575 * to each pair of corresponding lane values. 1576 * 1577 * This method is also equivalent to the expression 1578 * {@link #lanewise(VectorOperators.Binary,Vector) 1579 * lanewise}{@code (}{@link VectorOperators#AND 1580 * AND}{@code , e)}. 1581 * 1582 * @param e an input scalar 1583 * @return the bitwise {@code &} of this vector and scalar 1584 * @see #and(Vector) 1585 * @see VectorOperators#AND 1586 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1587 */ 1588 @ForceInline 1589 public final ShortVector and(short e) { 1590 return lanewise(AND, e); 1591 } 1592 1593 /** 1594 * Computes the bitwise logical disjunction ({@code |}) 1595 * of this vector and a second input vector. 1596 * 1597 * This is a lane-wise binary operation which applies the 1598 * the primitive bitwise "or" operation ({@code |}) 1599 * to each pair of corresponding lane values. 1600 * 1601 * This method is also equivalent to the expression 1602 * {@link #lanewise(VectorOperators.Binary,Vector) 1603 * lanewise}{@code (}{@link VectorOperators#OR 1604 * AND}{@code , v)}. 
1605 * 1606 * <p> 1607 * This is not a full-service named operation like 1608 * {@link #add(Vector) add}. A masked version of 1609 * version of this operation is not directly available 1610 * but may be obtained via the masked version of 1611 * {@code lanewise}. 1612 * 1613 * @param v a second input vector 1614 * @return the bitwise {@code |} of this vector and the second input vector 1615 * @see #or(short) 1616 * @see #and(Vector) 1617 * @see #not() 1618 * @see VectorOperators#OR 1619 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1620 */ 1621 @ForceInline 1622 public final ShortVector or(Vector<Short> v) { 1623 return lanewise(OR, v); 1624 } 1625 1626 /** 1627 * Computes the bitwise logical disjunction ({@code |}) 1628 * of this vector and a scalar. 1629 * 1630 * This is a lane-wise binary operation which applies the 1631 * the primitive bitwise "or" operation ({@code |}) 1632 * to each pair of corresponding lane values. 1633 * 1634 * This method is also equivalent to the expression 1635 * {@link #lanewise(VectorOperators.Binary,Vector) 1636 * lanewise}{@code (}{@link VectorOperators#OR 1637 * OR}{@code , e)}. 1638 * 1639 * @param e an input scalar 1640 * @return the bitwise {@code |} of this vector and scalar 1641 * @see #or(Vector) 1642 * @see VectorOperators#OR 1643 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1644 */ 1645 @ForceInline 1646 public final ShortVector or(short e) { 1647 return lanewise(OR, e); 1648 } 1649 1650 1651 1652 /// UNARY METHODS 1653 1654 /** 1655 * {@inheritDoc} <!--workaround--> 1656 */ 1657 @Override 1658 @ForceInline 1659 public final 1660 ShortVector neg() { 1661 return lanewise(NEG); 1662 } 1663 1664 /** 1665 * {@inheritDoc} <!--workaround--> 1666 */ 1667 @Override 1668 @ForceInline 1669 public final 1670 ShortVector abs() { 1671 return lanewise(ABS); 1672 } 1673 1674 // not (~) 1675 /** 1676 * Computes the bitwise logical complement ({@code ~}) 1677 * of this vector. 
1679 * This is a lane-wise unary operation which applies 1680 * the primitive
1748 * 1749 * This is a lane-wise binary test operation which applies 1750 * the primitive less than operation ({@code <}) to each lane. 1751 * The result is the same as {@code compare(VectorOperators.LT, e)}. 1752 * 1753 * @param e the input scalar 1754 * @return the mask result of testing if this vector 1755 * is less than the input scalar 1756 * @see #compare(VectorOperators.Comparison,short) 1757 */ 1758 @ForceInline 1759 public final 1760 VectorMask<Short> lt(short e) { 1761 return compare(LT, e); 1762 } 1763 1764 /** 1765 * {@inheritDoc} <!--workaround--> 1766 */ 1767 @Override 1768 public abstract 1769 VectorMask<Short> test(VectorOperators.Test op); 1770 1771 /*package-private*/ 1772 @ForceInline 1773 final 1774 <M extends VectorMask<Short>> 1775 M testTemplate(Class<M> maskType, Test op) { 1776 ShortSpecies vsp = vspecies(); 1777 if (opKind(op, VO_SPECIAL)) { 1778 ShortVector bits = this.viewAsIntegralLanes(); 1779 VectorMask<Short> m; 1780 if (op == IS_DEFAULT) { 1781 m = bits.compare(EQ, (short) 0); 1782 } else if (op == IS_NEGATIVE) { 1783 m = bits.compare(LT, (short) 0); 1784 } 1785 else { 1786 throw new AssertionError(op); 1787 } 1788 return maskType.cast(m); 1789 } 1790 int opc = opCode(op); 1791 throw new AssertionError(op); 1792 } 1793 1794 /** 1795 * {@inheritDoc} <!--workaround--> 1796 */ 1797 @Override 1798 @ForceInline 1799 public final 1800 VectorMask<Short> test(VectorOperators.Test op, 1801 VectorMask<Short> m) { 1802 return test(op).and(m); 1803 } 1804 1805 /** 1806 * {@inheritDoc} <!--workaround--> 1807 */ 1808 @Override 1809 public abstract 1810 VectorMask<Short> compare(VectorOperators.Comparison op, Vector<Short> v); 1811 1812 /*package-private*/ 1813 @ForceInline 1814 final 1815 <M extends VectorMask<Short>> 1816 M compareTemplate(Class<M> maskType, Comparison op, Vector<Short> v) { 1817 Objects.requireNonNull(v); 1818 ShortSpecies vsp = vspecies(); 1819 ShortVector that = (ShortVector) v; 1820 that.check(this); 1821 int opc = 
opCode(op); 1822 return VectorIntrinsics.compare( 1823 opc, getClass(), maskType, short.class, length(), 1824 this, that, 1825 (cond, v0, v1) -> { 1826 AbstractMask<Short> m 1827 = v0.bTest(cond, v1, (cond_, i, a, b) 1828 -> compareWithOp(cond, a, b)); 1829 @SuppressWarnings("unchecked") 1830 M m2 = (M) m; 1831 return m2; 1832 }); 1833 } 1834 1835 @ForceInline 1836 private static 1837 boolean compareWithOp(int cond, short a, short b) { 1838 switch (cond) { 1839 case VectorIntrinsics.BT_eq: return a == b; 1840 case VectorIntrinsics.BT_ne: return a != b; 1841 case VectorIntrinsics.BT_lt: return a < b; 1842 case VectorIntrinsics.BT_le: return a <= b; 1843 case VectorIntrinsics.BT_gt: return a > b; 1844 case VectorIntrinsics.BT_ge: return a >= b; 1845 } 1846 throw new AssertionError(); 1847 } 1848 1849 /** 1850 * {@inheritDoc} <!--workaround--> 1851 */ 1852 @Override 1853 @ForceInline 1854 public final 1855 VectorMask<Short> compare(VectorOperators.Comparison op, 1856 Vector<Short> v, 1857 VectorMask<Short> m) { 1858 return compare(op, v).and(m); 1859 } 1860 1861 /** 1862 * Tests this vector by comparing it with an input scalar, 1863 * according to the given comparison operation. 1864 * 1865 * This is a lane-wise binary test operation which applies 1866 * the comparison operation to each lane. 1867 * <p> 1868 * The result is the same as 1869 * {@code compare(op, broadcast(species(), e))}. 1870 * That is, the scalar may be regarded as broadcast to 1871 * a vector of the same species, and then compared 1872 * against the original vector, using the selected 1873 * comparison operation. 
1874 * 1875 * @param op the operation used to compare lane values 1876 * @param e the input scalar 1877 * @return the mask result of testing lane-wise if this vector 1878 * compares to the input, according to the selected 1879 * comparison operator 1880 * @see ShortVector#compare(VectorOperators.Comparison,Vector) 1881 * @see #eq(short) 1882 * @see #lt(short) 1883 */ 1884 public abstract 1885 VectorMask<Short> compare(Comparison op, short e); 1886 1887 /*package-private*/ 1888 @ForceInline 1889 final 1890 <M extends VectorMask<Short>> 1891 M compareTemplate(Class<M> maskType, Comparison op, short e) { 1892 return compareTemplate(maskType, op, broadcast(e)); 1893 } 1894 1895 /** 1896 * Tests this vector by comparing it with an input scalar, 1897 * according to the given comparison operation, 1898 * in lanes selected by a mask. 1899 * 1900 * This is a masked lane-wise binary test operation which applies 1901 * to each pair of corresponding lane values. 1902 * 1903 * The returned result is equal to the expression 1904 * {@code compare(op,s).and(m)}. 
1905 * 1906 * @param op the operation used to compare lane values 1907 * @param e the input scalar 1908 * @param m the mask controlling lane selection 1909 * @return the mask result of testing lane-wise if this vector 1910 * compares to the input, according to the selected 1911 * comparison operator, 1912 * and only in the lanes selected by the mask 1913 * @see ShortVector#compare(VectorOperators.Comparison,Vector,VectorMask) 1914 */ 1915 @ForceInline 1916 public final VectorMask<Short> compare(VectorOperators.Comparison op, 1917 short e, 1918 VectorMask<Short> m) { 1919 return compare(op, e).and(m); 1920 } 1921 1922 /** 1923 * {@inheritDoc} <!--workaround--> 1924 */ 1925 @Override 1926 public abstract 1927 VectorMask<Short> compare(Comparison op, long e); 1928 1929 /*package-private*/ 1930 @ForceInline 1931 final 1932 <M extends VectorMask<Short>> 1933 M compareTemplate(Class<M> maskType, Comparison op, long e) { 1934 return compareTemplate(maskType, op, broadcast(e)); 1935 } 1936 1937 /** 1938 * {@inheritDoc} <!--workaround--> 1939 */ 1940 @Override 1941 @ForceInline 1942 public final 1943 VectorMask<Short> compare(Comparison op, long e, VectorMask<Short> m) { 1944 return compare(op, broadcast(e), m); 1945 } 1946 1947 1948 1949 /** 1950 * {@inheritDoc} <!--workaround--> 1951 */ 1952 @Override public abstract 1953 ShortVector blend(Vector<Short> v, VectorMask<Short> m); 1954 1955 /*package-private*/ 1956 @ForceInline 1957 final 1958 <M extends VectorMask<Short>> 1959 ShortVector 1960 blendTemplate(Class<M> maskType, ShortVector v, M m) { 1961 v.check(this); 1962 return VectorIntrinsics.blend( 1963 getClass(), maskType, short.class, length(), 1964 this, v, m, 1965 (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b)); 1966 } 1967 1968 /** 1969 * {@inheritDoc} <!--workaround--> 1970 */ 1971 @Override public abstract ShortVector addIndex(int scale); 1972 1973 /*package-private*/ 1974 @ForceInline 1975 final ShortVector addIndexTemplate(int scale) { 1976 ShortSpecies vsp = 
vspecies(); 1977 // make sure VLENGTH*scale doesn't overflow: 1978 vsp.checkScale(scale); 1979 return VectorIntrinsics.indexVector( 1980 getClass(), short.class, length(), 1981 this, scale, vsp, 1982 (v, scale_, s) 1983 -> { 1984 // If the platform doesn't support an INDEX 1985 // instruction directly, load IOTA from memory 1986 // and multiply. 1987 ShortVector iota = s.iota(); 1988 short sc = (short) scale_; 1989 return v.add(sc == 1 ? iota : iota.mul(sc)); 1990 }); 1991 } 1992 1993 /** 1994 * Replaces selected lanes of this vector with 1995 * a scalar value 1996 * under the control of a mask. 1997 * 1998 * This is a masked lane-wise binary operation which 1999 * selects each lane value from one or the other input. 2000 * 2001 * The returned result is equal to the expression 2002 * {@code blend(broadcast(e),m)}. 2003 * 2004 * @param e the input scalar, containing the replacement lane value 2005 * @param m the mask controlling lane selection of the scalar 2006 * @return the result of blending the lane elements of this vector with 2007 * the scalar value 2008 */ 2009 @ForceInline 2010 public final ShortVector blend(short e, 2011 VectorMask<Short> m) { 2012 return blend(broadcast(e), m); 2013 } 2014 2015 /** 2016 * Replaces selected lanes of this vector with 2017 * a scalar value 2018 * under the control of a mask. 2019 * 2020 * This is a masked lane-wise binary operation which 2021 * selects each lane value from one or the other input. 2022 * 2023 * The returned result is equal to the expression 2024 * {@code blend(broadcast(e),m)}. 
2025 * 2026 * @param e the input scalar, containing the replacement lane value 2027 * @param m the mask controlling lane selection of the scalar 2028 * @return the result of blending the lane elements of this vector with 2029 * the scalar value 2030 */ 2031 @ForceInline 2032 public final ShortVector blend(long e, 2033 VectorMask<Short> m) { 2034 return blend(broadcast(e), m); 2035 } 2036 2037 /** 2038 * {@inheritDoc} <!--workaround--> 2039 */ 2040 @Override 2041 public abstract 2042 ShortVector slice(int origin, Vector<Short> v1); 2043 2044 /*package-private*/ 2045 final 2046 @ForceInline 2047 ShortVector sliceTemplate(int origin, Vector<Short> v1) { 2048 ShortVector that = (ShortVector) v1; 2049 that.check(this); 2050 short[] a0 = this.getElements(); 2051 short[] a1 = that.getElements(); 2052 short[] res = new short[a0.length]; 2053 int vlen = res.length; 2054 int firstPart = vlen - origin; 2055 System.arraycopy(a0, origin, res, 0, firstPart); 2056 System.arraycopy(a1, 0, res, firstPart, origin); 2057 return vectorFactory(res); 2058 } 2059 2060 /** 2061 * {@inheritDoc} <!--workaround--> 2062 */ 2063 @Override 2064 @ForceInline 2065 public final 2066 ShortVector slice(int origin, 2067 Vector<Short> w, 2068 VectorMask<Short> m) { 2069 return broadcast(0).blend(slice(origin, w), m); 2070 } 2071 2072 /** 2073 * {@inheritDoc} <!--workaround--> 2074 */ 2075 @Override 2076 public abstract 2077 ShortVector slice(int origin); 2078 2079 /** 2080 * {@inheritDoc} <!--workaround--> 2081 */ 2082 @Override 2083 public abstract 2084 ShortVector unslice(int origin, Vector<Short> w, int part); 2085 2086 /*package-private*/ 2087 final 2088 @ForceInline 2089 ShortVector 2090 unsliceTemplate(int origin, Vector<Short> w, int part) { 2091 ShortVector that = (ShortVector) w; 2092 that.check(this); 2093 short[] slice = this.getElements(); 2094 short[] res = that.getElements(); 2095 int vlen = res.length; 2096 int firstPart = vlen - origin; 2097 switch (part) { 2098 case 0: 2099 
System.arraycopy(slice, 0, res, origin, firstPart); 2100 break; 2101 case 1: 2102 System.arraycopy(slice, firstPart, res, 0, origin); 2103 break; 2104 default: 2105 throw wrongPartForSlice(part); 2106 } 2107 return vectorFactory(res); 2108 } 2109 2110 /*package-private*/ 2111 final 2112 @ForceInline 2113 <M extends VectorMask<Short>> 2114 ShortVector 2115 unsliceTemplate(Class<M> maskType, int origin, Vector<Short> w, int part, M m) { 2116 ShortVector that = (ShortVector) w; 2117 that.check(this); 2118 ShortVector slice = that.sliceTemplate(origin, that); 2119 slice = slice.blendTemplate(maskType, this, m); 2120 return slice.unsliceTemplate(origin, w, part); 2121 } 2122 2123 /** 2124 * {@inheritDoc} <!--workaround--> 2125 */ 2126 @Override 2127 public abstract 2128 ShortVector unslice(int origin, Vector<Short> w, int part, VectorMask<Short> m); 2129 2130 /** 2131 * {@inheritDoc} <!--workaround--> 2132 */ 2133 @Override 2134 public abstract 2135 ShortVector unslice(int origin); 2136 2137 private ArrayIndexOutOfBoundsException 2138 wrongPartForSlice(int part) { 2139 String msg = String.format("bad part number %d for slice operation", 2140 part); 2141 return new ArrayIndexOutOfBoundsException(msg); 2142 } 2143 2144 /** 2145 * {@inheritDoc} <!--workaround--> 2146 */ 2147 @Override 2148 public abstract 2149 ShortVector rearrange(VectorShuffle<Short> m); 2150 2151 /*package-private*/ 2152 @ForceInline 2153 final 2154 <S extends VectorShuffle<Short>> 2155 ShortVector rearrangeTemplate(Class<S> shuffletype, S shuffle) { 2156 shuffle.checkIndexes(); 2157 return VectorIntrinsics.rearrangeOp( 2158 getClass(), shuffletype, short.class, length(), 2159 this, shuffle, 2160 (v1, s_) -> v1.uOp((i, a) -> { 2161 int ei = s_.laneSource(i); 2162 return v1.lane(ei); 2163 })); 2164 } 2165 2166 /** 2167 * {@inheritDoc} <!--workaround--> 2168 */ 2169 @Override 2170 public abstract 2171 ShortVector rearrange(VectorShuffle<Short> s, 2172 VectorMask<Short> m); 2173 2174 /*package-private*/ 
2175 @ForceInline 2176 final 2177 <S extends VectorShuffle<Short>> 2178 ShortVector rearrangeTemplate(Class<S> shuffletype, 2179 S shuffle, 2180 VectorMask<Short> m) { 2181 ShortVector unmasked = 2182 VectorIntrinsics.rearrangeOp( 2183 getClass(), shuffletype, short.class, length(), 2184 this, shuffle, 2185 (v1, s_) -> v1.uOp((i, a) -> { 2186 int ei = s_.laneSource(i); 2187 return ei < 0 ? 0 : v1.lane(ei); 2188 })); 2189 VectorMask<Short> valid = shuffle.laneIsValid(); 2190 if (m.andNot(valid).anyTrue()) { 2191 shuffle.checkIndexes(); 2192 throw new AssertionError(); 2193 } 2194 return broadcast((short)0).blend(unmasked, valid); 2195 } 2196 2197 /** 2198 * {@inheritDoc} <!--workaround--> 2199 */ 2200 @Override 2201 public abstract 2202 ShortVector rearrange(VectorShuffle<Short> s, 2203 Vector<Short> v); 2204 2205 /*package-private*/ 2206 @ForceInline 2207 final 2208 <S extends VectorShuffle<Short>> 2209 ShortVector rearrangeTemplate(Class<S> shuffletype, 2210 S shuffle, 2211 ShortVector v) { 2212 VectorMask<Short> valid = shuffle.laneIsValid(); 2213 S ws = shuffletype.cast(shuffle.wrapIndexes()); 2214 ShortVector r0 = 2215 VectorIntrinsics.rearrangeOp( 2216 getClass(), shuffletype, short.class, length(), 2217 this, ws, 2218 (v0, s_) -> v0.uOp((i, a) -> { 2219 int ei = s_.laneSource(i); 2220 return v0.lane(ei); 2221 })); 2222 ShortVector r1 = 2223 VectorIntrinsics.rearrangeOp( 2224 getClass(), shuffletype, short.class, length(), 2225 v, ws, 2226 (v1, s_) -> v1.uOp((i, a) -> { 2227 int ei = s_.laneSource(i); 2228 return v1.lane(ei); 2229 })); 2230 return r1.blend(r0, valid); 2231 } 2232 2233 /** 2234 * {@inheritDoc} <!--workaround--> 2235 */ 2236 @Override 2237 public abstract 2238 ShortVector selectFrom(Vector<Short> v); 2239 2240 /*package-private*/ 2241 @ForceInline 2242 final ShortVector selectFromTemplate(ShortVector v) { 2243 return v.rearrange(this.toShuffle()); 2244 } 2245 2246 /** 2247 * {@inheritDoc} <!--workaround--> 2248 */ 2249 @Override 2250 public 
abstract 2251 ShortVector selectFrom(Vector<Short> s, VectorMask<Short> m); 2252 2253 /*package-private*/ 2254 @ForceInline 2255 final ShortVector selectFromTemplate(ShortVector v, 2256 AbstractMask<Short> m) { 2257 return v.rearrange(this.toShuffle(), m); 2258 } 2259 2260 /// Ternary operations 2261 2262 /** 2263 * Blends together the bits of two vectors under 2264 * the control of a third, which supplies mask bits. 2265 * 2266 * 2267 * This is a lane-wise ternary operation which performs 2268 * a bitwise blending operation {@code (a&~c)|(b&c)} 2269 * to each lane. 2270 * 2271 * This method is also equivalent to the expression 2272 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2273 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2274 * BITWISE_BLEND}{@code , bits, mask)}. 2275 * 2276 * @param bits input bits to blend into the current vector 2277 * @param mask a bitwise mask to enable blending of the input bits 2278 * @return the bitwise blend of the given bits into the current vector, 2279 * under control of the bitwise mask 2280 * @see #bitwiseBlend(short,short) 2281 * @see #bitwiseBlend(short,Vector) 2282 * @see #bitwiseBlend(Vector,short) 2283 * @see VectorOperators#BITWISE_BLEND 2284 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 2285 */ 2286 @ForceInline 2287 public final 2288 ShortVector bitwiseBlend(Vector<Short> bits, Vector<Short> mask) { 2289 return lanewise(BITWISE_BLEND, bits, mask); 2290 } 2291 2292 /** 2293 * Blends together the bits of a vector and a scalar under 2294 * the control of another scalar, which supplies mask bits. 2295 * 2296 * 2297 * This is a lane-wise ternary operation which performs 2298 * a bitwise blending operation {@code (a&~c)|(b&c)} 2299 * to each lane. 2300 * 2301 * This method is also equivalent to the expression 2302 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2303 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2304 * BITWISE_BLEND}{@code , bits, mask)}. 
2305 * 2306 * @param bits input bits to blend into the current vector 2307 * @param mask a bitwise mask to enable blending of the input bits 2308 * @return the bitwise blend of the given bits into the current vector, 2309 * under control of the bitwise mask 2310 * @see #bitwiseBlend(Vector,Vector) 2311 * @see VectorOperators#BITWISE_BLEND 2312 * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) 2313 */ 2314 @ForceInline 2315 public final 2316 ShortVector bitwiseBlend(short bits, short mask) { 2317 return lanewise(BITWISE_BLEND, bits, mask); 2318 } 2319 2320 /** 2321 * Blends together the bits of a vector and a scalar under 2322 * the control of another vector, which supplies mask bits. 2323 * 2324 * 2325 * This is a lane-wise ternary operation which performs 2326 * a bitwise blending operation {@code (a&~c)|(b&c)} 2327 * to each lane. 2328 * 2329 * This method is also equivalent to the expression 2330 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2331 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2332 * BITWISE_BLEND}{@code , bits, mask)}. 2333 * 2334 * @param bits input bits to blend into the current vector 2335 * @param mask a bitwise mask to enable blending of the input bits 2336 * @return the bitwise blend of the given bits into the current vector, 2337 * under control of the bitwise mask 2338 * @see #bitwiseBlend(Vector,Vector) 2339 * @see VectorOperators#BITWISE_BLEND 2340 * @see #lanewise(VectorOperators.Ternary,short,Vector,VectorMask) 2341 */ 2342 @ForceInline 2343 public final 2344 ShortVector bitwiseBlend(short bits, Vector<Short> mask) { 2345 return lanewise(BITWISE_BLEND, bits, mask); 2346 } 2347 2348 /** 2349 * Blends together the bits of two vectors under 2350 * the control of a scalar, which supplies mask bits. 2351 * 2352 * 2353 * This is a lane-wise ternary operation which performs 2354 * a bitwise blending operation {@code (a&~c)|(b&c)} 2355 * to each lane. 
2356 * 2357 * This method is also equivalent to the expression 2358 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2359 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2360 * BITWISE_BLEND}{@code , bits, mask)}. 2361 * 2362 * @param bits input bits to blend into the current vector 2363 * @param mask a bitwise mask to enable blending of the input bits 2364 * @return the bitwise blend of the given bits into the current vector, 2365 * under control of the bitwise mask 2366 * @see #bitwiseBlend(Vector,Vector) 2367 * @see VectorOperators#BITWISE_BLEND 2368 * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) 2369 */ 2370 @ForceInline 2371 public final 2372 ShortVector bitwiseBlend(Vector<Short> bits, short mask) { 2373 return lanewise(BITWISE_BLEND, bits, mask); 2374 } 2375 2376 2377 // Type specific horizontal reductions 2378 2379 /** 2380 * Returns a value accumulated from all the lanes of this vector. 2381 * 2382 * This is an associative cross-lane reduction operation which 2383 * applies the specified operation to all the lane elements. 2384 * 2385 * <p> 2386 * A few reduction operations do not support arbitrary reordering 2387 * of their operands, yet are included here because of their 2388 * usefulness. 2389 * 2390 * <ul> 2391 * <li> 2392 * In the case of {@code FIRST_NONZERO}, the reduction returns 2393 * the value from the lowest-numbered non-zero lane. 2394 * 2395 * 2396 * <li> 2397 * In the case of floating point addition and multiplication, the 2398 * precise result will reflect the choice of an arbitrary order 2399 * of operations, which may even vary over time. 2400 * 2401 * <li> 2402 * All other reduction operations are fully commutative and 2403 * associative. The implementation can choose any order of 2404 * processing, yet it will always produce the same result. 
2405 * 2406 * </ul> 2407 * 2408 * 2409 * @param op the operation used to combine lane values 2410 * @return the accumulated result 2411 * @throws UnsupportedOperationException if this vector does 2412 * not support the requested operation 2413 * @see #reduceLanes(VectorOperators.Associative,VectorMask) 2414 * @see #add(Vector) 2415 * @see #mul(Vector) 2416 * @see #min(Vector) 2417 * @see #max(Vector) 2418 * @see #and(Vector) 2419 * @see #or(Vector) 2420 * @see VectorOperators#XOR 2421 * @see VectorOperators#FIRST_NONZERO 2422 */ 2423 public abstract short reduceLanes(VectorOperators.Associative op); 2424 2425 /** 2426 * Returns a value accumulated from selected lanes of this vector, 2427 * controlled by a mask. 2428 * 2429 * This is an associative cross-lane reduction operation which 2430 * applies the specified operation to the selected lane elements. 2431 * <p> 2432 * If no elements are selected, an operation-specific identity 2433 * value is returned. 2434 * <ul> 2435 * <li> 2436 * If the operation is 2437 * {@code ADD}, {@code XOR}, {@code OR}, 2438 * or {@code FIRST_NONZERO}, 2439 * then the identity value is zero, the default {@code short} value. 2440 * <li> 2441 * If the operation is {@code MUL}, 2442 * then the identity value is one. 2443 * <li> 2444 * If the operation is {@code AND}, 2445 * then the identity value is minus one (all bits set). 2446 * <li> 2447 * If the operation is {@code MAX}, 2448 * then the identity value is {@code Short.MIN_VALUE}. 2449 * <li> 2450 * If the operation is {@code MIN}, 2451 * then the identity value is {@code Short.MAX_VALUE}. 
* </ul> 2453 * 2454 * @param op the operation used to combine lane values 2455 * @param m the mask controlling lane selection 2456 * @return the reduced result accumulated from the selected lane values 2457 * @throws UnsupportedOperationException if this vector does 2458 * not support the requested operation 2459 * @see #reduceLanes(VectorOperators.Associative) 2460 */ 2461 public abstract short reduceLanes(VectorOperators.Associative op, 2462 VectorMask<Short> m); 2463 2464 /*package-private*/ 2465 @ForceInline 2466 final 2467 short reduceLanesTemplate(VectorOperators.Associative op, 2468 VectorMask<Short> m) { 2469 ShortVector v = reduceIdentityVector(op).blend(this, m); 2470 return v.reduceLanesTemplate(op); 2471 } 2472 2473 /*package-private*/ 2474 @ForceInline 2475 final 2476 short reduceLanesTemplate(VectorOperators.Associative op) { 2477 if (op == FIRST_NONZERO) { 2478 // FIXME: The JIT should handle this, and other scan ops also. 2479 VectorMask<Short> thisNZ 2480 = this.viewAsIntegralLanes().compare(NE, (short) 0); 2481 return this.lane(thisNZ.firstTrue()); 2482 } 2483 int opc = opCode(op); 2484 return fromBits(VectorIntrinsics.reductionCoerced( 2485 opc, getClass(), short.class, length(), 2486 this, 2487 REDUCE_IMPL.find(op, opc, (opc_) -> { 2488 switch (opc_) { 2489 case VECTOR_OP_ADD: return v -> 2490 toBits(v.rOp((short)0, (i, a, b) -> (short)(a + b))); 2491 case VECTOR_OP_MUL: return v -> 2492 toBits(v.rOp((short)1, (i, a, b) -> (short)(a * b))); 2493 case VECTOR_OP_MIN: return v -> 2494 toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (short) Math.min(a, b))); 2495 case VECTOR_OP_MAX: return v -> 2496 toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (short) Math.max(a, b))); 2497 case VECTOR_OP_FIRST_NONZERO: return v -> 2498 toBits(v.rOp((short)0, (i, a, b) -> toBits(a) != 0 ? 
a : b)); 2499 case VECTOR_OP_AND: return v -> 2500 toBits(v.rOp((short)-1, (i, a, b) -> (short)(a & b))); 2501 case VECTOR_OP_OR: return v -> 2502 toBits(v.rOp((short)0, (i, a, b) -> (short)(a | b))); 2503 case VECTOR_OP_XOR: return v -> 2504 toBits(v.rOp((short)0, (i, a, b) -> (short)(a ^ b))); 2505 default: return null; 2506 }}))); 2507 } 2508 private static final 2509 ImplCache<Associative,Function<ShortVector,Long>> REDUCE_IMPL 2510 = new ImplCache<>(Associative.class, ShortVector.class); 2511 2512 private 2513 @ForceInline 2514 ShortVector reduceIdentityVector(VectorOperators.Associative op) { 2515 int opc = opCode(op); 2516 UnaryOperator<ShortVector> fn 2517 = REDUCE_ID_IMPL.find(op, opc, (opc_) -> { 2518 switch (opc_) { 2519 case VECTOR_OP_ADD: 2520 case VECTOR_OP_OR: 2521 case VECTOR_OP_XOR: 2522 case VECTOR_OP_FIRST_NONZERO: 2523 return v -> v.broadcast(0); 2524 case VECTOR_OP_MUL: 2525 return v -> v.broadcast(1); 2526 case VECTOR_OP_AND: 2527 return v -> v.broadcast(-1); 2528 case VECTOR_OP_MIN: 2529 return v -> v.broadcast(MAX_OR_INF); 2530 case VECTOR_OP_MAX: 2531 return v -> v.broadcast(MIN_OR_INF); 2532 default: return null; 2533 } 2534 }); 2535 return fn.apply(this); 2536 } 2537 private static final 2538 ImplCache<Associative,UnaryOperator<ShortVector>> REDUCE_ID_IMPL 2539 = new ImplCache<>(Associative.class, ShortVector.class); 2540 2541 private static final short MIN_OR_INF = Short.MIN_VALUE; 2542 private static final short MAX_OR_INF = Short.MAX_VALUE; 2543 2544 public @Override abstract long reduceLanesToLong(VectorOperators.Associative op); 2545 public @Override abstract long reduceLanesToLong(VectorOperators.Associative op, 2546 VectorMask<Short> m); 2547 2548 // Type specific accessors 2549 2550 /** 2551 * Gets the lane element at lane index {@code i} 2552 * 2553 * @param i the lane index 2554 * @return the lane element at lane index {@code i} 2555 * @throws IllegalArgumentException if the index is out of range 2556 * ({@code < 0 || >= 
length()}) 2557 */ 2558 public abstract short lane(int i); 2559 2560 /** 2561 * Replaces the lane element of this vector at lane index {@code i} with 2562 * value {@code e}. 2563 * 2564 * This is a cross-lane operation and behaves as if it returns the result 2565 * of blending this vector with an input vector that is the result of 2566 * broadcasting {@code e} and a mask that has only one lane set at lane 2567 * index {@code i}. 2568 * 2569 * @param i the lane index of the lane element to be replaced 2570 * @param e the value to be placed 2571 * @return the result of replacing the lane element of this vector at lane 2572 * index {@code i} with value {@code e}. 2573 * @throws IllegalArgumentException if the index is out of range 2574 * ({@code < 0 || >= length()}) 2575 */ 2576 public abstract ShortVector withLane(int i, short e); 2577 2578 // Memory load operations 2579 2580 /** 2581 * Returns an array of type {@code short[]} 2582 * containing all the lane values. 2583 * The array length is the same as the vector length. 2584 * The array elements are stored in lane order. 2585 * <p> 2586 * This method behaves as if it stores 2587 * this vector into an allocated array 2588 * (using {@link #intoArray(short[], int) intoArray}) 2589 * and returns the array as follows: 2590 * <pre>{@code 2591 * short[] a = new short[this.length()]; 2592 * this.intoArray(a, 0); 2593 * return a; 2594 * }</pre> 2595 * 2596 * @return an array containing the lane values of this vector 2597 */ 2598 @ForceInline 2599 @Override 2600 public final short[] toArray() { 2601 short[] a = new short[vspecies().laneCount()]; 2602 intoArray(a, 0); 2603 return a; 2604 } 2605 2606 /** {@inheritDoc} <!--workaround--> 2607 * @implNote 2608 * When this method is used on vectors 2609 * of type {@code ShortVector}, 2610 * there will be no loss of precision or range, 2611 * and so no {@code IllegalArgumentException} will 2612 * be thrown. 
*/ 2614 @ForceInline 2615 @Override 2616 public final int[] toIntArray() { 2617 short[] a = toArray(); 2618 int[] res = new int[a.length]; 2619 for (int i = 0; i < a.length; i++) { 2620 short e = a[i]; 2621 res[i] = (int) ShortSpecies.toIntegralChecked(e, true); 2622 } 2623 return res; 2624 } 2625 2626 /** {@inheritDoc} <!--workaround--> 2627 * @implNote 2628 * When this method is used on vectors 2629 * of type {@code ShortVector}, 2630 * there will be no loss of precision or range, 2631 * and so no {@code IllegalArgumentException} will 2632 * be thrown. 2633 */ 2634 @ForceInline 2635 @Override 2636 public final long[] toLongArray() { 2637 short[] a = toArray(); 2638 long[] res = new long[a.length]; 2639 for (int i = 0; i < a.length; i++) { 2640 short e = a[i]; 2641 res[i] = ShortSpecies.toIntegralChecked(e, false); 2642 } 2643 return res; 2644 } 2645 2646 /** {@inheritDoc} <!--workaround--> 2647 * @implNote 2648 * When this method is used on vectors 2649 * of type {@code ShortVector}, 2650 * there will be no loss of precision. 2651 */ 2652 @ForceInline 2653 @Override 2654 public final double[] toDoubleArray() { 2655 short[] a = toArray(); 2656 double[] res = new double[a.length]; 2657 for (int i = 0; i < a.length; i++) { 2658 res[i] = (double) a[i]; 2659 } 2660 return res; 2661 } 2662 2663 /** 2664 * Loads a vector from a byte array starting at an offset. 2665 * Bytes are composed into primitive lane elements according 2666 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2667 * The vector is arranged into lanes according to 2668 * <a href="Vector.html#lane-order">memory ordering</a>. 
2669 * <p> 2670 * This method behaves as if it returns the result of calling 2671 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2672 * fromByteBuffer()} as follows: 2673 * <pre>{@code 2674 * var bb = ByteBuffer.wrap(a); 2675 * var bo = ByteOrder.LITTLE_ENDIAN; 2676 * var m = species.maskAll(true); 2677 * return fromByteBuffer(species, bb, offset, m, bo); 2678 * }</pre> 2679 * 2680 * @param species species of desired vector 2681 * @param a the byte array 2682 * @param offset the offset into the array 2683 * @return a vector loaded from a byte array 2684 * @throws IndexOutOfBoundsException 2685 * if {@code offset+N*ESIZE < 0} 2686 * or {@code offset+(N+1)*ESIZE > a.length} 2687 * for any lane {@code N} in the vector 2688 */ 2689 @ForceInline 2690 public static 2691 ShortVector fromByteArray(VectorSpecies<Short> species, 2692 byte[] a, int offset) { 2693 return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN); 2694 } 2695 2696 /** 2697 * Loads a vector from a byte array starting at an offset. 2698 * Bytes are composed into primitive lane elements according 2699 * to the specified byte order. 2700 * The vector is arranged into lanes according to 2701 * <a href="Vector.html#lane-order">memory ordering</a>. 
2702 * <p> 2703 * This method behaves as if it returns the result of calling 2704 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2705 * fromByteBuffer()} as follows: 2706 * <pre>{@code 2707 * var bb = ByteBuffer.wrap(a); 2708 * var m = species.maskAll(true); 2709 * return fromByteBuffer(species, bb, offset, m, bo); 2710 * }</pre> 2711 * 2712 * @param species species of desired vector 2713 * @param a the byte array 2714 * @param offset the offset into the array 2715 * @param bo the intended byte order 2716 * @return a vector loaded from a byte array 2717 * @throws IndexOutOfBoundsException 2718 * if {@code offset+N*ESIZE < 0} 2719 * or {@code offset+(N+1)*ESIZE > a.length} 2720 * for any lane {@code N} in the vector 2721 */ 2722 @ForceInline 2723 public static 2724 ShortVector fromByteArray(VectorSpecies<Short> species, 2725 byte[] a, int offset, 2726 ByteOrder bo) { 2727 ShortSpecies vsp = (ShortSpecies) species; 2728 offset = checkFromIndexSize(offset, 2729 vsp.vectorBitSize() / Byte.SIZE, 2730 a.length); 2731 return vsp.dummyVector() 2732 .fromByteArray0(a, offset).maybeSwap(bo); 2733 } 2734 2735 /** 2736 * Loads a vector from a byte array starting at an offset 2737 * and using a mask. 2738 * Lanes where the mask is unset are filled with the default 2739 * value of {@code short} (zero). 2740 * Bytes are composed into primitive lane elements according 2741 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2742 * The vector is arranged into lanes according to 2743 * <a href="Vector.html#lane-order">memory ordering</a>. 
2744 * <p> 2745 * This method behaves as if it returns the result of calling 2746 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2747 * fromByteBuffer()} as follows: 2748 * <pre>{@code 2749 * var bb = ByteBuffer.wrap(a); 2750 * var bo = ByteOrder.LITTLE_ENDIAN; 2751 * return fromByteBuffer(species, bb, offset, bo, m); 2752 * }</pre> 2753 * 2754 * @param species species of desired vector 2755 * @param a the byte array 2756 * @param offset the offset into the array 2757 * @param m the mask controlling lane selection 2758 * @return a vector loaded from a byte array 2759 * @throws IndexOutOfBoundsException 2760 * if {@code offset+N*ESIZE < 0} 2761 * or {@code offset+(N+1)*ESIZE > a.length} 2762 * for any lane {@code N} in the vector where 2763 * the mask is set 2764 */ 2765 @ForceInline 2766 public static 2767 ShortVector fromByteArray(VectorSpecies<Short> species, 2768 byte[] a, int offset, 2769 VectorMask<Short> m) { 2770 return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m); 2771 } 2772 2773 /** 2774 * Loads a vector from a byte array starting at an offset 2775 * and using a mask. 2776 * Lanes where the mask is unset are filled with the default 2777 * value of {@code short} (zero). 2778 * Bytes are composed into primitive lane elements according 2779 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2780 * The vector is arranged into lanes according to 2781 * <a href="Vector.html#lane-order">memory ordering</a>. 
2782 * <p> 2783 * This method behaves as if it returns the result of calling 2784 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2785 * fromByteBuffer()} as follows: 2786 * <pre>{@code 2787 * var bb = ByteBuffer.wrap(a); 2788 * return fromByteBuffer(species, bb, offset, m, bo); 2789 * }</pre> 2790 * 2791 * @param species species of desired vector 2792 * @param a the byte array 2793 * @param offset the offset into the array 2794 * @param bo the intended byte order 2795 * @param m the mask controlling lane selection 2796 * @return a vector loaded from a byte array 2797 * @throws IndexOutOfBoundsException 2798 * if {@code offset+N*ESIZE < 0} 2799 * or {@code offset+(N+1)*ESIZE > a.length} 2800 * for any lane {@code N} in the vector 2801 * where the mask is set 2802 */ 2803 @ForceInline 2804 public static 2805 ShortVector fromByteArray(VectorSpecies<Short> species, 2806 byte[] a, int offset, 2807 ByteOrder bo, 2808 VectorMask<Short> m) { 2809 ShortSpecies vsp = (ShortSpecies) species; 2810 ShortVector zero = vsp.zero(); 2811 2812 if (offset >= 0 && offset <= (a.length - vsp.length() * 2)) { 2813 ShortVector v = zero.fromByteArray0(a, offset); 2814 return zero.blend(v.maybeSwap(bo), m); 2815 } 2816 ShortVector iota = zero.addIndex(1); 2817 ((AbstractMask<Short>)m) 2818 .checkIndexByLane(offset, a.length, iota, 2); 2819 ShortBuffer tb = wrapper(a, offset, bo); 2820 return vsp.ldOp(tb, 0, (AbstractMask<Short>)m, 2821 (tb_, __, i) -> tb_.get(i)); 2822 } 2823 2824 /** 2825 * Loads a vector from an array of type {@code short[]} 2826 * starting at an offset. 2827 * For each vector lane, where {@code N} is the vector lane index, the 2828 * array element at index {@code offset + N} is placed into the 2829 * resulting vector at lane index {@code N}. 
2830 * 2831 * @param species species of desired vector 2832 * @param a the array 2833 * @param offset the offset into the array 2834 * @return the vector loaded from an array 2835 * @throws IndexOutOfBoundsException 2836 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2837 * for any lane {@code N} in the vector 2838 */ 2839 @ForceInline 2840 public static 2841 ShortVector fromArray(VectorSpecies<Short> species, 2842 short[] a, int offset) { 2843 ShortSpecies vsp = (ShortSpecies) species; 2844 offset = checkFromIndexSize(offset, 2845 vsp.laneCount(), 2846 a.length); 2847 return vsp.dummyVector().fromArray0(a, offset); 2848 } 2849 2850 /** 2851 * Loads a vector from an array of type {@code short[]} 2852 * starting at an offset and using a mask. 2853 * Lanes where the mask is unset are filled with the default 2854 * value of {@code short} (zero). 2855 * For each vector lane, where {@code N} is the vector lane index, 2856 * if the mask lane at index {@code N} is set then the array element at 2857 * index {@code offset + N} is placed into the resulting vector at lane index 2858 * {@code N}, otherwise the default element value is placed into the 2859 * resulting vector at lane index {@code N}. 
2860 * 2861 * @param species species of desired vector 2862 * @param a the array 2863 * @param offset the offset into the array 2864 * @param m the mask controlling lane selection 2865 * @return the vector loaded from an array 2866 * @throws IndexOutOfBoundsException 2867 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2868 * for any lane {@code N} in the vector 2869 * where the mask is set 2870 */ 2871 @ForceInline 2872 public static 2873 ShortVector fromArray(VectorSpecies<Short> species, 2874 short[] a, int offset, 2875 VectorMask<Short> m) { 2876 ShortSpecies vsp = (ShortSpecies) species; 2877 if (offset >= 0 && offset <= (a.length - species.length())) { 2878 ShortVector zero = vsp.zero(); 2879 return zero.blend(zero.fromArray0(a, offset), m); 2880 } 2881 ShortVector iota = vsp.iota(); 2882 ((AbstractMask<Short>)m) 2883 .checkIndexByLane(offset, a.length, iota, 1); 2884 return vsp.vOp(m, i -> a[offset + i]); 2885 } 2886 2887 /** 2888 * Gathers a new vector composed of elements from an array of type 2889 * {@code short[]}, 2890 * using indexes obtained by adding a fixed {@code offset} to a 2891 * series of secondary offsets from an <em>index map</em>. 2892 * The index map is a contiguous sequence of {@code VLENGTH} 2893 * elements in a second array of {@code int}s, starting at a given 2894 * {@code mapOffset}. 2895 * <p> 2896 * For each vector lane, where {@code N} is the vector lane index, 2897 * the lane is loaded from the array 2898 * element {@code a[f(N)]}, where {@code f(N)} is the 2899 * index mapping expression 2900 * {@code offset + indexMap[mapOffset + N]]}. 
2901 * 2902 * @param species species of desired vector 2903 * @param a the array 2904 * @param offset the offset into the array, may be negative if relative 2905 * indexes in the index map compensate to produce a value within the 2906 * array bounds 2907 * @param indexMap the index map 2908 * @param mapOffset the offset into the index map 2909 * @return the vector loaded from the indexed elements of the array 2910 * @throws IndexOutOfBoundsException 2911 * if {@code mapOffset+N < 0} 2912 * or if {@code mapOffset+N >= indexMap.length}, 2913 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 2914 * is an invalid index into {@code a}, 2915 * for any lane {@code N} in the vector 2916 * @see ShortVector#toIntArray() 2917 */ 2918 @ForceInline 2919 public static 2920 ShortVector fromArray(VectorSpecies<Short> species, 2921 short[] a, int offset, 2922 int[] indexMap, int mapOffset) { 2923 ShortSpecies vsp = (ShortSpecies) species; 2924 return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]); 2925 } 2926 2927 /** 2928 * Gathers a new vector composed of elements from an array of type 2929 * {@code short[]}, 2930 * under the control of a mask, and 2931 * using indexes obtained by adding a fixed {@code offset} to a 2932 * series of secondary offsets from an <em>index map</em>. 2933 * The index map is a contiguous sequence of {@code VLENGTH} 2934 * elements in a second array of {@code int}s, starting at a given 2935 * {@code mapOffset}. 2936 * <p> 2937 * For each vector lane, where {@code N} is the vector lane index, 2938 * if the lane is set in the mask, 2939 * the lane is loaded from the array 2940 * element {@code a[f(N)]}, where {@code f(N)} is the 2941 * index mapping expression 2942 * {@code offset + indexMap[mapOffset + N]]}. 2943 * Unset lanes in the resulting vector are set to zero. 
2944 * 2945 * @param species species of desired vector 2946 * @param a the array 2947 * @param offset the offset into the array, may be negative if relative 2948 * indexes in the index map compensate to produce a value within the 2949 * array bounds 2950 * @param indexMap the index map 2951 * @param mapOffset the offset into the index map 2952 * @param m the mask controlling lane selection 2953 * @return the vector loaded from the indexed elements of the array 2954 * @throws IndexOutOfBoundsException 2955 * if {@code mapOffset+N < 0} 2956 * or if {@code mapOffset+N >= indexMap.length}, 2957 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 2958 * is an invalid index into {@code a}, 2959 * for any lane {@code N} in the vector 2960 * where the mask is set 2961 * @see ShortVector#toIntArray() 2962 */ 2963 @ForceInline 2964 public static 2965 ShortVector fromArray(VectorSpecies<Short> species, 2966 short[] a, int offset, 2967 int[] indexMap, int mapOffset, 2968 VectorMask<Short> m) { 2969 ShortSpecies vsp = (ShortSpecies) species; 2970 2971 // Do it the slow way. 2972 return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); 2973 2974 } 2975 2976 /** 2977 * Loads a vector from a {@linkplain ByteBuffer byte buffer} 2978 * starting at an offset into the byte buffer. 2979 * <p> 2980 * Bytes are composed into primitive lane elements according to 2981 * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order. 2982 * To avoid errors, the 2983 * {@linkplain ByteBuffer#order() intrinsic byte order} 2984 * of the buffer must be little-endian. 
2985 * <p> 2986 * This method behaves as if it returns the result of calling 2987 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2988 * fromByteBuffer()} as follows: 2989 * <pre>{@code 2990 * var bb = ByteBuffer.wrap(a); 2991 * var bo = ByteOrder.LITTLE_ENDIAN; 2992 * var m = species.maskAll(true); 2993 * return fromByteBuffer(species, bb, offset, m, bo); 2994 * }</pre> 2995 * 2996 * @param species species of desired vector 2997 * @param bb the byte buffer 2998 * @param offset the offset into the byte buffer 2999 * @param bo the intended byte order 3000 * @return a vector loaded from a byte buffer 3001 * @throws IllegalArgumentException if byte order of bb 3002 * is not {@link ByteOrder#LITTLE_ENDIAN} 3003 * @throws IndexOutOfBoundsException 3004 * if {@code offset+N*2 < 0} 3005 * or {@code offset+N*2 >= bb.limit()} 3006 * for any lane {@code N} in the vector 3007 */ 3008 @ForceInline 3009 public static 3010 ShortVector fromByteBuffer(VectorSpecies<Short> species, 3011 ByteBuffer bb, int offset, 3012 ByteOrder bo) { 3013 ShortSpecies vsp = (ShortSpecies) species; 3014 offset = checkFromIndexSize(offset, 3015 vsp.laneCount(), 3016 bb.limit()); 3017 return vsp.dummyVector() 3018 .fromByteBuffer0(bb, offset).maybeSwap(bo); 3019 } 3020 3021 /** 3022 * Loads a vector from a {@linkplain ByteBuffer byte buffer} 3023 * starting at an offset into the byte buffer 3024 * and using a mask. 3025 * <p> 3026 * Bytes are composed into primitive lane elements according to 3027 * {@link ByteOrder#LITTLE_ENDIAN little endian} byte order. 3028 * To avoid errors, the 3029 * {@linkplain ByteBuffer#order() intrinsic byte order} 3030 * of the buffer must be little-endian. 
3031 * <p> 3032 * This method behaves as if it returns the result of calling 3033 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 3034 * fromByteBuffer()} as follows: 3035 * <pre>{@code 3036 * var bb = ByteBuffer.wrap(a); 3037 * var bo = ByteOrder.LITTLE_ENDIAN; 3038 * var m = species.maskAll(true); 3039 * return fromByteBuffer(species, bb, offset, m, bo); 3040 * }</pre> 3041 * 3042 * @param species species of desired vector 3043 * @param bb the byte buffer 3044 * @param offset the offset into the byte buffer 3045 * @param bo the intended byte order 3046 * @param m the mask controlling lane selection 3047 * @return a vector loaded from a byte buffer 3048 * @throws IllegalArgumentException if byte order of bb 3049 * is not {@link ByteOrder#LITTLE_ENDIAN} 3050 * @throws IndexOutOfBoundsException 3051 * if {@code offset+N*2 < 0} 3052 * or {@code offset+N*2 >= bb.limit()} 3053 * for any lane {@code N} in the vector 3054 * where the mask is set 3055 */ 3056 @ForceInline 3057 public static 3058 ShortVector fromByteBuffer(VectorSpecies<Short> species, 3059 ByteBuffer bb, int offset, 3060 ByteOrder bo, 3061 VectorMask<Short> m) { 3062 if (m.allTrue()) { 3063 return fromByteBuffer(species, bb, offset, bo); 3064 } 3065 ShortSpecies vsp = (ShortSpecies) species; 3066 checkMaskFromIndexSize(offset, 3067 vsp, m, 1, 3068 bb.limit()); 3069 ShortVector zero = zero(vsp); 3070 ShortVector v = zero.fromByteBuffer0(bb, offset); 3071 return zero.blend(v.maybeSwap(bo), m); 3072 } 3073 3074 // Memory store operations 3075 3076 /** 3077 * Stores this vector into an array of type {@code short[]} 3078 * starting at an offset. 3079 * <p> 3080 * For each vector lane, where {@code N} is the vector lane index, 3081 * the lane element at index {@code N} is stored into the array 3082 * element {@code a[offset+N]}. 
     *
     * @param a the array, of type {@code short[]}
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     * if {@code offset+N < 0} or {@code offset+N >= a.length}
     * for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoArray(short[] a, int offset) {
        ShortSpecies vsp = vspecies();
        offset = checkFromIndexSize(offset,
                                    vsp.laneCount(),
                                    a.length);
        // Intrinsic candidate; the lambda is the scalar fallback path.
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }

    /**
     * Stores this vector into an array of {@code short}
     * starting at offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     * If the mask lane at {@code N} is unset then the corresponding
     * array element {@code a[offset+N]} is left unchanged.
     * <p>
     * Array range checking is done for lanes where the mask is set.
     * Lanes where the mask is unset are not stored and do not need
     * to correspond to legitimate elements of {@code a}.
     * That is, unset lanes may correspond to array indexes less than
     * zero or beyond the end of the array.
     *
     * @param a the array, of type {@code short[]}
     * @param offset the offset into the array
     * @param m the mask controlling lane storage
     * @throws IndexOutOfBoundsException
     * if {@code offset+N < 0} or {@code offset+N >= a.length}
     * for any lane {@code N} in the vector
     * where the mask is set
     */
    @ForceInline
    public final
    void intoArray(short[] a, int offset,
                   VectorMask<Short> m) {
        if (m.allTrue()) {
            intoArray(a, offset);
        } else {
            // FIXME: Cannot vectorize yet, if there's a mask.
            // Masked stOp stores only the set lanes.
            stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
        }
    }

    /**
     * Scatters this vector into an array of type {@code short[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     * if {@code mapOffset+N < 0}
     * or if {@code mapOffset+N >= indexMap.length},
     * or if {@code f(N)=offset+indexMap[mapOffset+N]}
     * is an invalid index into {@code a},
     * for any lane {@code N} in the vector
     * @see ShortVector#toIntArray()
     */
    @ForceInline
    public final
    void intoArray(short[] a, int offset,
                   int[] indexMap, int mapOffset) {
        ShortSpecies vsp = vspecies();
        if (length() == 1) {
            // Single-lane vector: the scatter degenerates to one store.
            intoArray(a, offset + indexMap[mapOffset]);
            return;
        }
        IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
        if (isp.laneCount() != vsp.laneCount()) {
            // Index species cannot cover all lanes in one index vector;
            // fall back to a scalar store loop.
            stOp(a, offset,
                 (arr, off, i, e) -> {
                     int j = indexMap[mapOffset + i];
                     arr[off + j] = e;
                 });
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        // Bounds-check every computed index against a.length up front.
        vix = VectorIntrinsics.checkIndex(vix, a.length);

        // Intrinsic candidate; the lambda is the scalar fallback path.
        VectorIntrinsics.storeWithMap(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this,
            a, offset, indexMap, mapOffset,
            (arr, off, v, map, mo)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          // arr/off are the same objects as arr_/off_
                          // (outer captures), so this store is equivalent.
                          arr[off + j] = e;
                      }));
    }

    /**
     * Scatters this vector into an array of type {@code short[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
3219 * <p> 3220 * For each vector lane, where {@code N} is the vector lane index, 3221 * if the mask lane at index {@code N} is set then 3222 * the lane element at index {@code N} is stored into the array 3223 * element {@code a[f(N)]}, where {@code f(N)} is the 3224 * index mapping expression 3225 * {@code offset + indexMap[mapOffset + N]]}. 3226 * 3227 * @param a the array 3228 * @param offset an offset to combine with the index map offsets 3229 * @param indexMap the index map 3230 * @param mapOffset the offset into the index map 3231 * @param m the mask 3232 * @returns a vector of the values {@code m ? a[f(N)] : 0}, 3233 * {@code f(N) = offset + indexMap[mapOffset + N]]}. 3234 * @throws IndexOutOfBoundsException 3235 * if {@code mapOffset+N < 0} 3236 * or if {@code mapOffset+N >= indexMap.length}, 3237 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 3238 * is an invalid index into {@code a}, 3239 * for any lane {@code N} in the vector 3240 * where the mask is set 3241 * @see ShortVector#toIntArray() 3242 */ 3243 @ForceInline 3244 public final 3245 void intoArray(short[] a, int offset, 3246 int[] indexMap, int mapOffset, 3247 VectorMask<Short> m) { 3248 ShortSpecies vsp = vspecies(); 3249 if (m.allTrue()) { 3250 intoArray(a, offset, indexMap, mapOffset); 3251 return; 3252 } 3253 throw new AssertionError("fixme"); 3254 } 3255 3256 /** 3257 * {@inheritDoc} <!--workaround--> 3258 */ 3259 @Override 3260 @ForceInline 3261 public final 3262 void intoByteArray(byte[] a, int offset) { 3263 offset = checkFromIndexSize(offset, 3264 bitSize() / Byte.SIZE, 3265 a.length); 3266 this.maybeSwap(ByteOrder.LITTLE_ENDIAN) 3267 .intoByteArray0(a, offset); 3268 } 3269 3270 /** 3271 * {@inheritDoc} <!--workaround--> 3272 */ 3273 @Override 3274 @ForceInline 3275 public final 3276 void intoByteArray(byte[] a, int offset, 3277 VectorMask<Short> m) { 3278 if (m.allTrue()) { 3279 intoByteArray(a, offset); 3280 return; 3281 } 3282 ShortSpecies vsp = vspecies(); 3283 if (offset >= 0 && 
offset <= (a.length - vsp.length() * 2)) { 3284 var oldVal = fromByteArray0(a, offset); 3285 var newVal = oldVal.blend(this, m); 3286 newVal.intoByteArray0(a, offset); 3287 } else { 3288 checkMaskFromIndexSize(offset, vsp, m, 2, a.length); 3289 ShortBuffer tb = wrapper(a, offset, NATIVE_ENDIAN); 3290 this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e)); 3291 } 3292 } 3293 3294 /** 3295 * {@inheritDoc} <!--workaround--> 3296 */ 3297 @Override 3298 @ForceInline 3299 public final 3300 void intoByteArray(byte[] a, int offset, 3301 ByteOrder bo, 3302 VectorMask<Short> m) { 3303 maybeSwap(bo).intoByteArray(a, offset, m); 3304 } 3305 3306 /** 3307 * {@inheritDoc} <!--workaround--> 3308 */ 3309 @Override 3310 @ForceInline 3311 public final 3312 void intoByteBuffer(ByteBuffer bb, int offset, 3313 ByteOrder bo) { 3314 maybeSwap(bo).intoByteBuffer0(bb, offset); 3315 } 3316 3317 /** 3318 * {@inheritDoc} <!--workaround--> 3319 */ 3320 @Override 3321 @ForceInline 3322 public final 3323 void intoByteBuffer(ByteBuffer bb, int offset, 3324 ByteOrder bo, 3325 VectorMask<Short> m) { 3326 if (m.allTrue()) { 3327 intoByteBuffer(bb, offset, bo); 3328 return; 3329 } 3330 ShortSpecies vsp = vspecies(); 3331 checkMaskFromIndexSize(offset, vsp, m, 2, bb.limit()); 3332 conditionalStoreNYI(offset, vsp, m, 2, bb.limit()); 3333 var oldVal = fromByteBuffer0(bb, offset); 3334 var newVal = oldVal.blend(this.maybeSwap(bo), m); 3335 newVal.intoByteBuffer0(bb, offset); 3336 } 3337 3338 // ================================================ 3339 3340 // Low-level memory operations. 3341 // 3342 // Note that all of these operations *must* inline into a context 3343 // where the exact species of the involved vector is a 3344 // compile-time constant. Otherwise, the intrinsic generation 3345 // will fail and performance will suffer. 3346 // 3347 // In many cases this is achieved by re-deriving a version of the 3348 // method in each concrete subclass (per species). 
The re-derived 3349 // method simply calls one of these generic methods, with exact 3350 // parameters for the controlling metadata, which is either a 3351 // typed vector or constant species instance. 3352 3353 // Unchecked loading operations in native byte order. 3354 // Caller is reponsible for applying index checks, masking, and 3355 // byte swapping. 3356 3357 /*package-private*/ 3358 abstract 3359 ShortVector fromArray0(short[] a, int offset); 3360 @ForceInline 3361 final 3362 ShortVector fromArray0Template(short[] a, int offset) { 3363 ShortSpecies vsp = vspecies(); 3364 return VectorIntrinsics.load( 3365 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3366 a, arrayAddress(a, offset), 3367 a, offset, vsp, 3368 (arr, off, s) -> s.ldOp(arr, off, 3369 (arr_, off_, i) -> arr_[off_ + i])); 3370 } 3371 3372 @Override 3373 abstract 3374 ShortVector fromByteArray0(byte[] a, int offset); 3375 @ForceInline 3376 final 3377 ShortVector fromByteArray0Template(byte[] a, int offset) { 3378 ShortSpecies vsp = vspecies(); 3379 return VectorIntrinsics.load( 3380 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3381 a, byteArrayAddress(a, offset), 3382 a, offset, vsp, 3383 (arr, off, s) -> { 3384 ShortBuffer tb = wrapper(arr, off, NATIVE_ENDIAN); 3385 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i)); 3386 }); 3387 } 3388 3389 abstract 3390 ShortVector fromByteBuffer0(ByteBuffer bb, int offset); 3391 @ForceInline 3392 final 3393 ShortVector fromByteBuffer0Template(ByteBuffer bb, int offset) { 3394 ShortSpecies vsp = vspecies(); 3395 return VectorIntrinsics.load( 3396 vsp.vectorType(), vsp.elementType(), vsp.laneCount(), 3397 bufferBase(bb), bufferAddress(bb, offset), 3398 bb, offset, vsp, 3399 (buf, off, s) -> { 3400 ShortBuffer tb = wrapper(buf, off, NATIVE_ENDIAN); 3401 return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i)); 3402 }); 3403 } 3404 3405 // Unchecked storing operations in native byte order. 
    // Caller is responsible for applying index checks, masking, and
    // byte swapping.

    abstract
    void intoArray0(short[] a, int offset);
    @ForceInline
    final
    void intoArray0Template(short[] a, int offset) {
        ShortSpecies vsp = vspecies();
        // Intrinsic candidate; the lambda is the scalar fallback path.
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }

    abstract
    void intoByteArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoByteArray0Template(byte[] a, int offset) {
        ShortSpecies vsp = vspecies();
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, a, offset,
            (arr, off, v) -> {
                // Scalar fallback: write shorts through a buffer view.
                ShortBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
                v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
            });
    }

    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        ShortSpecies vsp = vspecies();
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            bufferBase(bb), bufferAddress(bb, offset),
            this, bb, offset,
            (buf, off, v) -> {
                // Scalar fallback: write shorts through a buffer view.
                ShortBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
                v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
            });
    }

    // End of low-level memory operations.

    // Range-checks only the lanes where m is set, treating each lane as
    // `scale` bytes/elements wide, against [0, limit).
    private static
    void checkMaskFromIndexSize(int offset,
                                ShortSpecies vsp,
                                VectorMask<Short> m,
                                int scale,
                                int limit) {
        ((AbstractMask<Short>)m)
            .checkIndexByLane(offset, limit, vsp.iota(), scale);
    }

    // Guard for masked read-modify-write stores: asserts the FULL vector
    // span is in range, since the blend path reads/writes every lane.
    @ForceInline
    private void conditionalStoreNYI(int offset,
                                     ShortSpecies vsp,
                                     VectorMask<Short> m,
                                     int scale,
                                     int limit) {
        if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
            String msg =
                String.format("unimplemented: store @%d in [0..%d), %s in %s",
                              offset, limit, m, vsp);
            throw new AssertionError(msg);
        }
    }

    /*package-private*/
    @Override
    @ForceInline
    final
    ShortVector maybeSwap(ByteOrder bo) {
        // Byte-swap each lane only when the requested order differs from
        // the platform's native order; otherwise this is a no-op.
        if (bo != NATIVE_ENDIAN) {
            return this.reinterpretAsBytes()
                .rearrange(swapBytesShuffle())
                .reinterpretAsShorts();
        }
        return this;
    }

    // log2 of the per-element scale of short[] (normally 1, i.e. 2 bytes).
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_SHORT_INDEX_SCALE);
    static final long ARRAY_BASE =
        Unsafe.ARRAY_SHORT_BASE_OFFSET;

    // Unsafe address of element `index` of a short[].
    @ForceInline
    static long arrayAddress(short[] a, int index) {
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }

    // Unsafe address of byte `index` of a byte[].
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }

    // Byte buffer wrappers.
    // Views a byte buffer, from byte `offset` onward, as a ShortBuffer
    // in the given byte order.
    private static ShortBuffer wrapper(ByteBuffer bb, int offset,
                                       ByteOrder bo) {
        return bb.duplicate().position(offset).slice()
            .order(bo).asShortBuffer();
    }
    // Views a byte array, from byte `offset` onward, as a ShortBuffer
    // in the given byte order.
    private static ShortBuffer wrapper(byte[] a, int offset,
                                       ByteOrder bo) {
        return ByteBuffer.wrap(a, offset, a.length - offset)
            .order(bo).asShortBuffer();
    }

    // ================================================

    /// Reinterpreting view methods:
    //   lanewise reinterpret: viewAsXVector()
    //   keep shape, redraw lanes: reinterpretAsEs()

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final ByteVector reinterpretAsBytes() {
        // Going to ByteVector, pay close attention to byte order.
        assert(REGISTER_ENDIAN == ByteOrder.LITTLE_ENDIAN);
        return asByteVectorRaw();
        //return asByteVectorRaw().rearrange(swapBytesShuffle());
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final ShortVector viewAsIntegralLanes() {
        // short is already an integral lane type; identity view.
        return this;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     *
     * @implNote This method always throws
     * {@code IllegalArgumentException}, because there is no floating
     * point type of the same size as {@code short}. The return type
     * of this method is arbitrarily designated as
     * {@code Vector<?>}. Future versions of this API may change the return
     * type if additional floating point types become available.
     */
    @ForceInline
    @Override
    public final
    Vector<?>
    viewAsFloatingLanes() {
        LaneType flt = LaneType.SHORT.asFloating();
        throw new AssertionError(); // should already throw IAE
    }

    // ================================================

    /// Object methods: toString, equals, hashCode
    //
    // Object methods are defined as if via Arrays.toString, etc.,
    // is applied to the array of elements.  Two equal vectors
    // are required to have equal species and equal lane values.

    /**
     * Returns a string representation of this vector, of the form
     * {@code "[0,1,2...]"}, reporting the lane values of this vector,
     * in lane order.
     *
     * The string is produced as if by a call to {@link
     * java.util.Arrays#toString(short[]) Arrays.toString()},
     * as appropriate to the {@code short} array returned by
     * {@link #toArray this.toArray()}.
     *
     * @return a string of the form {@code "[0,1,2...]"}
     * reporting the lane values of this vector
     */
    @Override
    @ForceInline
    public final
    String toString() {
        // now that toArray is strongly typed, we can define this
        return Arrays.toString(toArray());
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    boolean equals(Object obj) {
        // Equal species AND all lanes equal (lane-wise eq reduced by allTrue).
        if (obj instanceof Vector) {
            Vector<?> that = (Vector<?>) obj;
            if (this.species().equals(that.species())) {
                return this.eq(that.check(this.species())).allTrue();
            }
        }
        return false;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    int hashCode() {
        // now that toArray is strongly typed, we can define this
        return Objects.hash(species(), Arrays.hashCode(toArray()));
    }

    // ================================================

    // Species

    /**
     * Class representing {@link ShortVector}'s of the same {@link VectorShape VectorShape}.
     */
    /*package-private*/
    static final class ShortSpecies extends AbstractSpecies<Short> {
        private ShortSpecies(VectorShape shape,
                Class<? extends ShortVector> vectorType,
                Class<? extends AbstractMask<Short>> maskType,
                Function<Object, ShortVector> vectorFactory) {
            super(shape, LaneType.of(short.class),
                  vectorType, maskType,
                  vectorFactory);
            assert(this.elementSize() == Short.SIZE);
        }

        // Specializing overrides:

        @Override
        @ForceInline
        public final Class<Short> elementType() {
            return short.class;
        }

        @Override
        @ForceInline
        public final Class<Short> genericElementType() {
            return Short.class;
        }

        @Override
        @ForceInline
        public final Class<short[]> arrayType() {
            return short[].class;
        }

        @SuppressWarnings("unchecked")
        @Override
        @ForceInline
        public final Class<? extends ShortVector> vectorType() {
            return (Class<? extends ShortVector>) vectorType;
        }

        @Override
        @ForceInline
        public final long checkValue(long e) {
            longToElementBits(e); // only for exception
            return e;
        }

        /*package-private*/
        @Override
        @ForceInline
        final ShortVector broadcastBits(long bits) {
            return (ShortVector)
                VectorIntrinsics.broadcastCoerced(
                    vectorType, short.class, laneCount,
                    bits, this,
                    (bits_, s_) -> s_.rvOp(i -> bits_));
        }

        /*package-private*/
        @ForceInline
        final ShortVector broadcast(short e) {
            return broadcastBits(toBits(e));
        }

        @Override
        @ForceInline
        public final ShortVector broadcast(long e) {
            // Range-checked: throws if e does not fit in a short.
            return broadcastBits(longToElementBits(e));
        }

        /*package-private*/
        final @Override
        @ForceInline
        long longToElementBits(long value) {
            // Do the conversion, and then test it for failure.
            short e = (short) value;
            // Round-trip check: value must be exactly representable.
            if ((long) e != value) {
                throw badElementBits(value, e);
            }
            return toBits(e);
        }

        /*package-private*/
        @ForceInline
        static long toIntegralChecked(short e, boolean convertToInt) {
            long value = convertToInt ? (int) e : (long) e;
            // Round-trip check: conversion must preserve the short value.
            if ((short) value != e) {
                throw badArrayBits(e, convertToInt, value);
            }
            return value;
        }

        @Override
        @ForceInline
        public final ShortVector fromValues(long... values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            short[] va = new short[laneCount()];
            for (int i = 0; i < va.length; i++) {
                long lv = values[i];
                short v = (short) lv;
                va[i] = v;
                // Reject any value that does not fit in a short lane.
                if ((long)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        /* this non-public one is for internal conversions */
        @Override
        @ForceInline
        final ShortVector fromIntValues(int[] values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            short[] va = new short[laneCount()];
            for (int i = 0; i < va.length; i++) {
                int lv = values[i];
                short v = (short) lv;
                va[i] = v;
                // Reject any value that does not fit in a short lane.
                if ((int)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        // Virtual constructors

        @ForceInline
        @Override final
        public ShortVector fromArray(Object a, int offset) {
            // User entry point: Be careful with inputs.
            return ShortVector
                .fromArray(this, (short[]) a, offset);
        }

        @Override final
        ShortVector dummyVector() {
            return (ShortVector) super.dummyVector();
        }

        final
        ShortVector vectorFactory(short[] vec) {
            // Species delegates all factory requests to its dummy
            // vector.  The dummy knows all about it.
            return dummyVector().vectorFactory(vec);
        }

        /*package-private*/
        final @Override
        @ForceInline
        ShortVector rvOp(RVOp f) {
            // Build a vector from raw lane bits produced by f.
            short[] res = new short[laneCount()];
            for (int i = 0; i < res.length; i++) {
                short bits = (short) f.apply(i);
                res[i] = fromBits(bits);
            }
            return dummyVector().vectorFactory(res);
        }

        // Build a vector from per-lane values produced by f.
        ShortVector vOp(FVOp f) {
            short[] res = new short[laneCount()];
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i);
            }
            return dummyVector().vectorFactory(res);
        }

        // Masked variant: f is applied only where m is set;
        // unset lanes are left at zero.
        ShortVector vOp(VectorMask<Short> m, FVOp f) {
            short[] res = new short[laneCount()];
            boolean[] mbits = ((AbstractMask<Short>)m).getBits();
            for (int i = 0; i < res.length; i++) {
                if (mbits[i]) {
                    res[i] = f.apply(i);
                }
            }
            return dummyVector().vectorFactory(res);
        }

        /*package-private*/
        @ForceInline
        <M> ShortVector ldOp(M memory, int offset,
                             FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> ShortVector ldOp(M memory, int offset,
                             AbstractMask<Short> m,
                             FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, m, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset, FStOp<M> f) {
            dummyVector().stOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset,
                      AbstractMask<Short> m,
                      FStOp<M> f) {
            dummyVector().stOp(memory, offset, m, f);
        }

        // N.B. Make sure these constant vectors and
        // masks load up correctly into registers.
        //
        // Also, see if we can avoid all that switching.
        // Could we cache both vectors and both masks in
        // this species object?

        // Zero and iota vector access
        @Override
        @ForceInline
        public final ShortVector zero() {
            // Dispatch to the shape-specific cached constant.
            if ((Class<?>) vectorType() == ShortMaxVector.class)
                return ShortMaxVector.ZERO;
            switch (vectorBitSize()) {
                case 64: return Short64Vector.ZERO;
                case 128: return Short128Vector.ZERO;
                case 256: return Short256Vector.ZERO;
                case 512: return Short512Vector.ZERO;
            }
            throw new AssertionError();
        }

        @Override
        @ForceInline
        public final ShortVector iota() {
            // Dispatch to the shape-specific cached constant.
            if ((Class<?>) vectorType() == ShortMaxVector.class)
                return ShortMaxVector.IOTA;
            switch (vectorBitSize()) {
                case 64: return Short64Vector.IOTA;
                case 128: return Short128Vector.IOTA;
                case 256: return Short256Vector.IOTA;
                case 512: return Short512Vector.IOTA;
            }
            throw new AssertionError();
        }

        // Mask access
        @Override
        @ForceInline
        public final VectorMask<Short> maskAll(boolean bit) {
            // Dispatch to the shape-specific all-set/all-clear mask.
            if ((Class<?>) vectorType() == ShortMaxVector.class)
                return ShortMaxVector.ShortMaxMask.maskAll(bit);
            switch (vectorBitSize()) {
                case 64: return Short64Vector.Short64Mask.maskAll(bit);
                case 128: return Short128Vector.Short128Mask.maskAll(bit);
                case 256: return Short256Vector.Short256Mask.maskAll(bit);
                case 512: return Short512Vector.Short512Mask.maskAll(bit);
            }
            throw new AssertionError();
        }
    }

    /**
     * Finds a species for an element type of {@code short} and shape.
3890 * 3891 * @param s the shape 3892 * @return a species for an element type of {@code short} and shape 3893 * @throws IllegalArgumentException if no such species exists for the shape 3894 */ 3895 static ShortSpecies species(VectorShape s) { 3896 Objects.requireNonNull(s); 3897 switch (s) { 3898 case S_64_BIT: return (ShortSpecies) SPECIES_64; 3899 case S_128_BIT: return (ShortSpecies) SPECIES_128; 3900 case S_256_BIT: return (ShortSpecies) SPECIES_256; 3901 case S_512_BIT: return (ShortSpecies) SPECIES_512; 3902 case S_Max_BIT: return (ShortSpecies) SPECIES_MAX; 3903 default: throw new IllegalArgumentException("Bad shape: " + s); 3904 } 3905 } 3906 3907 /** Species representing {@link ShortVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */ 3908 public static final VectorSpecies<Short> SPECIES_64 3909 = new ShortSpecies(VectorShape.S_64_BIT, 3910 Short64Vector.class, 3911 Short64Vector.Short64Mask.class, 3912 Short64Vector::new); 3913 3914 /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ 3915 public static final VectorSpecies<Short> SPECIES_128 3916 = new ShortSpecies(VectorShape.S_128_BIT, 3917 Short128Vector.class, 3918 Short128Vector.Short128Mask.class, 3919 Short128Vector::new); 3920 3921 /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ 3922 public static final VectorSpecies<Short> SPECIES_256 3923 = new ShortSpecies(VectorShape.S_256_BIT, 3924 Short256Vector.class, 3925 Short256Vector.Short256Mask.class, 3926 Short256Vector::new); 3927 3928 /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. 
*/ 3929 public static final VectorSpecies<Short> SPECIES_512 3930 = new ShortSpecies(VectorShape.S_512_BIT, 3931 Short512Vector.class, 3932 Short512Vector.Short512Mask.class, 3933 Short512Vector::new); 3934 3935 /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ 3936 public static final VectorSpecies<Short> SPECIES_MAX 3937 = new ShortSpecies(VectorShape.S_Max_BIT, 3938 ShortMaxVector.class, 3939 ShortMaxVector.ShortMaxMask.class, 3940 ShortMaxVector::new); 3941 3942 /** 3943 * Preferred species for {@link ShortVector}s. 3944 * A preferred species is a species of maximal bit-size for the platform. 3945 */ 3946 public static final VectorSpecies<Short> SPECIES_PREFERRED 3947 = (ShortSpecies) VectorSpecies.ofPreferred(short.class); 3948 }