1 /* 2 * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have 23 * questions. 24 */ 25 package jdk.incubator.vector; 26 27 import java.nio.ByteBuffer; 28 import java.nio.ByteOrder; 29 import java.util.Arrays; 30 import java.util.Objects; 31 import java.util.function.BinaryOperator; 32 import java.util.function.IntUnaryOperator; 33 import java.util.function.Function; 34 import java.util.function.UnaryOperator; 35 import java.util.concurrent.ThreadLocalRandom; 36 37 import jdk.internal.misc.Unsafe; 38 import jdk.internal.vm.annotation.ForceInline; 39 40 import static jdk.incubator.vector.VectorIntrinsics.*; 41 import static jdk.incubator.vector.VectorOperators.*; 42 43 // -- This file was mechanically generated: Do not edit! 
// -- //

/**
 * A specialized {@link Vector} representing an ordered immutable sequence of
 * {@code byte} values.
 */
@SuppressWarnings("cast")  // warning: redundant cast
public abstract class ByteVector extends AbstractVector<Byte> {

    ByteVector() {}

    // Floating-point-only opcodes are never valid for byte lanes.
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;

    @ForceInline
    static int opCode(Operator op) {
        return VectorOperators.opCode(op, VO_OPCODE_VALID, FORBID_OPCODE_KIND);
    }
    @ForceInline
    static int opCode(Operator op, int requireKind) {
        requireKind |= VO_OPCODE_VALID;
        return VectorOperators.opCode(op, requireKind, FORBID_OPCODE_KIND);
    }
    @ForceInline
    static boolean opKind(Operator op, int bit) {
        return VectorOperators.opKind(op, bit);
    }

    // Virtualized factories and operators,
    // coded with portable definitions.
    // These are all @ForceInline in case
    // they need to be used performantly.
    // The various shape-specific subclasses
    // also specialize them by wrapping
    // them in a call like this:
    //    return (Byte128Vector)
    //       super.bOp((Byte128Vector) o);
    // The purpose of that is to forcibly inline
    // the generic definition from this file
    // into a sharply type- and size-specific
    // wrapper in the subclass file, so that
    // the JIT can specialize the code.
    // The code is only inlined and expanded
    // if it gets hot.  Think of it as a cheap
    // and lazy version of C++ templates.

    // Virtualized getter

    /*package-private*/
    abstract byte[] getElements();

    // Virtualized constructors

    /**
     * Build a vector directly using my own constructor.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    abstract ByteVector vectorFactory(byte[] vec);

    /**
     * Build a mask directly using my species.
     * It is an error if the array is aliased elsewhere.
     */
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Byte> maskFactory(boolean[] bits) {
        return vspecies().maskFactory(bits);
    }

    // Constant loader (takes dummy as vector arg)
    interface FVOp {
        byte apply(int i);
    }

    /*package-private*/
    @ForceInline
    final
    ByteVector vOp(FVOp f) {
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i);
        }
        return vectorFactory(res);
    }

    // Masked variant: lanes whose mask bit is clear keep the
    // array default (zero), since res is freshly allocated.
    @ForceInline
    final
    ByteVector vOp(VectorMask<Byte> m, FVOp f) {
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(i);
            }
        }
        return vectorFactory(res);
    }

    // Unary operator

    /*package-private*/
    interface FUnOp {
        byte apply(int i, byte a);
    }

    /*package-private*/
    abstract
    ByteVector uOp(FUnOp f);
    @ForceInline
    final
    ByteVector uOpTemplate(FUnOp f) {
        byte[] vec = getElements();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ByteVector uOp(VectorMask<Byte> m,
                   FUnOp f);
    // Masked variant: unset lanes pass through this vector's
    // original lane value unchanged.
    @ForceInline
    final
    ByteVector uOpTemplate(VectorMask<Byte> m,
                           FUnOp f) {
        byte[] vec = getElements();
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
        }
        return vectorFactory(res);
    }

    // Binary operator

    /*package-private*/
    interface FBinOp {
        byte apply(int i, byte a, byte b);
    }

    /*package-private*/
    abstract
    ByteVector bOp(Vector<Byte> o,
                   FBinOp f);
    @ForceInline
    final
    ByteVector bOpTemplate(Vector<Byte> o,
                           FBinOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.getElements();
        byte[] vec2 = ((ByteVector)o).getElements();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ByteVector bOp(Vector<Byte> o,
                   VectorMask<Byte> m,
                   FBinOp f);
    // Masked variant: unset lanes keep this vector's (vec1) value.
    @ForceInline
    final
    ByteVector bOpTemplate(Vector<Byte> o,
                           VectorMask<Byte> m,
                           FBinOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.getElements();
        byte[] vec2 = ((ByteVector)o).getElements();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
        }
        return vectorFactory(res);
    }

    // Ternary operator

    /*package-private*/
    interface FTriOp {
        byte apply(int i, byte a, byte b, byte c);
    }

    /*package-private*/
    abstract
    ByteVector tOp(Vector<Byte> o1,
                   Vector<Byte> o2,
                   FTriOp f);
    @ForceInline
    final
    ByteVector tOpTemplate(Vector<Byte> o1,
                           Vector<Byte> o2,
                           FTriOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.getElements();
        byte[] vec2 = ((ByteVector)o1).getElements();
        byte[] vec3 = ((ByteVector)o2).getElements();
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(i, vec1[i], vec2[i], vec3[i]);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    abstract
    ByteVector tOp(Vector<Byte> o1,
                   Vector<Byte> o2,
                   VectorMask<Byte> m,
                   FTriOp f);
    // Masked variant: unset lanes keep this vector's (vec1) value.
    @ForceInline
    final
    ByteVector tOpTemplate(Vector<Byte> o1,
                           Vector<Byte> o2,
                           VectorMask<Byte> m,
                           FTriOp f) {
        byte[] res = new byte[length()];
        byte[] vec1 = this.getElements();
        byte[] vec2 = ((ByteVector)o1).getElements();
        byte[] vec3 = ((ByteVector)o2).getElements();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i], vec3[i]) : vec1[i];
        }
        return vectorFactory(res);
    }

    // Reduction operator

    /*package-private*/
    abstract
    byte rOp(byte v, FBinOp f);
    // Folds all lanes into v, left to right; v is the identity/seed value.
    @ForceInline
    final
    byte rOpTemplate(byte v, FBinOp f) {
        byte[] vec = getElements();
        for (int i = 0; i < vec.length; i++) {
            v = f.apply(i, v, vec[i]);
        }
        return v;
    }

    // Memory reference

    /*package-private*/
    interface FLdOp<M> {
        byte apply(M memory, int offset, int i);
    }

    /*package-private*/
    @ForceInline
    final
    <M> ByteVector ldOp(M memory, int offset,
                        FLdOp<M> f) {
        //dummy; no vec = getElements();
        byte[] res = new byte[length()];
        for (int i = 0; i < res.length; i++) {
            res[i] = f.apply(memory, offset, i);
        }
        return vectorFactory(res);
    }

    /*package-private*/
    @ForceInline
    final
    <M> ByteVector ldOp(M memory, int offset,
                        VectorMask<Byte> m,
                        FLdOp<M> f) {
        //byte[] vec = getElements();
        // Masked load: unloaded (unset) lanes are left at zero.
        byte[] res = new byte[length()];
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < res.length; i++) {
            if (mbits[i]) {
                res[i] = f.apply(memory, offset, i);
            }
        }
        return vectorFactory(res);
    }

    interface FStOp<M> {
        void apply(M memory, int offset, int i, byte a);
    }

    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  FStOp<M> f) {
        byte[] vec = getElements();
        for (int i = 0; i < vec.length; i++) {
            f.apply(memory, offset, i, vec[i]);
        }
    }

    /*package-private*/
    @ForceInline
    final
    <M> void stOp(M memory, int offset,
                  VectorMask<Byte> m,
                  FStOp<M> f) {
        // Masked store: only lanes with a set mask bit touch memory.
        byte[] vec = getElements();
        boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
            if (mbits[i]) {
                f.apply(memory, offset, i, vec[i]);
            }
        }
    }

    // Binary test

    /*package-private*/
    interface FBinTest {
        boolean apply(int cond, int i, byte a, byte b);
    }

    // Evaluates a lane-wise comparison against another vector and
    // materializes the per-lane results into a mask.
    /*package-private*/
    @ForceInline
    final
    AbstractMask<Byte> bTest(int cond,
                             Vector<Byte> o,
                             FBinTest f) {
        byte[] vec1 = getElements();
        byte[] vec2 = ((ByteVector)o).getElements();
        boolean[] bits = new boolean[length()];
        for (int i = 0; i < length(); i++){
            bits[i] = f.apply(cond, i, vec1[i], vec2[i]);
        }
        return maskFactory(bits);
    }

    // Scalar comparison dispatch; cond is one of the BT_* condition codes.
    // An unknown condition is a template/JIT invariant violation, hence
    // AssertionError rather than a recoverable exception.
    /*package-private*/
    @ForceInline
    static boolean doBinTest(int cond, byte a, byte b) {
        switch (cond) {
        case BT_eq:  return a == b;
        case BT_ne:  return a != b;
        case BT_lt:  return a < b;
        case BT_le:  return a <= b;
        case BT_gt:  return a > b;
        case BT_ge:  return a >= b;
        }
        throw new AssertionError(Integer.toHexString(cond));
    }

    /*package-private*/
    @Override
    abstract ByteSpecies vspecies();

    // Widening byte -> long sign-extends, per Java primitive conversion.
    /*package-private*/
    @ForceInline
    static long toBits(byte e) {
        return e;
    }

    // Narrowing long -> byte keeps the low 8 bits.
    /*package-private*/
    @ForceInline
    static byte fromBits(long bits) {
        return ((byte)bits);
    }

    // Static factories (other than memory operations)

    // Note: A surprising behavior in javadoc
    // sometimes makes a lone /** {@inheritDoc} */
    // comment drop the method altogether,
    // apparently if the method mentions an
    // parameter or return type of Vector<Byte>
    // instead of Vector<E> as originally specified.
    // Adding an empty HTML fragment appears to
    // nudge javadoc into providing the desired
    // inherited documentation.  We use the HTML
    // comment <!--workaround--> for this.
420 421 /** 422 * {@inheritDoc} <!--workaround--> 423 */ 424 @ForceInline 425 public static ByteVector zero(VectorSpecies<Byte> species) { 426 ByteSpecies vsp = (ByteSpecies) species; 427 return VectorIntrinsics.broadcastCoerced(vsp.vectorType(), byte.class, species.length(), 428 0, vsp, 429 ((bits_, s_) -> s_.rvOp(i -> bits_))); 430 } 431 432 /** 433 * Returns a vector of the same species as this one 434 * where all lane elements are set to 435 * the primitive value {@code e}. 436 * 437 * The contents of the current vector are discarded; 438 * only the species is relevant to this operation. 439 * 440 * <p> This method returns the value of this expression: 441 * {@code ByteVector.broadcast(this.species(), e)}. 442 * 443 * @apiNote 444 * Unlike the similar method named {@code broadcast()} 445 * in the supertype {@code Vector}, this method does not 446 * need to validate its argument, and cannot throw 447 * {@code IllegalArgumentException}. This method is 448 * therefore preferable to the supertype method. 449 * 450 * @param e the value to broadcast 451 * @return a vector where all lane elements are set to 452 * the primitive value {@code e} 453 * @see #broadcast(VectorSpecies,long) 454 * @see Vector#broadcast(long) 455 * @see VectorSpecies#broadcast(long) 456 */ 457 public abstract ByteVector broadcast(byte e); 458 459 /** 460 * Returns a vector of the given species 461 * where all lane elements are set to 462 * the primitive value {@code e}. 
463 * 464 * @param species species of the desired vector 465 * @param e the value to broadcast 466 * @return a vector where all lane elements are set to 467 * the primitive value {@code e} 468 * @see #broadcast(long) 469 * @see Vector#broadcast(long) 470 * @see VectorSpecies#broadcast(long) 471 */ 472 public static ByteVector broadcast(VectorSpecies<Byte> species, byte e) { 473 ByteSpecies vsp = (ByteSpecies) species; 474 return vsp.broadcast(e); 475 } 476 477 /*package-private*/ 478 @ForceInline 479 final ByteVector broadcastTemplate(byte e) { 480 ByteSpecies vsp = vspecies(); 481 return vsp.broadcast(e); 482 } 483 484 /** 485 * {@inheritDoc} <!--workaround--> 486 * @apiNote 487 * When working with vector subtypes like {@code ByteVector}, 488 * {@linkplain #broadcast(byte) the more strongly typed method} 489 * is typically selected. It can be explicitly selected 490 * using a cast: {@code v.broadcast((byte)e)}. 491 * The two expressions will produce numerically identical results. 492 */ 493 @Override 494 public abstract ByteVector broadcast(long e); 495 496 /** 497 * Returns a vector of the given species 498 * where all lane elements are set to 499 * the primitive value {@code e}. 500 * 501 * The {@code long} value must be accurately representable 502 * by the {@code ETYPE} of the vector species, so that 503 * {@code e==(long)(ETYPE)e}. 
504 * 505 * @param species species of the desired vector 506 * @param e the value to broadcast 507 * @return a vector where all lane elements are set to 508 * the primitive value {@code e} 509 * @throws IllegalArgumentException 510 * if the given {@code long} value cannot 511 * be represented by the vector's {@code ETYPE} 512 * @see #broadcast(VectorSpecies,byte) 513 * @see VectorSpecies#checkValue(long) 514 */ 515 public static ByteVector broadcast(VectorSpecies<Byte> species, long e) { 516 ByteSpecies vsp = (ByteSpecies) species; 517 return vsp.broadcast(e); 518 } 519 520 /*package-private*/ 521 @ForceInline 522 final ByteVector broadcastTemplate(long e) { 523 return vspecies().broadcast(e); 524 } 525 526 /** 527 * Returns a vector where each lane element is set to given 528 * primitive values. 529 * <p> 530 * For each vector lane, where {@code N} is the vector lane index, the 531 * the primitive value at index {@code N} is placed into the resulting 532 * vector at lane index {@code N}. 533 * 534 * @param species species of the desired vector 535 * @param es the given primitive values 536 * @return a vector where each lane element is set to given primitive 537 * values 538 * @throws IllegalArgumentException 539 * if {@code es.length != species.length()} 540 */ 541 @ForceInline 542 @SuppressWarnings("unchecked") 543 public static ByteVector fromValues(VectorSpecies<Byte> species, byte... es) { 544 ByteSpecies vsp = (ByteSpecies) species; 545 int vlength = vsp.laneCount(); 546 VectorIntrinsics.requireLength(es.length, vlength); 547 // Get an unaliased copy and use it directly: 548 return vsp.vectorFactory(Arrays.copyOf(es, vlength)); 549 } 550 551 /** 552 * Returns a vector where the first lane element is set to the primtive 553 * value {@code e}, all other lane elements are set to the default 554 * value(zero). 
555 * 556 * @param species species of the desired vector 557 * @param e the value 558 * @return a vector where the first lane element is set to the primitive 559 * value {@code e} 560 */ 561 // FIXME: Does this carry its weight? 562 @ForceInline 563 public static ByteVector single(VectorSpecies<Byte> species, byte e) { 564 return zero(species).withLane(0, e); 565 } 566 567 /** 568 * Returns a vector where each lane element is set to a randomly 569 * generated primitive value. 570 * 571 * The semantics are equivalent to calling 572 * {@code (byte)}{@link ThreadLocalRandom#nextInt()} 573 * for each lane, from first to last. 574 * 575 * @param species species of the desired vector 576 * @return a vector where each lane elements is set to a randomly 577 * generated primitive value 578 */ 579 public static ByteVector random(VectorSpecies<Byte> species) { 580 ByteSpecies vsp = (ByteSpecies) species; 581 ThreadLocalRandom r = ThreadLocalRandom.current(); 582 return vsp.vOp(i -> nextRandom(r)); 583 } 584 private static byte nextRandom(ThreadLocalRandom r) { 585 return (byte) r.nextInt(); 586 } 587 588 // Unary lanewise support 589 590 /** 591 * {@inheritDoc} <!--workaround--> 592 */ 593 public abstract 594 ByteVector lanewise(VectorOperators.Unary op); 595 596 @ForceInline 597 final 598 ByteVector lanewiseTemplate(VectorOperators.Unary op) { 599 if (opKind(op, VO_SPECIAL)) { 600 if (op == ZOMO) { 601 return blend(broadcast(-1), compare(NE, 0)); 602 } 603 if (op == NEG) { 604 // FIXME: Support this in the JIT. 
605 return broadcast(0).lanewiseTemplate(SUB, this); 606 } 607 } 608 int opc = opCode(op); 609 return VectorIntrinsics.unaryOp( 610 opc, getClass(), byte.class, length(), 611 this, 612 UN_IMPL.find(op, opc, (opc_) -> { 613 switch (opc_) { 614 case VECTOR_OP_NEG: return v0 -> 615 v0.uOp((i, a) -> (byte) -a); 616 case VECTOR_OP_ABS: return v0 -> 617 v0.uOp((i, a) -> (byte) Math.abs(a)); 618 case VECTOR_OP_NOT: return v0 -> 619 v0.uOp((i, a) -> (byte) ~a); 620 default: return null; 621 }})); 622 } 623 private static final 624 ImplCache<Unary,UnaryOperator<ByteVector>> UN_IMPL 625 = new ImplCache<>(Unary.class, ByteVector.class); 626 627 /** 628 * {@inheritDoc} <!--workaround--> 629 */ 630 @ForceInline 631 public final 632 ByteVector lanewise(VectorOperators.Unary op, 633 VectorMask<Byte> m) { 634 return blend(lanewise(op), m); 635 } 636 637 // Binary lanewise support 638 639 /** 640 * {@inheritDoc} <!--workaround--> 641 * @see #lanewise(VectorOperators.Binary,byte) 642 * @see #lanewise(VectorOperators.Binary,byte,VectorMask) 643 */ 644 @Override 645 public abstract 646 ByteVector lanewise(VectorOperators.Binary op, 647 Vector<Byte> v); 648 @ForceInline 649 final 650 ByteVector lanewiseTemplate(VectorOperators.Binary op, 651 Vector<Byte> v) { 652 ByteVector that = (ByteVector) v; 653 that.check(this); 654 if (opKind(op, VO_SPECIAL | VO_SHIFT)) { 655 if (op == FIRST_NONZERO) { 656 // FIXME: Support this in the JIT. 657 VectorMask<Byte> thisNZ 658 = this.viewAsIntegralLanes().compare(NE, (byte) 0); 659 that = that.blend((byte) 0, thisNZ.cast(vspecies())); 660 op = OR_UNCHECKED; 661 } 662 if (opKind(op, VO_SHIFT)) { 663 // As per shift specification for Java, mask the shift count. 664 // This allows the JIT to ignore some ISA details. 665 that = that.lanewise(AND, SHIFT_MASK); 666 } 667 if (op == ROR || op == ROL) { // FIXME: JIT should do this 668 ByteVector neg = that.lanewise(NEG); 669 ByteVector hi = this.lanewise(LSHL, (op == ROR) ? 
neg : that); 670 ByteVector lo = this.lanewise(LSHR, (op == ROR) ? that : neg); 671 return hi.lanewise(OR, lo); 672 } else if (op == AND_NOT) { 673 // FIXME: Support this in the JIT. 674 that = that.lanewise(NOT); 675 op = AND; 676 } else if (op == DIV) { 677 VectorMask<Byte> eqz = that.eq((byte)0); 678 if (eqz.anyTrue()) { 679 throw that.divZeroException(); 680 } 681 } 682 } 683 int opc = opCode(op); 684 return VectorIntrinsics.binaryOp( 685 opc, getClass(), byte.class, length(), 686 this, that, 687 BIN_IMPL.find(op, opc, (opc_) -> { 688 switch (opc_) { 689 case VECTOR_OP_ADD: return (v0, v1) -> 690 v0.bOp(v1, (i, a, b) -> (byte)(a + b)); 691 case VECTOR_OP_SUB: return (v0, v1) -> 692 v0.bOp(v1, (i, a, b) -> (byte)(a - b)); 693 case VECTOR_OP_MUL: return (v0, v1) -> 694 v0.bOp(v1, (i, a, b) -> (byte)(a * b)); 695 case VECTOR_OP_DIV: return (v0, v1) -> 696 v0.bOp(v1, (i, a, b) -> (byte)(a / b)); 697 case VECTOR_OP_MAX: return (v0, v1) -> 698 v0.bOp(v1, (i, a, b) -> (byte)Math.max(a, b)); 699 case VECTOR_OP_MIN: return (v0, v1) -> 700 v0.bOp(v1, (i, a, b) -> (byte)Math.min(a, b)); 701 case VECTOR_OP_FIRST_NONZERO: return (v0, v1) -> 702 v0.bOp(v1, (i, a, b) -> toBits(a) != 0 ? 
a : b); 703 case VECTOR_OP_AND: return (v0, v1) -> 704 v0.bOp(v1, (i, a, b) -> (byte)(a & b)); 705 case VECTOR_OP_OR: return (v0, v1) -> 706 v0.bOp(v1, (i, a, b) -> (byte)(a | b)); 707 case VECTOR_OP_AND_NOT: return (v0, v1) -> 708 v0.bOp(v1, (i, a, b) -> (byte)(a & ~b)); 709 case VECTOR_OP_XOR: return (v0, v1) -> 710 v0.bOp(v1, (i, a, b) -> (byte)(a ^ b)); 711 case VECTOR_OP_LSHIFT: return (v0, v1) -> 712 v0.bOp(v1, (i, a, n) -> (byte)(a << n)); 713 case VECTOR_OP_RSHIFT: return (v0, v1) -> 714 v0.bOp(v1, (i, a, n) -> (byte)(a >> n)); 715 case VECTOR_OP_URSHIFT: return (v0, v1) -> 716 v0.bOp(v1, (i, a, n) -> (byte)((a & LSHR_SETUP_MASK) >>> n)); 717 case VECTOR_OP_LROTATE: return (v0, v1) -> 718 v0.bOp(v1, (i, a, n) -> (byte)((a << n)|(a >> -n))); 719 case VECTOR_OP_RROTATE: return (v0, v1) -> 720 v0.bOp(v1, (i, a, n) -> (byte)((a >> n)|(a << -n))); 721 default: return null; 722 }})); 723 } 724 private static final 725 ImplCache<Binary,BinaryOperator<ByteVector>> BIN_IMPL 726 = new ImplCache<>(Binary.class, ByteVector.class); 727 728 /** 729 * {@inheritDoc} <!--workaround--> 730 * @see #lanewise(VectorOperators.Binary,byte,VectorMask) 731 */ 732 @ForceInline 733 public final 734 ByteVector lanewise(VectorOperators.Binary op, 735 Vector<Byte> v, 736 VectorMask<Byte> m) { 737 ByteVector that = (ByteVector) v; 738 if (op == DIV) { 739 // suppress div/0 exceptions in unset lanes 740 that = that.lanewise(NOT, that.eq((byte)0)); 741 return blend(lanewise(DIV, that), m); 742 } 743 return blend(lanewise(op, v), m); 744 } 745 // FIXME: Maybe all of the public final methods in this file (the 746 // simple ones that just call lanewise) should be pushed down to 747 // the X-VectorBits template. They can't optimize properly at 748 // this level, and must rely on inlining. Does it work? 749 // (If it works, of course keep the code here.) 750 751 /** 752 * Combines the lane values of this vector 753 * with the value of a broadcast scalar. 
754 * 755 * This is a lane-wise binary operation which applies 756 * the selected operation to each lane. 757 * The return value will be equal to this expression: 758 * {@code this.lanewise(op, this.broadcast(e))}. 759 * 760 * @param op the operation used to process lane values 761 * @param e the input scalar 762 * @return the result of applying the operation lane-wise 763 * to the two input vectors 764 * @throws UnsupportedOperationException if this vector does 765 * not support the requested operation 766 * @see #lanewise(VectorOperators.Binary,Vector) 767 * @see #lanewise(VectorOperators.Binary,byte,VectorMask) 768 */ 769 @ForceInline 770 public final 771 ByteVector lanewise(VectorOperators.Binary op, 772 byte e) { 773 int opc = opCode(op); 774 if (opKind(op, VO_SHIFT) && (byte)(int)e == e) { 775 return lanewiseShift(op, (int) e); 776 } 777 if (op == AND_NOT) { 778 op = AND; e = (byte) ~e; 779 } 780 return lanewise(op, broadcast(e)); 781 } 782 783 /** 784 * Combines the lane values of this vector 785 * with the value of a broadcast scalar, 786 * with selection of lane elements controlled by a mask. 787 * 788 * This is a masked lane-wise binary operation which applies 789 * the selected operation to each lane. 790 * The return value will be equal to this expression: 791 * {@code this.lanewise(op, this.broadcast(e), m)}. 
792 * 793 * @param op the operation used to process lane values 794 * @param e the input scalar 795 * @param m the mask controlling lane selection 796 * @return the result of applying the operation lane-wise 797 * to the input vector and the scalar 798 * @throws UnsupportedOperationException if this vector does 799 * not support the requested operation 800 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 801 * @see #lanewise(VectorOperators.Binary,byte) 802 */ 803 @ForceInline 804 public final 805 ByteVector lanewise(VectorOperators.Binary op, 806 byte e, 807 VectorMask<Byte> m) { 808 return blend(lanewise(op, e), m); 809 } 810 811 /** 812 * {@inheritDoc} <!--workaround--> 813 * @apiNote 814 * When working with vector subtypes like {@code ByteVector}, 815 * {@linkplain #lanewise(VectorOperators.Binary,byte) 816 * the more strongly typed method} 817 * is typically selected. It can be explicitly selected 818 * using a cast: {@code v.lanewise(op,(byte)e)}. 819 * The two expressions will produce numerically identical results. 820 */ 821 @ForceInline 822 public final 823 ByteVector lanewise(VectorOperators.Binary op, 824 long e) { 825 byte e1 = (byte) e; 826 if ((long)e1 != e 827 // allow shift ops to clip down their int parameters 828 && !(opKind(op, VO_SHIFT) && (int)e1 == e) 829 ) { 830 vspecies().checkValue(e); // for exception 831 } 832 return lanewise(op, e1); 833 } 834 835 /** 836 * {@inheritDoc} <!--workaround--> 837 * @apiNote 838 * When working with vector subtypes like {@code ByteVector}, 839 * {@linkplain #lanewise(VectorOperators.Binary,byte,VectorMask) 840 * the more strongly typed method} 841 * is typically selected. It can be explicitly selected 842 * using a cast: {@code v.lanewise(op,(byte)e,m)}. 843 * The two expressions will produce numerically identical results. 
844 */ 845 @ForceInline 846 public final 847 ByteVector lanewise(VectorOperators.Binary op, 848 long e, VectorMask<Byte> m) { 849 return blend(lanewise(op, e), m); 850 } 851 852 /*package-private*/ 853 abstract ByteVector 854 lanewiseShift(VectorOperators.Binary op, int e); 855 856 /*package-private*/ 857 @ForceInline 858 final ByteVector 859 lanewiseShiftTemplate(VectorOperators.Binary op, int e) { 860 // Special handling for these. FIXME: Refactor? 861 int opc = opCode(op); 862 assert(opKind(op, VO_SHIFT)); 863 // As per shift specification for Java, mask the shift count. 864 e &= SHIFT_MASK; 865 if (op == ROR || op == ROL) { // FIXME: JIT should do this 866 ByteVector hi = this.lanewise(LSHL, (op == ROR) ? -e : e); 867 ByteVector lo = this.lanewise(LSHR, (op == ROR) ? e : -e); 868 return hi.lanewise(OR, lo); 869 } 870 return VectorIntrinsics.broadcastInt( 871 opc, getClass(), byte.class, length(), 872 this, e, 873 BIN_INT_IMPL.find(op, opc, (opc_) -> { 874 switch (opc_) { 875 case VECTOR_OP_LSHIFT: return (v, n) -> 876 v.uOp((i, a) -> (byte)(a << n)); 877 case VECTOR_OP_RSHIFT: return (v, n) -> 878 v.uOp((i, a) -> (byte)(a >> n)); 879 case VECTOR_OP_URSHIFT: return (v, n) -> 880 v.uOp((i, a) -> (byte)((a & LSHR_SETUP_MASK) >>> n)); 881 case VECTOR_OP_LROTATE: return (v, n) -> 882 v.uOp((i, a) -> (byte)((a << n)|(a >> -n))); 883 case VECTOR_OP_RROTATE: return (v, n) -> 884 v.uOp((i, a) -> (byte)((a >> n)|(a << -n))); 885 default: return null; 886 }})); 887 } 888 private static final 889 ImplCache<Binary,VectorBroadcastIntOp<ByteVector>> BIN_INT_IMPL 890 = new ImplCache<>(Binary.class, ByteVector.class); 891 892 // As per shift specification for Java, mask the shift count. 893 // We mask 0X3F (long), 0X1F (int), 0x0F (short), 0x7 (byte). 894 // The latter two maskings go beyond the JLS, but seem reasonable 895 // since our lane types are first-class types, not just dressed 896 // up ints. 
    private static final int SHIFT_MASK = (Byte.SIZE - 1);
    // Also simulate >>> on sub-word variables with a mask.
    private static final int LSHR_SETUP_MASK = ((1 << Byte.SIZE) - 1);

    // Ternary lanewise support

    // Ternary operators come in eight variations:
    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2])
    //   lanewise(op, [broadcast(e1)|v1], [broadcast(e2)|v2], mask)

    // It is annoying to support all of these variations of masking
    // and broadcast, but it would be more surprising not to continue
    // the obvious pattern started by unary and binary.

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,byte,byte)
     * @see #lanewise(VectorOperators.Ternary,Vector,byte)
     * @see #lanewise(VectorOperators.Ternary,byte,Vector)
     */
    @Override
    public abstract
    ByteVector lanewise(VectorOperators.Ternary op,
                                  Vector<Byte> v1,
                                  Vector<Byte> v2);
    @ForceInline
    final
    ByteVector lanewiseTemplate(VectorOperators.Ternary op,
                                          Vector<Byte> v1,
                                          Vector<Byte> v2) {
        ByteVector that = (ByteVector) v1;
        ByteVector tother = (ByteVector) v2;
        // It's a word: https://www.dictionary.com/browse/tother
        // See also Chapter 11 of Dickens, Our Mutual Friend:
        //      "Totherest Governor," replied Mr Riderhood...
        that.check(this);
        tother.check(this);
        if (op == BITWISE_BLEND) {
            // FIXME: Support this in the JIT.
            // Expand a^((a^b)&c) as two binary lanewise steps:
            // select bits of b where c is set, bits of a where clear.
            that = this.lanewise(XOR, that).lanewise(AND, tother);
            return this.lanewise(XOR, that);
        }
        int opc = opCode(op);
        return VectorIntrinsics.ternaryOp(
            opc, getClass(), byte.class, length(),
            this, that, tother,
            TERN_IMPL.find(op, opc, (opc_) -> {
              switch (opc_) {
                case VECTOR_OP_BITWISE_BLEND: return (v0, v1_, v2_) ->
                        v0.tOp(v1_, v2_, (i, a, b, c) -> (byte)(a^((a^b)&c)));
                default: return null;
                }}));
    }
    private static final
    ImplCache<Ternary,TernaryOperation<ByteVector>> TERN_IMPL
        = new ImplCache<>(Ternary.class, ByteVector.class);

    /**
     * {@inheritDoc} <!--workaround-->
     * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Ternary op,
                                  Vector<Byte> v1,
                                  Vector<Byte> v2,
                                  VectorMask<Byte> m) {
        return blend(lanewise(op, v1, v2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of two broadcast scalars.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2))}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the first input scalar
     * @param e2 the second input scalar
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalars
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2)
                                  byte e1,
                                  byte e2) {
        return lanewise(op, broadcast(e1), broadcast(e2));
    }

    /**
     * Combines the lane values of this vector
     * with the values of two broadcast scalars,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), this.broadcast(e2), m)}.
     *
     * @param op the operation used to combine lane values
     * @param e1 the first input scalar
     * @param e2 the second input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vector and the scalars
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,byte,byte)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,e2,m)
                                  byte e1,
                                  byte e2,
                                  VectorMask<Byte> m) {
        return blend(lanewise(op, e1, e2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, v1, this.broadcast(e2))}.
     *
     * @param op the operation used to combine lane values
     * @param v1 the other input vector
     * @param e2 the input scalar
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,byte,byte)
     * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2)
                                  Vector<Byte> v1,
                                  byte e2) {
        return lanewise(op, v1, broadcast(e2));
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar,
     * with selection of lane elements controlled by a mask.
     *
     * This is a masked lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, v1, this.broadcast(e2), m)}.
     *
     * @param op the operation used to combine lane values
     * @param v1 the other input vector
     * @param e2 the input scalar
     * @param m the mask controlling lane selection
     * @return the result of applying the operation lane-wise
     *         to the input vectors and the scalar
     * @throws UnsupportedOperationException if this vector does
     *         not support the requested operation
     * @see #lanewise(VectorOperators.Ternary,Vector,Vector)
     * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask)
     * @see #lanewise(VectorOperators.Ternary,Vector,byte)
     */
    @ForceInline
    public final
    ByteVector lanewise(VectorOperators.Ternary op, //(op,v1,e2,m)
                                  Vector<Byte> v1,
                                  byte e2,
                                  VectorMask<Byte> m) {
        return blend(lanewise(op, v1, e2), m);
    }

    /**
     * Combines the lane values of this vector
     * with the values of another vector and a broadcast scalar.
     *
     * This is a lane-wise ternary operation which applies
     * the selected operation to each lane.
     * The return value will be equal to this expression:
     * {@code this.lanewise(op, this.broadcast(e1), v2)}.
1095 * 1096 * @param op the operation used to combine lane values 1097 * @param e1 the input scalar 1098 * @param v2 the other input vector 1099 * @return the result of applying the operation lane-wise 1100 * to the input vectors and the scalar 1101 * @throws UnsupportedOperationException if this vector does 1102 * not support the requested operation 1103 * @see #lanewise(VectorOperators.Ternary,Vector,Vector) 1104 * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask) 1105 */ 1106 @ForceInline 1107 public final 1108 ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2) 1109 byte e1, 1110 Vector<Byte> v2) { 1111 return lanewise(op, broadcast(e1), v2); 1112 } 1113 1114 /** 1115 * Combines the lane values of this vector 1116 * with the values of another vector and a broadcast scalar, 1117 * with selection of lane elements controlled by a mask. 1118 * 1119 * This is a masked lane-wise ternary operation which applies 1120 * the selected operation to each lane. 1121 * The return value will be equal to this expression: 1122 * {@code this.lanewise(op, this.broadcast(e1), v2, m)}. 1123 * 1124 * @param op the operation used to combine lane values 1125 * @param e1 the input scalar 1126 * @param v2 the other input vector 1127 * @param m the mask controlling lane selection 1128 * @return the result of applying the operation lane-wise 1129 * to the input vectors and the scalar 1130 * @throws UnsupportedOperationException if this vector does 1131 * not support the requested operation 1132 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 1133 * @see #lanewise(VectorOperators.Ternary,byte,Vector) 1134 */ 1135 @ForceInline 1136 public final 1137 ByteVector lanewise(VectorOperators.Ternary op, //(op,e1,v2,m) 1138 byte e1, 1139 Vector<Byte> v2, 1140 VectorMask<Byte> m) { 1141 return blend(lanewise(op, e1, v2), m); 1142 } 1143 1144 // (Thus endeth the Great and Mighty Ternary Ogdoad.) 
1145 // https://en.wikipedia.org/wiki/Ogdoad 1146 1147 /// FULL-SERVICE BINARY METHODS: ADD, SUB, MUL, DIV 1148 // 1149 // These include masked and non-masked versions. 1150 // This subclass adds broadcast (masked or not). 1151 1152 /** 1153 * {@inheritDoc} <!--workaround--> 1154 * @see #add(byte) 1155 */ 1156 @Override 1157 @ForceInline 1158 public final ByteVector add(Vector<Byte> v) { 1159 return lanewise(ADD, v); 1160 } 1161 1162 /** 1163 * Adds this vector to the broadcast of an input scalar. 1164 * 1165 * This is a lane-wise binary operation which applies 1166 * the primitive addition operation ({@code +}) to each lane. 1167 * 1168 * This method is also equivalent to the expression 1169 * {@link #lanewise(VectorOperators.Binary,byte) 1170 * lanewise}{@code (}{@link VectorOperators#ADD 1171 * ADD}{@code , e)}. 1172 * 1173 * @param e the input scalar 1174 * @return the result of adding each lane of this vector to the scalar 1175 * @see #add(Vector) 1176 * @see #broadcast(byte) 1177 * @see #add(byte,VectorMask) 1178 * @see VectorOperators#ADD 1179 * @see #lanewise(VectorOperators.Binary,Vector) 1180 * @see #lanewise(VectorOperators.Binary,byte) 1181 */ 1182 @ForceInline 1183 public final 1184 ByteVector add(byte e) { 1185 return lanewise(ADD, e); 1186 } 1187 1188 /** 1189 * {@inheritDoc} <!--workaround--> 1190 * @see #add(byte,VectorMask) 1191 */ 1192 @Override 1193 @ForceInline 1194 public final ByteVector add(Vector<Byte> v, 1195 VectorMask<Byte> m) { 1196 return lanewise(ADD, v, m); 1197 } 1198 1199 /** 1200 * Adds this vector to the broadcast of an input scalar, 1201 * selecting lane elements controlled by a mask. 1202 * 1203 * This is a masked lane-wise binary operation which applies 1204 * the primitive addition operation ({@code +}) to each lane. 1205 * 1206 * This method is also equivalent to the expression 1207 * {@link #lanewise(VectorOperators.Binary,byte,VectorMask) 1208 * lanewise}{@code (}{@link VectorOperators#ADD 1209 * ADD}{@code , s, m)}. 
1210 * 1211 * @param e the input scalar 1212 * @param m the mask controlling lane selection 1213 * @return the result of adding each lane of this vector to the scalar 1214 * @see #add(Vector,VectorMask) 1215 * @see #broadcast(byte) 1216 * @see #add(byte) 1217 * @see VectorOperators#ADD 1218 * @see #lanewise(VectorOperators.Binary,Vector) 1219 * @see #lanewise(VectorOperators.Binary,byte) 1220 */ 1221 @ForceInline 1222 public final ByteVector add(byte e, 1223 VectorMask<Byte> m) { 1224 return lanewise(ADD, e, m); 1225 } 1226 1227 /** 1228 * {@inheritDoc} <!--workaround--> 1229 * @see #sub(byte) 1230 */ 1231 @Override 1232 @ForceInline 1233 public final ByteVector sub(Vector<Byte> v) { 1234 return lanewise(SUB, v); 1235 } 1236 1237 /** 1238 * Subtracts an input scalar from this vector. 1239 * 1240 * This is a masked lane-wise binary operation which applies 1241 * the primitive subtraction operation ({@code -}) to each lane. 1242 * 1243 * This method is also equivalent to the expression 1244 * {@link #lanewise(VectorOperators.Binary,byte) 1245 * lanewise}{@code (}{@link VectorOperators#SUB 1246 * SUB}{@code , e)}. 1247 * 1248 * @param e the input scalar 1249 * @return the result of subtracting the scalar from each lane of this vector 1250 * @see #sub(Vector) 1251 * @see #broadcast(byte) 1252 * @see #sub(byte,VectorMask) 1253 * @see VectorOperators#SUB 1254 * @see #lanewise(VectorOperators.Binary,Vector) 1255 * @see #lanewise(VectorOperators.Binary,byte) 1256 */ 1257 @ForceInline 1258 public final ByteVector sub(byte e) { 1259 return lanewise(SUB, e); 1260 } 1261 1262 /** 1263 * {@inheritDoc} <!--workaround--> 1264 * @see #sub(byte,VectorMask) 1265 */ 1266 @Override 1267 @ForceInline 1268 public final ByteVector sub(Vector<Byte> v, 1269 VectorMask<Byte> m) { 1270 return lanewise(SUB, v, m); 1271 } 1272 1273 /** 1274 * Subtracts an input scalar from this vector 1275 * under the control of a mask. 
1276 * 1277 * This is a masked lane-wise binary operation which applies 1278 * the primitive subtraction operation ({@code -}) to each lane. 1279 * 1280 * This method is also equivalent to the expression 1281 * {@link #lanewise(VectorOperators.Binary,byte,VectorMask) 1282 * lanewise}{@code (}{@link VectorOperators#SUB 1283 * SUB}{@code , s, m)}. 1284 * 1285 * @param e the input scalar 1286 * @param m the mask controlling lane selection 1287 * @return the result of subtracting the scalar from each lane of this vector 1288 * @see #sub(Vector,VectorMask) 1289 * @see #broadcast(byte) 1290 * @see #sub(byte) 1291 * @see VectorOperators#SUB 1292 * @see #lanewise(VectorOperators.Binary,Vector) 1293 * @see #lanewise(VectorOperators.Binary,byte) 1294 */ 1295 @ForceInline 1296 public final ByteVector sub(byte e, 1297 VectorMask<Byte> m) { 1298 return lanewise(SUB, e, m); 1299 } 1300 1301 /** 1302 * {@inheritDoc} <!--workaround--> 1303 * @see #mul(byte) 1304 */ 1305 @Override 1306 @ForceInline 1307 public final ByteVector mul(Vector<Byte> v) { 1308 return lanewise(MUL, v); 1309 } 1310 1311 /** 1312 * Multiplies this vector by the broadcast of an input scalar. 1313 * 1314 * This is a lane-wise binary operation which applies 1315 * the primitive multiplication operation ({@code *}) to each lane. 1316 * 1317 * This method is also equivalent to the expression 1318 * {@link #lanewise(VectorOperators.Binary,byte) 1319 * lanewise}{@code (}{@link VectorOperators#MUL 1320 * MUL}{@code , e)}. 
1321 * 1322 * @param e the input scalar 1323 * @return the result of multiplying this vector by the given scalar 1324 * @see #mul(Vector) 1325 * @see #broadcast(byte) 1326 * @see #mul(byte,VectorMask) 1327 * @see VectorOperators#MUL 1328 * @see #lanewise(VectorOperators.Binary,Vector) 1329 * @see #lanewise(VectorOperators.Binary,byte) 1330 */ 1331 @ForceInline 1332 public final ByteVector mul(byte e) { 1333 return lanewise(MUL, e); 1334 } 1335 1336 /** 1337 * {@inheritDoc} <!--workaround--> 1338 * @see #mul(byte,VectorMask) 1339 */ 1340 @Override 1341 @ForceInline 1342 public final ByteVector mul(Vector<Byte> v, 1343 VectorMask<Byte> m) { 1344 return lanewise(MUL, v, m); 1345 } 1346 1347 /** 1348 * Multiplies this vector by the broadcast of an input scalar, 1349 * selecting lane elements controlled by a mask. 1350 * 1351 * This is a masked lane-wise binary operation which applies 1352 * the primitive multiplication operation ({@code *}) to each lane. 1353 * 1354 * This method is also equivalent to the expression 1355 * {@link #lanewise(VectorOperators.Binary,byte,VectorMask) 1356 * lanewise}{@code (}{@link VectorOperators#MUL 1357 * MUL}{@code , s, m)}. 1358 * 1359 * @param e the input scalar 1360 * @param m the mask controlling lane selection 1361 * @return the result of muling each lane of this vector to the scalar 1362 * @see #mul(Vector,VectorMask) 1363 * @see #broadcast(byte) 1364 * @see #mul(byte) 1365 * @see VectorOperators#MUL 1366 * @see #lanewise(VectorOperators.Binary,Vector) 1367 * @see #lanewise(VectorOperators.Binary,byte) 1368 */ 1369 @ForceInline 1370 public final ByteVector mul(byte e, 1371 VectorMask<Byte> m) { 1372 return lanewise(MUL, e, m); 1373 } 1374 1375 /** 1376 * {@inheritDoc} <!--workaround--> 1377 * @apiNote If there is a zero divisor, {@code 1378 * ArithmeticException} will be thrown. 
1379 * @see #div(byte) 1380 */ 1381 @Override 1382 @ForceInline 1383 public final ByteVector div(Vector<Byte> v) { 1384 return lanewise(DIV, v); 1385 } 1386 1387 /** 1388 * Divides this vector by the broadcast of an input scalar. 1389 * 1390 * This is a lane-wise binary operation which applies 1391 * the primitive division operation ({@code /}) to each lane. 1392 * 1393 * This method is also equivalent to the expression 1394 * {@link #lanewise(VectorOperators.Binary,byte) 1395 * lanewise}{@code (}{@link VectorOperators#DIV 1396 * DIV}{@code , e)}. 1397 * 1398 * @apiNote If there is a zero divisor, {@code 1399 * ArithmeticException} will be thrown. 1400 * @see #div(byte) 1401 1402 * 1403 * @param e the input scalar 1404 * @return the result of dividing each lane of this vector by the scalar 1405 * @see #div(Vector) 1406 * @see #broadcast(byte) 1407 * @see #div(byte,VectorMask) 1408 * @see VectorOperators#DIV 1409 * @see #lanewise(VectorOperators.Binary,Vector) 1410 * @see #lanewise(VectorOperators.Binary,byte) 1411 */ 1412 @ForceInline 1413 public final ByteVector div(byte e) { 1414 return lanewise(DIV, e); 1415 } 1416 1417 /** 1418 * {@inheritDoc} <!--workaround--> 1419 * @see #div(byte,VectorMask) 1420 * @apiNote If there is a zero divisor, {@code 1421 * ArithmeticException} will be thrown. 1422 */ 1423 @Override 1424 @ForceInline 1425 public final ByteVector div(Vector<Byte> v, 1426 VectorMask<Byte> m) { 1427 return lanewise(DIV, v, m); 1428 } 1429 1430 /** 1431 * Divides this vector by the broadcast of an input scalar, 1432 * selecting lane elements controlled by a mask. 1433 * 1434 * This is a masked lane-wise binary operation which applies 1435 * the primitive division operation ({@code /}) to each lane. 1436 * 1437 * This method is also equivalent to the expression 1438 * {@link #lanewise(VectorOperators.Binary,byte,VectorMask) 1439 * lanewise}{@code (}{@link VectorOperators#DIV 1440 * DIV}{@code , s, m)}. 
1441 * 1442 * @apiNote If there is a zero divisor, {@code 1443 * ArithmeticException} will be thrown. 1444 * 1445 * @param e the input scalar 1446 * @param m the mask controlling lane selection 1447 * @return the result of dividing each lane of this vector by the scalar 1448 * @see #div(Vector,VectorMask) 1449 * @see #broadcast(byte) 1450 * @see #div(byte) 1451 * @see VectorOperators#DIV 1452 * @see #lanewise(VectorOperators.Binary,Vector) 1453 * @see #lanewise(VectorOperators.Binary,byte) 1454 */ 1455 @ForceInline 1456 public final ByteVector div(byte e, 1457 VectorMask<Byte> m) { 1458 return lanewise(DIV, e, m); 1459 } 1460 1461 /// END OF FULL-SERVICE BINARY METHODS 1462 1463 /// SECOND-TIER BINARY METHODS 1464 // 1465 // There are no masked versions. 1466 1467 /** 1468 * {@inheritDoc} <!--workaround--> 1469 */ 1470 @Override 1471 @ForceInline 1472 public final ByteVector min(Vector<Byte> v) { 1473 return lanewise(MIN, v); 1474 } 1475 1476 // FIXME: "broadcast of an input scalar" is really wordy. Reduce? 1477 /** 1478 * Computes the smaller of this vector and the broadcast of an input scalar. 1479 * 1480 * This is a lane-wise binary operation which applies the 1481 * operation {@code Math.min()} to each pair of 1482 * corresponding lane values. 1483 * 1484 * This method is also equivalent to the expression 1485 * {@link #lanewise(VectorOperators.Binary,byte) 1486 * lanewise}{@code (}{@link VectorOperators#MIN 1487 * MIN}{@code , e)}. 
1488 * 1489 * @param e the input scalar 1490 * @return the result of multiplying this vector by the given scalar 1491 * @see #min(Vector) 1492 * @see #broadcast(byte) 1493 * @see VectorOperators#MIN 1494 * @see #lanewise(VectorOperators.Binary,byte,VectorMask) 1495 */ 1496 @ForceInline 1497 public final ByteVector min(byte e) { 1498 return lanewise(MIN, e); 1499 } 1500 1501 /** 1502 * {@inheritDoc} <!--workaround--> 1503 */ 1504 @Override 1505 @ForceInline 1506 public final ByteVector max(Vector<Byte> v) { 1507 return lanewise(MAX, v); 1508 } 1509 1510 /** 1511 * Computes the larger of this vector and the broadcast of an input scalar. 1512 * 1513 * This is a lane-wise binary operation which applies the 1514 * operation {@code Math.max()} to each pair of 1515 * corresponding lane values. 1516 * 1517 * This method is also equivalent to the expression 1518 * {@link #lanewise(VectorOperators.Binary,byte) 1519 * lanewise}{@code (}{@link VectorOperators#MAX 1520 * MAX}{@code , e)}. 1521 * 1522 * @param e the input scalar 1523 * @return the result of multiplying this vector by the given scalar 1524 * @see #max(Vector) 1525 * @see #broadcast(byte) 1526 * @see VectorOperators#MAX 1527 * @see #lanewise(VectorOperators.Binary,byte,VectorMask) 1528 */ 1529 @ForceInline 1530 public final ByteVector max(byte e) { 1531 return lanewise(MAX, e); 1532 } 1533 1534 // common bitwise operators: and, or, not (with scalar versions) 1535 /** 1536 * Computes the bitwise logical conjunction ({@code &}) 1537 * of this vector and a second input vector. 1538 * 1539 * This is a lane-wise binary operation which applies the 1540 * the primitive bitwise "and" operation ({@code &}) 1541 * to each pair of corresponding lane values. 1542 * 1543 * This method is also equivalent to the expression 1544 * {@link #lanewise(VectorOperators.Binary,Vector) 1545 * lanewise}{@code (}{@link VectorOperators#AND 1546 * AND}{@code , v)}. 
1547 * 1548 * <p> 1549 * This is not a full-service named operation like 1550 * {@link #add(Vector) add}. A masked version of 1551 * version of this operation is not directly available 1552 * but may be obtained via the masked version of 1553 * {@code lanewise}. 1554 * 1555 * @param v a second input vector 1556 * @return the bitwise {@code &} of this vector and the second input vector 1557 * @see #and(byte) 1558 * @see #or(Vector) 1559 * @see #not() 1560 * @see VectorOperators#AND 1561 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1562 */ 1563 @ForceInline 1564 public final ByteVector and(Vector<Byte> v) { 1565 return lanewise(AND, v); 1566 } 1567 1568 /** 1569 * Computes the bitwise logical conjunction ({@code &}) 1570 * of this vector and a scalar. 1571 * 1572 * This is a lane-wise binary operation which applies the 1573 * the primitive bitwise "and" operation ({@code &}) 1574 * to each pair of corresponding lane values. 1575 * 1576 * This method is also equivalent to the expression 1577 * {@link #lanewise(VectorOperators.Binary,Vector) 1578 * lanewise}{@code (}{@link VectorOperators#AND 1579 * AND}{@code , e)}. 1580 * 1581 * @param e an input scalar 1582 * @return the bitwise {@code &} of this vector and scalar 1583 * @see #and(Vector) 1584 * @see VectorOperators#AND 1585 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1586 */ 1587 @ForceInline 1588 public final ByteVector and(byte e) { 1589 return lanewise(AND, e); 1590 } 1591 1592 /** 1593 * Computes the bitwise logical disjunction ({@code |}) 1594 * of this vector and a second input vector. 1595 * 1596 * This is a lane-wise binary operation which applies the 1597 * the primitive bitwise "or" operation ({@code |}) 1598 * to each pair of corresponding lane values. 1599 * 1600 * This method is also equivalent to the expression 1601 * {@link #lanewise(VectorOperators.Binary,Vector) 1602 * lanewise}{@code (}{@link VectorOperators#OR 1603 * AND}{@code , v)}. 
1604 * 1605 * <p> 1606 * This is not a full-service named operation like 1607 * {@link #add(Vector) add}. A masked version of 1608 * version of this operation is not directly available 1609 * but may be obtained via the masked version of 1610 * {@code lanewise}. 1611 * 1612 * @param v a second input vector 1613 * @return the bitwise {@code |} of this vector and the second input vector 1614 * @see #or(byte) 1615 * @see #and(Vector) 1616 * @see #not() 1617 * @see VectorOperators#OR 1618 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1619 */ 1620 @ForceInline 1621 public final ByteVector or(Vector<Byte> v) { 1622 return lanewise(OR, v); 1623 } 1624 1625 /** 1626 * Computes the bitwise logical disjunction ({@code |}) 1627 * of this vector and a scalar. 1628 * 1629 * This is a lane-wise binary operation which applies the 1630 * the primitive bitwise "or" operation ({@code |}) 1631 * to each pair of corresponding lane values. 1632 * 1633 * This method is also equivalent to the expression 1634 * {@link #lanewise(VectorOperators.Binary,Vector) 1635 * lanewise}{@code (}{@link VectorOperators#OR 1636 * OR}{@code , e)}. 1637 * 1638 * @param e an input scalar 1639 * @return the bitwise {@code |} of this vector and scalar 1640 * @see #or(Vector) 1641 * @see VectorOperators#OR 1642 * @see #lanewise(VectorOperators.Binary,Vector,VectorMask) 1643 */ 1644 @ForceInline 1645 public final ByteVector or(byte e) { 1646 return lanewise(OR, e); 1647 } 1648 1649 1650 1651 /// UNARY METHODS 1652 1653 /** 1654 * {@inheritDoc} <!--workaround--> 1655 */ 1656 @Override 1657 @ForceInline 1658 public final 1659 ByteVector neg() { 1660 return lanewise(NEG); 1661 } 1662 1663 /** 1664 * {@inheritDoc} <!--workaround--> 1665 */ 1666 @Override 1667 @ForceInline 1668 public final 1669 ByteVector abs() { 1670 return lanewise(ABS); 1671 } 1672 1673 // not (~) 1674 /** 1675 * Computes the bitwise logical complement ({@code ~}) 1676 * of this vector. 
1677 * 1678 * This is a lane-wise binary operation which applies the 1679 * the primitive bitwise "not" operation ({@code ~}) 1680 * to each lane value. 1681 * 1682 * This method is also equivalent to the expression 1683 * {@link #lanewise(VectorOperators.Unary) 1684 * lanewise}{@code (}{@link VectorOperators#NOT 1685 * NOT}{@code )}. 1686 * 1687 * <p> 1688 * This is not a full-service named operation like 1689 * {@link #add(Vector) add}. A masked version of 1690 * version of this operation is not directly available 1691 * but may be obtained via the masked version of 1692 * {@code lanewise}. 1693 * 1694 * @return the bitwise complement {@code ~} of this vector 1695 * @see #and(Vector) 1696 * @see VectorOperators#NOT 1697 * @see #lanewise(VectorOperators.Unary,VectorMask) 1698 */ 1699 @ForceInline 1700 public final ByteVector not() { 1701 return lanewise(NOT); 1702 } 1703 1704 1705 /// COMPARISONS 1706 1707 /** 1708 * {@inheritDoc} <!--workaround--> 1709 */ 1710 @Override 1711 @ForceInline 1712 public final 1713 VectorMask<Byte> eq(Vector<Byte> v) { 1714 return compare(EQ, v); 1715 } 1716 1717 /** 1718 * Tests if this vector is equal to an input scalar. 1719 * 1720 * This is a lane-wise binary test operation which applies 1721 * the primitive equals operation ({@code ==}) to each lane. 1722 * The result is the same as {@code compare(VectorOperators.Comparison.EQ, e)}. 1723 * 1724 * @param e the input scalar 1725 * @return the result mask of testing if this vector 1726 * is equal to {@code e} 1727 * @see #compare(VectorOperators.Comparison,byte) 1728 */ 1729 @ForceInline 1730 public final 1731 VectorMask<Byte> eq(byte e) { 1732 return compare(EQ, e); 1733 } 1734 1735 /** 1736 * {@inheritDoc} <!--workaround--> 1737 */ 1738 @Override 1739 @ForceInline 1740 public final 1741 VectorMask<Byte> lt(Vector<Byte> v) { 1742 return compare(LT, v); 1743 } 1744 1745 /** 1746 * Tests if this vector is less than an input scalar. 
     *
     * This is a lane-wise binary test operation which applies
     * the primitive less than operation ({@code <}) to each lane.
     * The result is the same as {@code compare(VectorOperators.LT, e)}.
     *
     * @param e the input scalar
     * @return the mask result of testing if this vector
     *         is less than the input scalar
     * @see #compare(VectorOperators.Comparison,byte)
     */
    @ForceInline
    public final
    VectorMask<Byte> lt(byte e) {
        return compare(LT, e);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Byte> test(VectorOperators.Test op);

    // Shared implementation of test(op), specialized by the shape-specific
    // subclasses.  Only the "special" test kinds (IS_DEFAULT, IS_NEGATIVE)
    // are handled here; both reduce to a comparison against zero on the
    // integral view of the lanes.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M testTemplate(Class<M> maskType, Test op) {
        ByteSpecies vsp = vspecies();
        if (opKind(op, VO_SPECIAL)) {
            ByteVector bits = this.viewAsIntegralLanes();
            VectorMask<Byte> m;
            if (op == IS_DEFAULT) {
                // A lane holds the default value iff its bits are all zero.
                m = bits.compare(EQ, (byte) 0);
            } else if (op == IS_NEGATIVE) {
                // A lane is negative iff its sign bit is set, i.e. < 0.
                m = bits.compare(LT, (byte) 0);
            }
            else {
                throw new AssertionError(op);
            }
            return maskType.cast(m);
        }
        // opCode(op) validates the operator token (it rejects unsupported
        // operators); no non-special test kinds are expected to reach here.
        int opc = opCode(op);
        throw new AssertionError(op);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Byte> test(VectorOperators.Test op,
                          VectorMask<Byte> m) {
        return test(op).and(m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Byte> compare(VectorOperators.Comparison op, Vector<Byte> v);

    // Shared implementation of compare(op, v).  Hands the comparison to the
    // VectorIntrinsics layer; the lambda is the scalar fallback used when no
    // hardware instruction is available.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, Vector<Byte> v) {
        Objects.requireNonNull(v);
        ByteSpecies vsp = vspecies();
        ByteVector that = (ByteVector) v;
        that.check(this);  // enforce same species as this vector
        int opc = opCode(op);
        return VectorIntrinsics.compare(
            opc, getClass(), maskType, byte.class, length(),
            this, that,
            (cond, v0, v1) -> {
                // Scalar fallback: test each lane pair with the decoded condition.
                AbstractMask<Byte> m
                    = v0.bTest(cond, v1, (cond_, i, a, b)
                               -> compareWithOp(cond, a, b));
                @SuppressWarnings("unchecked")
                M m2 = (M) m;
                return m2;
            });
    }

    // Decodes an intrinsic BT_* condition code into the corresponding
    // primitive comparison on two byte lane values.
    @ForceInline
    private static
    boolean compareWithOp(int cond, byte a, byte b) {
        switch (cond) {
        case VectorIntrinsics.BT_eq:  return a == b;
        case VectorIntrinsics.BT_ne:  return a != b;
        case VectorIntrinsics.BT_lt:  return a < b;
        case VectorIntrinsics.BT_le:  return a <= b;
        case VectorIntrinsics.BT_gt:  return a > b;
        case VectorIntrinsics.BT_ge:  return a >= b;
        }
        throw new AssertionError();
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Byte> compare(VectorOperators.Comparison op,
                             Vector<Byte> v,
                             VectorMask<Byte> m) {
        return compare(op, v).and(m);
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation.
     *
     * This is a lane-wise binary test operation which applies
     * the comparison operation to each lane.
     * <p>
     * The result is the same as
     * {@code compare(op, broadcast(species(), e))}.
     * That is, the scalar may be regarded as broadcast to
     * a vector of the same species, and then compared
     * against the original vector, using the selected
     * comparison operation.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator
     * @see ByteVector#compare(VectorOperators.Comparison,Vector)
     * @see #eq(byte)
     * @see #lt(byte)
     */
    public abstract
    VectorMask<Byte> compare(Comparison op, byte e);

    // Shared implementation of compare(op, e): broadcast the scalar and
    // reuse the vector-vs-vector comparison template.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, byte e) {
        return compareTemplate(maskType, op, broadcast(e));
    }

    /**
     * Tests this vector by comparing it with an input scalar,
     * according to the given comparison operation,
     * in lanes selected by a mask.
     *
     * This is a masked lane-wise binary test operation which applies
     * to each pair of corresponding lane values.
     *
     * The returned result is equal to the expression
     * {@code compare(op,s).and(m)}.
     *
     * @param op the operation used to compare lane values
     * @param e the input scalar
     * @param m the mask controlling lane selection
     * @return the mask result of testing lane-wise if this vector
     *         compares to the input, according to the selected
     *         comparison operator,
     *         and only in the lanes selected by the mask
     * @see ByteVector#compare(VectorOperators.Comparison,Vector,VectorMask)
     */
    @ForceInline
    public final VectorMask<Byte> compare(VectorOperators.Comparison op,
                                          byte e,
                                          VectorMask<Byte> m) {
        return compare(op, e).and(m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    public abstract
    VectorMask<Byte> compare(Comparison op, long e);

    // Shared implementation of compare(op, long e): the long scalar is
    // broadcast (with range checking done by broadcast) and compared.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    M compareTemplate(Class<M> maskType, Comparison op, long e) {
        return compareTemplate(maskType, op, broadcast(e));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    VectorMask<Byte> compare(Comparison op, long e, VectorMask<Byte> m) {
        return compare(op, broadcast(e), m);
    }



    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract
    ByteVector blend(Vector<Byte> v, VectorMask<Byte> m);

    // Shared implementation of blend(v, m): lanes where the mask is set are
    // taken from v, all others from this vector.  The lambda is the scalar
    // fallback for the intrinsic.
    /*package-private*/
    @ForceInline
    final
    <M extends VectorMask<Byte>>
    ByteVector
    blendTemplate(Class<M> maskType, ByteVector v, M m) {
        v.check(this);  // enforce same species as this vector
        return VectorIntrinsics.blend(
            getClass(), maskType, byte.class, length(),
            this, v, m,
            (v0, v1, m_) -> v0.bOp(v1, m_, (i, a, b) -> b));
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override public abstract ByteVector addIndex(int scale);

    // Shared implementation of addIndex(scale): adds i*scale to lane i.
    /*package-private*/
    @ForceInline
    final ByteVector addIndexTemplate(int scale) {
        ByteSpecies vsp = vspecies();
        // make sure VLENGTH*scale doesn't overflow:
        vsp.checkScale(scale);
        return VectorIntrinsics.indexVector(
            getClass(), byte.class, length(),
            this, scale, vsp,
            (v, scale_, s)
            -> {
                // If the platform doesn't support an INDEX
                // instruction directly, load IOTA from memory
                // and multiply.
                ByteVector iota = s.iota();
                byte sc = (byte) scale_;
                return v.add(sc == 1 ? iota : iota.mul(sc));
            });
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
     *
     * @param e the input scalar, containing the replacement lane value
     * @param m the mask controlling lane selection of the scalar
     * @return the result of blending the lane elements of this vector with
     *         the scalar value
     */
    @ForceInline
    public final ByteVector blend(byte e,
                                  VectorMask<Byte> m) {
        return blend(broadcast(e), m);
    }

    /**
     * Replaces selected lanes of this vector with
     * a scalar value
     * under the control of a mask.
     *
     * This is a masked lane-wise binary operation which
     * selects each lane value from one or the other input.
     *
     * The returned result is equal to the expression
     * {@code blend(broadcast(e),m)}.
2024 * 2025 * @param e the input scalar, containing the replacement lane value 2026 * @param m the mask controlling lane selection of the scalar 2027 * @return the result of blending the lane elements of this vector with 2028 * the scalar value 2029 */ 2030 @ForceInline 2031 public final ByteVector blend(long e, 2032 VectorMask<Byte> m) { 2033 return blend(broadcast(e), m); 2034 } 2035 2036 /** 2037 * {@inheritDoc} <!--workaround--> 2038 */ 2039 @Override 2040 public abstract 2041 ByteVector slice(int origin, Vector<Byte> v1); 2042 2043 /*package-private*/ 2044 final 2045 @ForceInline 2046 ByteVector sliceTemplate(int origin, Vector<Byte> v1) { 2047 ByteVector that = (ByteVector) v1; 2048 that.check(this); 2049 byte[] a0 = this.getElements(); 2050 byte[] a1 = that.getElements(); 2051 byte[] res = new byte[a0.length]; 2052 int vlen = res.length; 2053 int firstPart = vlen - origin; 2054 System.arraycopy(a0, origin, res, 0, firstPart); 2055 System.arraycopy(a1, 0, res, firstPart, origin); 2056 return vectorFactory(res); 2057 } 2058 2059 /** 2060 * {@inheritDoc} <!--workaround--> 2061 */ 2062 @Override 2063 @ForceInline 2064 public final 2065 ByteVector slice(int origin, 2066 Vector<Byte> w, 2067 VectorMask<Byte> m) { 2068 return broadcast(0).blend(slice(origin, w), m); 2069 } 2070 2071 /** 2072 * {@inheritDoc} <!--workaround--> 2073 */ 2074 @Override 2075 public abstract 2076 ByteVector slice(int origin); 2077 2078 /** 2079 * {@inheritDoc} <!--workaround--> 2080 */ 2081 @Override 2082 public abstract 2083 ByteVector unslice(int origin, Vector<Byte> w, int part); 2084 2085 /*package-private*/ 2086 final 2087 @ForceInline 2088 ByteVector 2089 unsliceTemplate(int origin, Vector<Byte> w, int part) { 2090 ByteVector that = (ByteVector) w; 2091 that.check(this); 2092 byte[] slice = this.getElements(); 2093 byte[] res = that.getElements(); 2094 int vlen = res.length; 2095 int firstPart = vlen - origin; 2096 switch (part) { 2097 case 0: 2098 System.arraycopy(slice, 0, res, 
origin, firstPart); 2099 break; 2100 case 1: 2101 System.arraycopy(slice, firstPart, res, 0, origin); 2102 break; 2103 default: 2104 throw wrongPartForSlice(part); 2105 } 2106 return vectorFactory(res); 2107 } 2108 2109 /*package-private*/ 2110 final 2111 @ForceInline 2112 <M extends VectorMask<Byte>> 2113 ByteVector 2114 unsliceTemplate(Class<M> maskType, int origin, Vector<Byte> w, int part, M m) { 2115 ByteVector that = (ByteVector) w; 2116 that.check(this); 2117 ByteVector slice = that.sliceTemplate(origin, that); 2118 slice = slice.blendTemplate(maskType, this, m); 2119 return slice.unsliceTemplate(origin, w, part); 2120 } 2121 2122 /** 2123 * {@inheritDoc} <!--workaround--> 2124 */ 2125 @Override 2126 public abstract 2127 ByteVector unslice(int origin, Vector<Byte> w, int part, VectorMask<Byte> m); 2128 2129 /** 2130 * {@inheritDoc} <!--workaround--> 2131 */ 2132 @Override 2133 public abstract 2134 ByteVector unslice(int origin); 2135 2136 private ArrayIndexOutOfBoundsException 2137 wrongPartForSlice(int part) { 2138 String msg = String.format("bad part number %d for slice operation", 2139 part); 2140 return new ArrayIndexOutOfBoundsException(msg); 2141 } 2142 2143 /** 2144 * {@inheritDoc} <!--workaround--> 2145 */ 2146 @Override 2147 public abstract 2148 ByteVector rearrange(VectorShuffle<Byte> m); 2149 2150 /*package-private*/ 2151 @ForceInline 2152 final 2153 <S extends VectorShuffle<Byte>> 2154 ByteVector rearrangeTemplate(Class<S> shuffletype, S shuffle) { 2155 shuffle.checkIndexes(); 2156 return VectorIntrinsics.rearrangeOp( 2157 getClass(), shuffletype, byte.class, length(), 2158 this, shuffle, 2159 (v1, s_) -> v1.uOp((i, a) -> { 2160 int ei = s_.laneSource(i); 2161 return v1.lane(ei); 2162 })); 2163 } 2164 2165 /** 2166 * {@inheritDoc} <!--workaround--> 2167 */ 2168 @Override 2169 public abstract 2170 ByteVector rearrange(VectorShuffle<Byte> s, 2171 VectorMask<Byte> m); 2172 2173 /*package-private*/ 2174 @ForceInline 2175 final 2176 <S extends 
VectorShuffle<Byte>> 2177 ByteVector rearrangeTemplate(Class<S> shuffletype, 2178 S shuffle, 2179 VectorMask<Byte> m) { 2180 ByteVector unmasked = 2181 VectorIntrinsics.rearrangeOp( 2182 getClass(), shuffletype, byte.class, length(), 2183 this, shuffle, 2184 (v1, s_) -> v1.uOp((i, a) -> { 2185 int ei = s_.laneSource(i); 2186 return ei < 0 ? 0 : v1.lane(ei); 2187 })); 2188 VectorMask<Byte> valid = shuffle.laneIsValid(); 2189 if (m.andNot(valid).anyTrue()) { 2190 shuffle.checkIndexes(); 2191 throw new AssertionError(); 2192 } 2193 return broadcast((byte)0).blend(unmasked, valid); 2194 } 2195 2196 /** 2197 * {@inheritDoc} <!--workaround--> 2198 */ 2199 @Override 2200 public abstract 2201 ByteVector rearrange(VectorShuffle<Byte> s, 2202 Vector<Byte> v); 2203 2204 /*package-private*/ 2205 @ForceInline 2206 final 2207 <S extends VectorShuffle<Byte>> 2208 ByteVector rearrangeTemplate(Class<S> shuffletype, 2209 S shuffle, 2210 ByteVector v) { 2211 VectorMask<Byte> valid = shuffle.laneIsValid(); 2212 S ws = shuffletype.cast(shuffle.wrapIndexes()); 2213 ByteVector r0 = 2214 VectorIntrinsics.rearrangeOp( 2215 getClass(), shuffletype, byte.class, length(), 2216 this, ws, 2217 (v0, s_) -> v0.uOp((i, a) -> { 2218 int ei = s_.laneSource(i); 2219 return v0.lane(ei); 2220 })); 2221 ByteVector r1 = 2222 VectorIntrinsics.rearrangeOp( 2223 getClass(), shuffletype, byte.class, length(), 2224 v, ws, 2225 (v1, s_) -> v1.uOp((i, a) -> { 2226 int ei = s_.laneSource(i); 2227 return v1.lane(ei); 2228 })); 2229 return r1.blend(r0, valid); 2230 } 2231 2232 /** 2233 * {@inheritDoc} <!--workaround--> 2234 */ 2235 @Override 2236 public abstract 2237 ByteVector selectFrom(Vector<Byte> v); 2238 2239 /*package-private*/ 2240 @ForceInline 2241 final ByteVector selectFromTemplate(ByteVector v) { 2242 return v.rearrange(this.toShuffle()); 2243 } 2244 2245 /** 2246 * {@inheritDoc} <!--workaround--> 2247 */ 2248 @Override 2249 public abstract 2250 ByteVector selectFrom(Vector<Byte> s, VectorMask<Byte> 
m); 2251 2252 /*package-private*/ 2253 @ForceInline 2254 final ByteVector selectFromTemplate(ByteVector v, 2255 AbstractMask<Byte> m) { 2256 return v.rearrange(this.toShuffle(), m); 2257 } 2258 2259 /// Ternary operations 2260 2261 /** 2262 * Blends together the bits of two vectors under 2263 * the control of a third, which supplies mask bits. 2264 * 2265 * 2266 * This is a lane-wise ternary operation which performs 2267 * a bitwise blending operation {@code (a&~c)|(b&c)} 2268 * to each lane. 2269 * 2270 * This method is also equivalent to the expression 2271 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2272 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2273 * BITWISE_BLEND}{@code , bits, mask)}. 2274 * 2275 * @param bits input bits to blend into the current vector 2276 * @param mask a bitwise mask to enable blending of the input bits 2277 * @return the bitwise blend of the given bits into the current vector, 2278 * under control of the bitwise mask 2279 * @see #bitwiseBlend(byte,byte) 2280 * @see #bitwiseBlend(byte,Vector) 2281 * @see #bitwiseBlend(Vector,byte) 2282 * @see VectorOperators#BITWISE_BLEND 2283 * @see #lanewise(VectorOperators.Ternary,Vector,Vector,VectorMask) 2284 */ 2285 @ForceInline 2286 public final 2287 ByteVector bitwiseBlend(Vector<Byte> bits, Vector<Byte> mask) { 2288 return lanewise(BITWISE_BLEND, bits, mask); 2289 } 2290 2291 /** 2292 * Blends together the bits of a vector and a scalar under 2293 * the control of another scalar, which supplies mask bits. 2294 * 2295 * 2296 * This is a lane-wise ternary operation which performs 2297 * a bitwise blending operation {@code (a&~c)|(b&c)} 2298 * to each lane. 2299 * 2300 * This method is also equivalent to the expression 2301 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2302 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2303 * BITWISE_BLEND}{@code , bits, mask)}. 
2304 * 2305 * @param bits input bits to blend into the current vector 2306 * @param mask a bitwise mask to enable blending of the input bits 2307 * @return the bitwise blend of the given bits into the current vector, 2308 * under control of the bitwise mask 2309 * @see #bitwiseBlend(Vector,Vector) 2310 * @see VectorOperators#BITWISE_BLEND 2311 * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask) 2312 */ 2313 @ForceInline 2314 public final 2315 ByteVector bitwiseBlend(byte bits, byte mask) { 2316 return lanewise(BITWISE_BLEND, bits, mask); 2317 } 2318 2319 /** 2320 * Blends together the bits of a vector and a scalar under 2321 * the control of another vector, which supplies mask bits. 2322 * 2323 * 2324 * This is a lane-wise ternary operation which performs 2325 * a bitwise blending operation {@code (a&~c)|(b&c)} 2326 * to each lane. 2327 * 2328 * This method is also equivalent to the expression 2329 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2330 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2331 * BITWISE_BLEND}{@code , bits, mask)}. 2332 * 2333 * @param bits input bits to blend into the current vector 2334 * @param mask a bitwise mask to enable blending of the input bits 2335 * @return the bitwise blend of the given bits into the current vector, 2336 * under control of the bitwise mask 2337 * @see #bitwiseBlend(Vector,Vector) 2338 * @see VectorOperators#BITWISE_BLEND 2339 * @see #lanewise(VectorOperators.Ternary,byte,Vector,VectorMask) 2340 */ 2341 @ForceInline 2342 public final 2343 ByteVector bitwiseBlend(byte bits, Vector<Byte> mask) { 2344 return lanewise(BITWISE_BLEND, bits, mask); 2345 } 2346 2347 /** 2348 * Blends together the bits of two vectors under 2349 * the control of a scalar, which supplies mask bits. 2350 * 2351 * 2352 * This is a lane-wise ternary operation which performs 2353 * a bitwise blending operation {@code (a&~c)|(b&c)} 2354 * to each lane. 
2355 * 2356 * This method is also equivalent to the expression 2357 * {@link #lanewise(VectorOperators.Ternary,Vector,Vector) 2358 * lanewise}{@code (}{@link VectorOperators#BITWISE_BLEND 2359 * BITWISE_BLEND}{@code , bits, mask)}. 2360 * 2361 * @param bits input bits to blend into the current vector 2362 * @param mask a bitwise mask to enable blending of the input bits 2363 * @return the bitwise blend of the given bits into the current vector, 2364 * under control of the bitwise mask 2365 * @see #bitwiseBlend(Vector,Vector) 2366 * @see VectorOperators#BITWISE_BLEND 2367 * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask) 2368 */ 2369 @ForceInline 2370 public final 2371 ByteVector bitwiseBlend(Vector<Byte> bits, byte mask) { 2372 return lanewise(BITWISE_BLEND, bits, mask); 2373 } 2374 2375 2376 // Type specific horizontal reductions 2377 2378 /** 2379 * Returns a value accumulated from all the lanes of this vector. 2380 * 2381 * This is an associative cross-lane reduction operation which 2382 * applies the specified operation to all the lane elements. 2383 * 2384 * <p> 2385 * A few reduction operations do not support arbitrary reordering 2386 * of their operands, yet are included here because of their 2387 * usefulness. 2388 * 2389 * <ul> 2390 * <li> 2391 * In the case of {@code FIRST_NONZERO}, the reduction returns 2392 * the value from the lowest-numbered non-zero lane. 2393 * 2394 * 2395 * <li> 2396 * In the case of floating point addition and multiplication, the 2397 * precise result will reflect the choice of an arbitrary order 2398 * of operations, which may even vary over time. 2399 * 2400 * <li> 2401 * All other reduction operations are fully commutative and 2402 * associative. The implementation can choose any order of 2403 * processing, yet it will always produce the same result. 
2404 * 2405 * </ul> 2406 * 2407 * 2408 * @param op the operation used to combine lane values 2409 * @return the accumulated result 2410 * @throws UnsupportedOperationException if this vector does 2411 * not support the requested operation 2412 * @see #reduceLanes(VectorOperators.Associative,VectorMask) 2413 * @see #add(Vector) 2414 * @see #mul(Vector) 2415 * @see #min(Vector) 2416 * @see #max(Vector) 2417 * @see #and(Vector) 2418 * @see #or(Vector) 2419 * @see VectorOperators#XOR 2420 * @see VectorOperators#FIRST_NONZERO 2421 */ 2422 public abstract byte reduceLanes(VectorOperators.Associative op); 2423 2424 /** 2425 * Returns a value accumulated from selected lanes of this vector, 2426 * controlled by a mask. 2427 * 2428 * This is an associative cross-lane reduction operation which 2429 * applies the specified operation to the selected lane elements. 2430 * <p> 2431 * If no elements are selected, an operation-specific identity 2432 * value is returned. 2433 * <ul> 2434 * <li> 2435 * If the operation is 2436 * {@code ADD}, {@code XOR}, {@code OR}, 2437 * or {@code FIRST_NONZERO}, 2438 * then the identity value is zero, the default {@code byte} value. 2439 * <li> 2440 * If the operation is {@code MUL}, 2441 * then the identity value is one. 2442 * <li> 2443 * If the operation is {@code AND}, 2444 * then the identity value is minus one (all bits set). 2445 * <li> 2446 * If the operation is {@code MAX}, 2447 * then the identity value is {@code Byte.MIN_VALUE}. 2448 * <li> 2449 * If the operation is {@code MIN}, 2450 * then the identity value is {@code Byte.MAX_VALUE}. 
2451 * </ul> 2452 * 2453 * @param op the operation used to combine lane values 2454 * @param m the mask controlling lane selection 2455 * @return the reduced result accumulated from the selected lane values 2456 * @throws UnsupportedOperationException if this vector does 2457 * not support the requested operation 2458 * @see #reduceLanes(VectorOperators.Associative) 2459 */ 2460 public abstract byte reduceLanes(VectorOperators.Associative op, 2461 VectorMask<Byte> m); 2462 2463 /*package-private*/ 2464 @ForceInline 2465 final 2466 byte reduceLanesTemplate(VectorOperators.Associative op, 2467 VectorMask<Byte> m) { 2468 ByteVector v = reduceIdentityVector(op).blend(this, m); 2469 return v.reduceLanesTemplate(op); 2470 } 2471 2472 /*package-private*/ 2473 @ForceInline 2474 final 2475 byte reduceLanesTemplate(VectorOperators.Associative op) { 2476 if (op == FIRST_NONZERO) { 2477 // FIXME: The JIT should handle this, and other scan ops alos. 2478 VectorMask<Byte> thisNZ 2479 = this.viewAsIntegralLanes().compare(NE, (byte) 0); 2480 return this.lane(thisNZ.firstTrue()); 2481 } 2482 int opc = opCode(op); 2483 return fromBits(VectorIntrinsics.reductionCoerced( 2484 opc, getClass(), byte.class, length(), 2485 this, 2486 REDUCE_IMPL.find(op, opc, (opc_) -> { 2487 switch (opc_) { 2488 case VECTOR_OP_ADD: return v -> 2489 toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a + b))); 2490 case VECTOR_OP_MUL: return v -> 2491 toBits(v.rOp((byte)1, (i, a, b) -> (byte)(a * b))); 2492 case VECTOR_OP_MIN: return v -> 2493 toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (byte) Math.min(a, b))); 2494 case VECTOR_OP_MAX: return v -> 2495 toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (byte) Math.max(a, b))); 2496 case VECTOR_OP_FIRST_NONZERO: return v -> 2497 toBits(v.rOp((byte)0, (i, a, b) -> toBits(a) != 0 ? 
a : b)); 2498 case VECTOR_OP_AND: return v -> 2499 toBits(v.rOp((byte)-1, (i, a, b) -> (byte)(a & b))); 2500 case VECTOR_OP_OR: return v -> 2501 toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a | b))); 2502 case VECTOR_OP_XOR: return v -> 2503 toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a ^ b))); 2504 default: return null; 2505 }}))); 2506 } 2507 private static final 2508 ImplCache<Associative,Function<ByteVector,Long>> REDUCE_IMPL 2509 = new ImplCache<>(Associative.class, ByteVector.class); 2510 2511 private 2512 @ForceInline 2513 ByteVector reduceIdentityVector(VectorOperators.Associative op) { 2514 int opc = opCode(op); 2515 UnaryOperator<ByteVector> fn 2516 = REDUCE_ID_IMPL.find(op, opc, (opc_) -> { 2517 switch (opc_) { 2518 case VECTOR_OP_ADD: 2519 case VECTOR_OP_OR: 2520 case VECTOR_OP_XOR: 2521 case VECTOR_OP_FIRST_NONZERO: 2522 return v -> v.broadcast(0); 2523 case VECTOR_OP_MUL: 2524 return v -> v.broadcast(1); 2525 case VECTOR_OP_AND: 2526 return v -> v.broadcast(-1); 2527 case VECTOR_OP_MIN: 2528 return v -> v.broadcast(MAX_OR_INF); 2529 case VECTOR_OP_MAX: 2530 return v -> v.broadcast(MIN_OR_INF); 2531 default: return null; 2532 } 2533 }); 2534 return fn.apply(this); 2535 } 2536 private static final 2537 ImplCache<Associative,UnaryOperator<ByteVector>> REDUCE_ID_IMPL 2538 = new ImplCache<>(Associative.class, ByteVector.class); 2539 2540 private static final byte MIN_OR_INF = Byte.MIN_VALUE; 2541 private static final byte MAX_OR_INF = Byte.MAX_VALUE; 2542 2543 public @Override abstract long reduceLanesToLong(VectorOperators.Associative op); 2544 public @Override abstract long reduceLanesToLong(VectorOperators.Associative op, 2545 VectorMask<Byte> m); 2546 2547 // Type specific accessors 2548 2549 /** 2550 * Gets the lane element at lane index {@code i} 2551 * 2552 * @param i the lane index 2553 * @return the lane element at lane index {@code i} 2554 * @throws IllegalArgumentException if the index is is out of range 2555 * ({@code < 0 || >= length()}) 2556 */ 
2557 public abstract byte lane(int i); 2558 2559 /** 2560 * Replaces the lane element of this vector at lane index {@code i} with 2561 * value {@code e}. 2562 * 2563 * This is a cross-lane operation and behaves as if it returns the result 2564 * of blending this vector with an input vector that is the result of 2565 * broadcasting {@code e} and a mask that has only one lane set at lane 2566 * index {@code i}. 2567 * 2568 * @param i the lane index of the lane element to be replaced 2569 * @param e the value to be placed 2570 * @return the result of replacing the lane element of this vector at lane 2571 * index {@code i} with value {@code e}. 2572 * @throws IllegalArgumentException if the index is is out of range 2573 * ({@code < 0 || >= length()}) 2574 */ 2575 public abstract ByteVector withLane(int i, byte e); 2576 2577 // Memory load operations 2578 2579 /** 2580 * Returns an array of type {@code byte[]} 2581 * containing all the lane values. 2582 * The array length is the same as the vector length. 2583 * The array elements are stored in lane order. 2584 * <p> 2585 * This method behaves as if it stores 2586 * this vector into an allocated array 2587 * (using {@link #intoArray(byte[], int) intoArray}) 2588 * and returns the array as follows: 2589 * <pre>{@code 2590 * byte[] a = new byte[this.length()]; 2591 * this.intoArray(a, 0); 2592 * return a; 2593 * }</pre> 2594 * 2595 * @return an array containing the lane values of this vector 2596 */ 2597 @ForceInline 2598 @Override 2599 public final byte[] toArray() { 2600 byte[] a = new byte[vspecies().laneCount()]; 2601 intoArray(a, 0); 2602 return a; 2603 } 2604 2605 /** {@inheritDoc} <!--workaround--> 2606 * @implNote 2607 * When this method is used on used on vectors 2608 * of type {@code ByteVector}, 2609 * there will be no loss of precision or range, 2610 * and so no {@code IllegalArgumentException} will 2611 * be thrown. 
2612 */ 2613 @ForceInline 2614 @Override 2615 public final int[] toIntArray() { 2616 byte[] a = toArray(); 2617 int[] res = new int[a.length]; 2618 for (int i = 0; i < a.length; i++) { 2619 byte e = a[i]; 2620 res[i] = (int) ByteSpecies.toIntegralChecked(e, true); 2621 } 2622 return res; 2623 } 2624 2625 /** {@inheritDoc} <!--workaround--> 2626 * @implNote 2627 * When this method is used on used on vectors 2628 * of type {@code ByteVector}, 2629 * there will be no loss of precision or range, 2630 * and so no {@code IllegalArgumentException} will 2631 * be thrown. 2632 */ 2633 @ForceInline 2634 @Override 2635 public final long[] toLongArray() { 2636 byte[] a = toArray(); 2637 long[] res = new long[a.length]; 2638 for (int i = 0; i < a.length; i++) { 2639 byte e = a[i]; 2640 res[i] = ByteSpecies.toIntegralChecked(e, false); 2641 } 2642 return res; 2643 } 2644 2645 /** {@inheritDoc} <!--workaround--> 2646 * @implNote 2647 * When this method is used on used on vectors 2648 * of type {@code ByteVector}, 2649 * there will be no loss of precision. 2650 */ 2651 @ForceInline 2652 @Override 2653 public final double[] toDoubleArray() { 2654 byte[] a = toArray(); 2655 double[] res = new double[a.length]; 2656 for (int i = 0; i < a.length; i++) { 2657 res[i] = (double) a[i]; 2658 } 2659 return res; 2660 } 2661 2662 /** 2663 * Loads a vector from a byte array starting at an offset. 2664 * Bytes are composed into primitive lane elements according 2665 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2666 * The vector is arranged into lanes according to 2667 * <a href="Vector.html#lane-order">memory ordering</a>. 
2668 * <p> 2669 * This method behaves as if it returns the result of calling 2670 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2671 * fromByteBuffer()} as follows: 2672 * <pre>{@code 2673 * var bb = ByteBuffer.wrap(a); 2674 * var bo = ByteOrder.LITTLE_ENDIAN; 2675 * var m = species.maskAll(true); 2676 * return fromByteBuffer(species, bb, offset, m, bo); 2677 * }</pre> 2678 * 2679 * @param species species of desired vector 2680 * @param a the byte array 2681 * @param offset the offset into the array 2682 * @return a vector loaded from a byte array 2683 * @throws IndexOutOfBoundsException 2684 * if {@code offset+N*ESIZE < 0} 2685 * or {@code offset+(N+1)*ESIZE > a.length} 2686 * for any lane {@code N} in the vector 2687 */ 2688 @ForceInline 2689 public static 2690 ByteVector fromByteArray(VectorSpecies<Byte> species, 2691 byte[] a, int offset) { 2692 return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN); 2693 } 2694 2695 /** 2696 * Loads a vector from a byte array starting at an offset. 2697 * Bytes are composed into primitive lane elements according 2698 * to the specified byte order. 2699 * The vector is arranged into lanes according to 2700 * <a href="Vector.html#lane-order">memory ordering</a>. 
2701 * <p> 2702 * This method behaves as if it returns the result of calling 2703 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2704 * fromByteBuffer()} as follows: 2705 * <pre>{@code 2706 * var bb = ByteBuffer.wrap(a); 2707 * var m = species.maskAll(true); 2708 * return fromByteBuffer(species, bb, offset, m, bo); 2709 * }</pre> 2710 * 2711 * @param species species of desired vector 2712 * @param a the byte array 2713 * @param offset the offset into the array 2714 * @param bo the intended byte order 2715 * @return a vector loaded from a byte array 2716 * @throws IndexOutOfBoundsException 2717 * if {@code offset+N*ESIZE < 0} 2718 * or {@code offset+(N+1)*ESIZE > a.length} 2719 * for any lane {@code N} in the vector 2720 */ 2721 @ForceInline 2722 public static 2723 ByteVector fromByteArray(VectorSpecies<Byte> species, 2724 byte[] a, int offset, 2725 ByteOrder bo) { 2726 ByteSpecies vsp = (ByteSpecies) species; 2727 offset = checkFromIndexSize(offset, 2728 vsp.vectorBitSize() / Byte.SIZE, 2729 a.length); 2730 return vsp.dummyVector() 2731 .fromByteArray0(a, offset).maybeSwap(bo); 2732 } 2733 2734 /** 2735 * Loads a vector from a byte array starting at an offset 2736 * and using a mask. 2737 * Lanes where the mask is unset are filled with the default 2738 * value of {@code byte} (zero). 2739 * Bytes are composed into primitive lane elements according 2740 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2741 * The vector is arranged into lanes according to 2742 * <a href="Vector.html#lane-order">memory ordering</a>. 
2743 * <p> 2744 * This method behaves as if it returns the result of calling 2745 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2746 * fromByteBuffer()} as follows: 2747 * <pre>{@code 2748 * var bb = ByteBuffer.wrap(a); 2749 * var bo = ByteOrder.LITTLE_ENDIAN; 2750 * return fromByteBuffer(species, bb, offset, bo, m); 2751 * }</pre> 2752 * 2753 * @param species species of desired vector 2754 * @param a the byte array 2755 * @param offset the offset into the array 2756 * @param m the mask controlling lane selection 2757 * @return a vector loaded from a byte array 2758 * @throws IndexOutOfBoundsException 2759 * if {@code offset+N*ESIZE < 0} 2760 * or {@code offset+(N+1)*ESIZE > a.length} 2761 * for any lane {@code N} in the vector where 2762 * the mask is set 2763 */ 2764 @ForceInline 2765 public static 2766 ByteVector fromByteArray(VectorSpecies<Byte> species, 2767 byte[] a, int offset, 2768 VectorMask<Byte> m) { 2769 return fromByteArray(species, a, offset, ByteOrder.LITTLE_ENDIAN, m); 2770 } 2771 2772 /** 2773 * Loads a vector from a byte array starting at an offset 2774 * and using a mask. 2775 * Lanes where the mask is unset are filled with the default 2776 * value of {@code byte} (zero). 2777 * Bytes are composed into primitive lane elements according 2778 * to {@linkplain ByteOrder#LITTLE_ENDIAN little endian} ordering. 2779 * The vector is arranged into lanes according to 2780 * <a href="Vector.html#lane-order">memory ordering</a>. 
2781 * <p> 2782 * This method behaves as if it returns the result of calling 2783 * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask) 2784 * fromByteBuffer()} as follows: 2785 * <pre>{@code 2786 * var bb = ByteBuffer.wrap(a); 2787 * return fromByteBuffer(species, bb, offset, m, bo); 2788 * }</pre> 2789 * 2790 * @param species species of desired vector 2791 * @param a the byte array 2792 * @param offset the offset into the array 2793 * @param bo the intended byte order 2794 * @param m the mask controlling lane selection 2795 * @return a vector loaded from a byte array 2796 * @throws IndexOutOfBoundsException 2797 * if {@code offset+N*ESIZE < 0} 2798 * or {@code offset+(N+1)*ESIZE > a.length} 2799 * for any lane {@code N} in the vector 2800 * where the mask is set 2801 */ 2802 @ForceInline 2803 public static 2804 ByteVector fromByteArray(VectorSpecies<Byte> species, 2805 byte[] a, int offset, 2806 ByteOrder bo, 2807 VectorMask<Byte> m) { 2808 ByteSpecies vsp = (ByteSpecies) species; 2809 ByteVector zero = vsp.zero(); 2810 2811 if (offset >= 0 && offset <= (a.length - vsp.length() * 1)) { 2812 ByteVector v = zero.fromByteArray0(a, offset); 2813 return zero.blend(v.maybeSwap(bo), m); 2814 } 2815 ByteVector iota = zero.addIndex(1); 2816 ((AbstractMask<Byte>)m) 2817 .checkIndexByLane(offset, a.length, iota, 1); 2818 ByteBuffer tb = wrapper(a, offset, bo); 2819 return vsp.ldOp(tb, 0, (AbstractMask<Byte>)m, 2820 (tb_, __, i) -> tb_.get(i)); 2821 } 2822 2823 /** 2824 * Loads a vector from an array of type {@code byte[]} 2825 * starting at an offset. 2826 * For each vector lane, where {@code N} is the vector lane index, the 2827 * array element at index {@code offset + N} is placed into the 2828 * resulting vector at lane index {@code N}. 
2829 * 2830 * @param species species of desired vector 2831 * @param a the array 2832 * @param offset the offset into the array 2833 * @return the vector loaded from an array 2834 * @throws IndexOutOfBoundsException 2835 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2836 * for any lane {@code N} in the vector 2837 */ 2838 @ForceInline 2839 public static 2840 ByteVector fromArray(VectorSpecies<Byte> species, 2841 byte[] a, int offset) { 2842 ByteSpecies vsp = (ByteSpecies) species; 2843 offset = checkFromIndexSize(offset, 2844 vsp.laneCount(), 2845 a.length); 2846 return vsp.dummyVector().fromArray0(a, offset); 2847 } 2848 2849 /** 2850 * Loads a vector from an array of type {@code byte[]} 2851 * starting at an offset and using a mask. 2852 * Lanes where the mask is unset are filled with the default 2853 * value of {@code byte} (zero). 2854 * For each vector lane, where {@code N} is the vector lane index, 2855 * if the mask lane at index {@code N} is set then the array element at 2856 * index {@code offset + N} is placed into the resulting vector at lane index 2857 * {@code N}, otherwise the default element value is placed into the 2858 * resulting vector at lane index {@code N}. 
2859 * 2860 * @param species species of desired vector 2861 * @param a the array 2862 * @param offset the offset into the array 2863 * @param m the mask controlling lane selection 2864 * @return the vector loaded from an array 2865 * @throws IndexOutOfBoundsException 2866 * if {@code offset+N < 0} or {@code offset+N >= a.length} 2867 * for any lane {@code N} in the vector 2868 * where the mask is set 2869 */ 2870 @ForceInline 2871 public static 2872 ByteVector fromArray(VectorSpecies<Byte> species, 2873 byte[] a, int offset, 2874 VectorMask<Byte> m) { 2875 ByteSpecies vsp = (ByteSpecies) species; 2876 if (offset >= 0 && offset <= (a.length - species.length())) { 2877 ByteVector zero = vsp.zero(); 2878 return zero.blend(zero.fromArray0(a, offset), m); 2879 } 2880 ByteVector iota = vsp.iota(); 2881 ((AbstractMask<Byte>)m) 2882 .checkIndexByLane(offset, a.length, iota, 1); 2883 return vsp.vOp(m, i -> a[offset + i]); 2884 } 2885 2886 /** 2887 * Gathers a new vector composed of elements from an array of type 2888 * {@code byte[]}, 2889 * using indexes obtained by adding a fixed {@code offset} to a 2890 * series of secondary offsets from an <em>index map</em>. 2891 * The index map is a contiguous sequence of {@code VLENGTH} 2892 * elements in a second array of {@code int}s, starting at a given 2893 * {@code mapOffset}. 2894 * <p> 2895 * For each vector lane, where {@code N} is the vector lane index, 2896 * the lane is loaded from the array 2897 * element {@code a[f(N)]}, where {@code f(N)} is the 2898 * index mapping expression 2899 * {@code offset + indexMap[mapOffset + N]]}. 
2900 * 2901 * @param species species of desired vector 2902 * @param a the array 2903 * @param offset the offset into the array, may be negative if relative 2904 * indexes in the index map compensate to produce a value within the 2905 * array bounds 2906 * @param indexMap the index map 2907 * @param mapOffset the offset into the index map 2908 * @return the vector loaded from the indexed elements of the array 2909 * @throws IndexOutOfBoundsException 2910 * if {@code mapOffset+N < 0} 2911 * or if {@code mapOffset+N >= indexMap.length}, 2912 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 2913 * is an invalid index into {@code a}, 2914 * for any lane {@code N} in the vector 2915 * @see ByteVector#toIntArray() 2916 */ 2917 @ForceInline 2918 public static 2919 ByteVector fromArray(VectorSpecies<Byte> species, 2920 byte[] a, int offset, 2921 int[] indexMap, int mapOffset) { 2922 ByteSpecies vsp = (ByteSpecies) species; 2923 return vsp.vOp(n -> a[offset + indexMap[mapOffset + n]]); 2924 } 2925 2926 /** 2927 * Gathers a new vector composed of elements from an array of type 2928 * {@code byte[]}, 2929 * under the control of a mask, and 2930 * using indexes obtained by adding a fixed {@code offset} to a 2931 * series of secondary offsets from an <em>index map</em>. 2932 * The index map is a contiguous sequence of {@code VLENGTH} 2933 * elements in a second array of {@code int}s, starting at a given 2934 * {@code mapOffset}. 2935 * <p> 2936 * For each vector lane, where {@code N} is the vector lane index, 2937 * if the lane is set in the mask, 2938 * the lane is loaded from the array 2939 * element {@code a[f(N)]}, where {@code f(N)} is the 2940 * index mapping expression 2941 * {@code offset + indexMap[mapOffset + N]]}. 2942 * Unset lanes in the resulting vector are set to zero. 
2943 * 2944 * @param species species of desired vector 2945 * @param a the array 2946 * @param offset the offset into the array, may be negative if relative 2947 * indexes in the index map compensate to produce a value within the 2948 * array bounds 2949 * @param indexMap the index map 2950 * @param mapOffset the offset into the index map 2951 * @param m the mask controlling lane selection 2952 * @return the vector loaded from the indexed elements of the array 2953 * @throws IndexOutOfBoundsException 2954 * if {@code mapOffset+N < 0} 2955 * or if {@code mapOffset+N >= indexMap.length}, 2956 * or if {@code f(N)=offset+indexMap[mapOffset+N]} 2957 * is an invalid index into {@code a}, 2958 * for any lane {@code N} in the vector 2959 * where the mask is set 2960 * @see ByteVector#toIntArray() 2961 */ 2962 @ForceInline 2963 public static 2964 ByteVector fromArray(VectorSpecies<Byte> species, 2965 byte[] a, int offset, 2966 int[] indexMap, int mapOffset, 2967 VectorMask<Byte> m) { 2968 ByteSpecies vsp = (ByteSpecies) species; 2969 2970 // Do it the slow way. 2971 return vsp.vOp(m, n -> a[offset + indexMap[mapOffset + n]]); 2972 2973 } 2974 2975 /** 2976 * Loads a vector from a {@linkplain ByteBuffer byte buffer} 2977 * starting at an offset into the byte buffer. 
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows:
     * <pre>{@code
     * var bo = ByteOrder.LITTLE_ENDIAN;
     * var m = species.maskAll(true);
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param bb the byte buffer
     * @param offset the offset into the byte buffer
     * @param bo the intended byte order
     * @return a vector loaded from a byte buffer
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*1 < 0}
     *         or {@code offset+N*1 >= bb.limit()}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public static
    ByteVector fromByteBuffer(VectorSpecies<Byte> species,
                              ByteBuffer bb, int offset,
                              ByteOrder bo) {
        ByteSpecies vsp = (ByteSpecies) species;
        offset = checkFromIndexSize(offset,
                                    vsp.laneCount(),
                                    bb.limit());
        return vsp.dummyVector()
            .fromByteBuffer0(bb, offset).maybeSwap(bo);
    }

    /**
     * Loads a vector from a {@linkplain ByteBuffer byte buffer}
     * starting at an offset into the byte buffer
     * and using a mask.
     * <p>
     * This method behaves as if it returns the result of calling
     * {@link #fromByteBuffer(VectorSpecies,ByteBuffer,int,ByteOrder,VectorMask)
     * fromByteBuffer()} as follows, where {@code m} is the given mask:
     * <pre>{@code
     * var bo = ByteOrder.LITTLE_ENDIAN;
     * return fromByteBuffer(species, bb, offset, bo, m);
     * }</pre>
     *
     * @param species species of desired vector
     * @param bb the byte buffer
     * @param offset the offset into the byte buffer
     * @param bo the intended byte order
     * @param m the mask controlling lane selection
     * @return a vector loaded from a byte buffer
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N*1 < 0}
     *         or {@code offset+N*1 >= bb.limit()}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public static
    ByteVector fromByteBuffer(VectorSpecies<Byte> species,
                              ByteBuffer bb, int offset,
                              ByteOrder bo,
                              VectorMask<Byte> m) {
        // An all-true mask degenerates to the unmasked load.
        if (m.allTrue()) {
            return fromByteBuffer(species, bb, offset, bo);
        }
        ByteSpecies vsp = (ByteSpecies) species;
        // Bounds are validated only for the set lanes ...
        checkMaskFromIndexSize(offset,
                               vsp, m, 1,
                               bb.limit());
        // NOTE(review): ... but fromByteBuffer0 reads the full vector
        // width; looks like this can touch bytes past bb.limit() when
        // trailing lanes are unset — confirm against the buffer's
        // backing storage guarantees.
        ByteVector zero = zero(vsp);
        ByteVector v = zero.fromByteBuffer0(bb, offset);
        return zero.blend(v.maybeSwap(bo), m);
    }

    // Memory store operations

    /**
     * Stores this vector into an array of type {@code byte[]}
     * starting at an offset.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     *
     * @param a the array, of type {@code byte[]}
     * @param offset the offset into the array
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        offset = checkFromIndexSize(offset,
                                    vsp.laneCount(),
                                    a.length);
        // Intrinsic store; the lambda is the scalar fallback and its
        // shape must stay matched to VectorIntrinsics.store.
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this,
            a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_ + i] = e));
    }

    /**
     * Stores this vector into an array of {@code byte}
     * starting at offset and using a mask.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[offset+N]}.
     * If the mask lane at {@code N} is unset then the corresponding
     * array element {@code a[offset+N]} is left unchanged.
     * <p>
     * Array range checking is done for lanes where the mask is set.
     * Lanes where the mask is unset are not stored and do not need
     * to correspond to legitimate elements of {@code a}.
     * That is, unset lanes may correspond to array indexes less than
     * zero or beyond the end of the array.
     *
     * @param a the array, of type {@code byte[]}
     * @param offset the offset into the array
     * @param m the mask controlling lane storage
     * @throws IndexOutOfBoundsException
     *         if {@code offset+N < 0} or {@code offset+N >= a.length}
     *         for any lane {@code N} in the vector
     *         where the mask is set
     */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset,
                   VectorMask<Byte> m) {
        if (m.allTrue()) {
            intoArray(a, offset);
        } else {
            // FIXME: Cannot vectorize yet, if there's a mask.
            stOp(a, offset, m, (arr, off, i, v) -> arr[off+i] = v);
        }
    }

    /**
     * Scatters this vector into an array of type {@code byte[]}
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     * @see ByteVector#toIntArray()
     */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset,
                   int[] indexMap, int mapOffset) {
        ByteSpecies vsp = vspecies();
        // Degenerate single-lane vector: plain scalar store.
        if (length() == 1) {
            intoArray(a, offset + indexMap[mapOffset]);
            return;
        }
        IntVector.IntSpecies isp = (IntVector.IntSpecies) vsp.indexSpecies();
        // If the int index species cannot hold one index per lane,
        // fall back to a scalar scatter loop.
        if (isp.laneCount() != vsp.laneCount()) {
            stOp(a, offset,
                 (arr, off, i, e) -> {
                     int j = indexMap[mapOffset + i];
                     arr[off + j] = e;
                 });
            return;
        }

        // Index vector: vix[0:n] = i -> offset + indexMap[mo + i]
        IntVector vix = IntVector
            .fromArray(isp, indexMap, mapOffset)
            .add(offset);

        vix = VectorIntrinsics.checkIndex(vix, a.length);

        VectorIntrinsics.storeWithMap(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            isp.vectorType(),
            a, arrayAddress(a, 0), vix,
            this,
            a, offset, indexMap, mapOffset,
            (arr, off, v, map, mo)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> {
                          int j = map[mo + i];
                          // NOTE(review): writes via outer arr/off, not
                          // the shadowing arr_/off_; appears equivalent
                          // since stOp passes them through — confirm.
                          arr[off + j] = e;
                      }));
    }

    /**
     * Scatters this vector into an array of type {@code byte[]},
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
     * elements in a second array of {@code int}s, starting at a given
     * {@code mapOffset}.
     * <p>
     * For each vector lane, where {@code N} is the vector lane index,
     * if the mask lane at index {@code N} is set then
     * the lane element at index {@code N} is stored into the array
     * element {@code a[f(N)]}, where {@code f(N)} is the
     * index mapping expression
     * {@code offset + indexMap[mapOffset + N]}.
     *
     * @param a the array
     * @param offset an offset to combine with the index map offsets
     * @param indexMap the index map
     * @param mapOffset the offset into the index map
     * @param m the mask
     * @throws IndexOutOfBoundsException
     *         if {@code mapOffset+N < 0}
     *         or if {@code mapOffset+N >= indexMap.length},
     *         or if {@code f(N)=offset+indexMap[mapOffset+N]}
     *         is an invalid index into {@code a},
     *         for any lane {@code N} in the vector
     *         where the mask is set
     * @see ByteVector#toIntArray()
     */
    @ForceInline
    public final
    void intoArray(byte[] a, int offset,
                   int[] indexMap, int mapOffset,
                   VectorMask<Byte> m) {
        ByteSpecies vsp = vspecies();
        if (m.allTrue()) {
            intoArray(a, offset, indexMap, mapOffset);
            return;
        }
        // FIXME: masked scatter with a partial mask is not yet
        // implemented; only the all-true case above is supported.
        throw new AssertionError("fixme");
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteArray(byte[] a, int offset) {
        offset = checkFromIndexSize(offset,
                                    bitSize() / Byte.SIZE,
                                    a.length);
        this.maybeSwap(ByteOrder.LITTLE_ENDIAN)
            .intoByteArray0(a, offset);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteArray(byte[] a, int offset,
                       VectorMask<Byte> m) {
        if (m.allTrue()) {
            intoByteArray(a, offset);
            return;
        }
        ByteSpecies vsp = vspecies();
        if (offset >= 0 && offset <= (a.length - vsp.length() * 1)) {
            // In-bounds: read-blend-write so unset lanes keep their
            // old array values.
            var oldVal = fromByteArray0(a, offset);
            var newVal = oldVal.blend(this, m);
            newVal.intoByteArray0(a, offset);
        } else {
            // Partially out of bounds: check set lanes only, then
            // store lane-by-lane through a heap ByteBuffer view.
            checkMaskFromIndexSize(offset, vsp, m, 1, a.length);
            ByteBuffer tb = wrapper(a, offset, NATIVE_ENDIAN);
            this.stOp(tb, 0, m, (tb_, __, i, e) -> tb_.put(i, e));
        }
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteArray(byte[] a, int offset,
                       ByteOrder bo,
                       VectorMask<Byte> m) {
        maybeSwap(bo).intoByteArray(a, offset, m);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteBuffer(ByteBuffer bb, int offset,
                        ByteOrder bo) {
        maybeSwap(bo).intoByteBuffer0(bb, offset);
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    void intoByteBuffer(ByteBuffer bb, int offset,
                        ByteOrder bo,
                        VectorMask<Byte> m) {
        if (m.allTrue()) {
            intoByteBuffer(bb, offset, bo);
            return;
        }
        ByteSpecies vsp = vspecies();
        checkMaskFromIndexSize(offset, vsp, m, 1, bb.limit());
        // Only succeeds when the full unmasked span is in bounds;
        // see conditionalStoreNYI.
        conditionalStoreNYI(offset, vsp, m, 1, bb.limit());
        var oldVal = fromByteBuffer0(bb, offset);
        var newVal = oldVal.blend(this.maybeSwap(bo), m);
        newVal.intoByteBuffer0(bb, offset);
    }

    // ================================================

    // Low-level memory operations.
    //
    // Note that all of these operations *must* inline into a context
    // where the exact species of the involved vector is a
    // compile-time constant.  Otherwise, the intrinsic generation
    // will fail and performance will suffer.
    //
    // In many cases this is achieved by re-deriving a version of the
    // method in each concrete subclass (per species).
    // The re-derived
    // method simply calls one of these generic methods, with exact
    // parameters for the controlling metadata, which is either a
    // typed vector or constant species instance.

    // Unchecked loading operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
    // byte swapping.

    /*package-private*/
    abstract
    ByteVector fromArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromArray0Template(byte[] a, int offset) {
        // Unchecked intrinsic load; the trailing lambda is the
        // scalar fallback and its shape must not change.
        ByteSpecies vsp = vspecies();
        return VectorIntrinsics.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> s.ldOp(arr, off,
                                    (arr_, off_, i) -> arr_[off_ + i]));
    }

    @Override
    abstract
    ByteVector fromByteArray0(byte[] a, int offset);
    @ForceInline
    final
    ByteVector fromByteArray0Template(byte[] a, int offset) {
        // Raw byte-array load; fallback reads through a heap
        // ByteBuffer view in native order.
        ByteSpecies vsp = vspecies();
        return VectorIntrinsics.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            a, offset, vsp,
            (arr, off, s) -> {
                ByteBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
                return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
            });
    }

    abstract
    ByteVector fromByteBuffer0(ByteBuffer bb, int offset);
    @ForceInline
    final
    ByteVector fromByteBuffer0Template(ByteBuffer bb, int offset) {
        // Works for both heap and direct buffers via
        // bufferBase/bufferAddress.
        ByteSpecies vsp = vspecies();
        return VectorIntrinsics.load(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            bufferBase(bb), bufferAddress(bb, offset),
            bb, offset, vsp,
            (buf, off, s) -> {
                ByteBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
                return s.ldOp(tb, 0, (tb_, __, i) -> tb_.get(i));
            });
    }

    // Unchecked storing operations in native byte order.
    // Caller is responsible for applying index checks, masking, and
    // byte swapping.

    abstract
    void intoArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoArray0Template(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, arrayAddress(a, offset),
            this, a, offset,
            (arr, off, v)
            -> v.stOp(arr, off,
                      (arr_, off_, i, e) -> arr_[off_+i] = e));
    }

    abstract
    void intoByteArray0(byte[] a, int offset);
    @ForceInline
    final
    void intoByteArray0Template(byte[] a, int offset) {
        ByteSpecies vsp = vspecies();
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            a, byteArrayAddress(a, offset),
            this, a, offset,
            (arr, off, v) -> {
                ByteBuffer tb = wrapper(arr, off, NATIVE_ENDIAN);
                v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
            });
    }

    @ForceInline
    final
    void intoByteBuffer0(ByteBuffer bb, int offset) {
        ByteSpecies vsp = vspecies();
        VectorIntrinsics.store(
            vsp.vectorType(), vsp.elementType(), vsp.laneCount(),
            bufferBase(bb), bufferAddress(bb, offset),
            this, bb, offset,
            (buf, off, v) -> {
                ByteBuffer tb = wrapper(buf, off, NATIVE_ENDIAN);
                v.stOp(tb, 0, (tb_, __, i, e) -> tb_.put(i, e));
            });
    }

    // End of low-level memory operations.

    // Mask-aware bounds check: validates only the lanes where the
    // mask is set, via AbstractMask.checkIndexByLane.
    private static
    void checkMaskFromIndexSize(int offset,
                                ByteSpecies vsp,
                                VectorMask<Byte> m,
                                int scale,
                                int limit) {
        ((AbstractMask<Byte>)m)
            .checkIndexByLane(offset, limit, vsp.iota(), scale);
    }

    // Placeholder for a true masked (conditional) store: currently
    // only succeeds when the full unmasked span fits in [0..limit).
    @ForceInline
    private void conditionalStoreNYI(int offset,
                                     ByteSpecies vsp,
                                     VectorMask<Byte> m,
                                     int scale,
                                     int limit) {
        if (offset < 0 || offset + vsp.laneCount() * scale > limit) {
            String msg =
                String.format("unimplemented: store @%d in [0..%d), %s in %s",
                              offset, limit, m, vsp);
            throw new AssertionError(msg);
        }
    }

    /*package-private*/
    @Override
    @ForceInline
    final
    ByteVector maybeSwap(ByteOrder bo) {
        // Single-byte lanes have no byte order; swapping is a no-op.
        return this;
    }

    // Scale/offset constants for Unsafe-style byte[] addressing.
    static final int ARRAY_SHIFT =
        31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_BYTE_INDEX_SCALE);
    static final long ARRAY_BASE =
        Unsafe.ARRAY_BYTE_BASE_OFFSET;

    // Address of a[index] when a is viewed as an array of byte lanes.
    @ForceInline
    static long arrayAddress(byte[] a, int index) {
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }

    // Address of raw byte index within a byte[] payload.
    @ForceInline
    static long byteArrayAddress(byte[] a, int index) {
        return Unsafe.ARRAY_BYTE_BASE_OFFSET + index;
    }

    // Byte buffer wrappers.
    // View of bb from offset to its limit, in the given order.
    private static ByteBuffer wrapper(ByteBuffer bb, int offset,
                                      ByteOrder bo) {
        return bb.duplicate().position(offset).slice()
            .order(bo);
    }
    // View of the tail of a starting at offset, in the given order.
    private static ByteBuffer wrapper(byte[] a, int offset,
                                      ByteOrder bo) {
        return ByteBuffer.wrap(a, offset, a.length - offset)
            .order(bo);
    }

    // ================================================

    /// Reinterpreting view methods:
    //   lanewise reinterpret: viewAsXVector()
    //   keep shape, redraw lanes: reinterpretAsEs()

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final ByteVector reinterpretAsBytes() {
        // Already a byte vector: identity view.
        return this;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @ForceInline
    @Override
    public final ByteVector viewAsIntegralLanes() {
        // byte is already an integral lane type: identity view.
        return this;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     *
     * @implNote This method always throws
     * {@code IllegalArgumentException}, because there is no floating
     * point type of the same size as {@code byte}. The return type
     * of this method is arbitrarily designated as
     * {@code Vector<?>}. Future versions of this API may change the return
     * type if additional floating point types become available.
     */
    @ForceInline
    @Override
    public final
    Vector<?>
    viewAsFloatingLanes() {
        LaneType flt = LaneType.BYTE.asFloating();
        throw new AssertionError();  // should already throw IAE
    }

    // ================================================

    /// Object methods: toString, equals, hashCode
    //
    // Object methods are defined as if via Arrays.toString, etc.,
    // is applied to the array of elements.  Two equal vectors
    // are required to have equal species and equal lane values.

    /**
     * Returns a string representation of this vector, of the form
     * {@code "[0,1,2...]"}, reporting the lane values of this vector,
     * in lane order.
     *
     * The string is produced as if by a call to {@link
     * java.util.Arrays#toString(byte[]) Arrays.toString()},
     * as appropriate to the {@code byte} array returned by
     * {@link #toArray this.toArray()}.
     *
     * @return a string of the form {@code "[0,1,2...]"}
     * reporting the lane values of this vector
     */
    @Override
    @ForceInline
    public final
    String toString() {
        // now that toArray is strongly typed, we can define this
        return Arrays.toString(toArray());
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    boolean equals(Object obj) {
        if (obj instanceof Vector) {
            Vector<?> that = (Vector<?>) obj;
            // Equal vectors must have equal species and lane values.
            if (this.species().equals(that.species())) {
                return this.eq(that.check(this.species())).allTrue();
            }
        }
        return false;
    }

    /**
     * {@inheritDoc} <!--workaround-->
     */
    @Override
    @ForceInline
    public final
    int hashCode() {
        // now that toArray is strongly typed, we can define this
        return Objects.hash(species(), Arrays.hashCode(toArray()));
    }

    // ================================================

    // Species

    /**
     * Class representing {@link ByteVector}'s of the same {@link VectorShape VectorShape}.
     */
    /*package-private*/
    static final class ByteSpecies extends AbstractSpecies<Byte> {
        private ByteSpecies(VectorShape shape,
                Class<? extends ByteVector> vectorType,
                Class<? extends AbstractMask<Byte>> maskType,
                Function<Object, ByteVector> vectorFactory) {
            super(shape, LaneType.of(byte.class),
                  vectorType, maskType,
                  vectorFactory);
            assert(this.elementSize() == Byte.SIZE);
        }

        // Specializing overrides:

        @Override
        @ForceInline
        public final Class<Byte> elementType() {
            return byte.class;
        }

        @Override
        @ForceInline
        public final Class<Byte> genericElementType() {
            return Byte.class;
        }

        @Override
        @ForceInline
        public final Class<byte[]> arrayType() {
            return byte[].class;
        }

        @SuppressWarnings("unchecked")
        @Override
        @ForceInline
        public final Class<? extends ByteVector> vectorType() {
            return (Class<? extends ByteVector>) vectorType;
        }

        // Validates that e fits in a byte; returns e unchanged.
        @Override
        @ForceInline
        public final long checkValue(long e) {
            longToElementBits(e);  // only for exception
            return e;
        }

        /*package-private*/
        @Override
        @ForceInline
        final ByteVector broadcastBits(long bits) {
            return (ByteVector)
                VectorIntrinsics.broadcastCoerced(
                    vectorType, byte.class, laneCount,
                    bits, this,
                    (bits_, s_) -> s_.rvOp(i -> bits_));
        }

        /*package-private*/
        @ForceInline

        final ByteVector broadcast(byte e) {
            return broadcastBits(toBits(e));
        }

        @Override
        @ForceInline
        public final ByteVector broadcast(long e) {
            return broadcastBits(longToElementBits(e));
        }

        /*package-private*/
        final @Override
        @ForceInline
        long longToElementBits(long value) {
            // Do the conversion, and then test it for failure.
            byte e = (byte) value;
            if ((long) e != value) {
                throw badElementBits(value, e);
            }
            return toBits(e);
        }

        // Narrows e to int or long, failing if the round trip back
        // to byte would lose information.
        /*package-private*/
        @ForceInline
        static long toIntegralChecked(byte e, boolean convertToInt) {
            long value = convertToInt ? (int) e : (long) e;
            if ((byte) value != e) {
                throw badArrayBits(e, convertToInt, value);
            }
            return value;
        }

        @Override
        @ForceInline
        public final ByteVector fromValues(long... values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            byte[] va = new byte[laneCount()];
            for (int i = 0; i < va.length; i++) {
                long lv = values[i];
                byte v = (byte) lv;
                va[i] = v;
                // Reject any value that does not round-trip.
                if ((long)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        /* this non-public one is for internal conversions */
        @Override
        @ForceInline
        final ByteVector fromIntValues(int[] values) {
            VectorIntrinsics.requireLength(values.length, laneCount);
            byte[] va = new byte[laneCount()];
            for (int i = 0; i < va.length; i++) {
                int lv = values[i];
                byte v = (byte) lv;
                va[i] = v;
                // Reject any value that does not round-trip.
                if ((int)v != lv) {
                    throw badElementBits(lv, v);
                }
            }
            return dummyVector().fromArray0(va, 0);
        }

        // Virtual constructors

        @ForceInline
        @Override final
        public ByteVector fromArray(Object a, int offset) {
            // User entry point:  Be careful with inputs.
            return ByteVector
                .fromArray(this, (byte[]) a, offset);
        }

        @Override final
        ByteVector dummyVector() {
            return (ByteVector) super.dummyVector();
        }

        final
        ByteVector vectorFactory(byte[] vec) {
            // Species delegates all factory requests to its dummy
            // vector.  The dummy knows all about it.
            return dummyVector().vectorFactory(vec);
        }

        /*package-private*/
        final @Override
        @ForceInline
        ByteVector rvOp(RVOp f) {
            // Build a vector from raw lane bits supplied by f.
            byte[] res = new byte[laneCount()];
            for (int i = 0; i < res.length; i++) {
                byte bits = (byte) f.apply(i);
                res[i] = fromBits(bits);
            }
            return dummyVector().vectorFactory(res);
        }

        // Build a vector from lane values supplied by f.
        ByteVector vOp(FVOp f) {
            byte[] res = new byte[laneCount()];
            for (int i = 0; i < res.length; i++) {
                res[i] = f.apply(i);
            }
            return dummyVector().vectorFactory(res);
        }

        // Masked variant: unset lanes stay zero.
        ByteVector vOp(VectorMask<Byte> m, FVOp f) {
            byte[] res = new byte[laneCount()];
            boolean[] mbits = ((AbstractMask<Byte>)m).getBits();
            for (int i = 0; i < res.length; i++) {
                if (mbits[i]) {
                    res[i] = f.apply(i);
                }
            }
            return dummyVector().vectorFactory(res);
        }

        /*package-private*/
        @ForceInline
        <M> ByteVector ldOp(M memory, int offset,
                            FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> ByteVector ldOp(M memory, int offset,
                            AbstractMask<Byte> m,
                            FLdOp<M> f) {
            return dummyVector().ldOp(memory, offset, m, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset, FStOp<M> f) {
            dummyVector().stOp(memory, offset, f);
        }

        /*package-private*/
        @ForceInline
        <M> void stOp(M memory, int offset,
                      AbstractMask<Byte> m,
                      FStOp<M> f) {
            dummyVector().stOp(memory, offset, m, f);
        }

        // N.B. Make sure these constant vectors and
        // masks load up correctly into registers.
        //
        // Also, see if we can avoid all that switching.
        // Could we cache both vectors and both masks in
        // this species object?

        // Zero and iota vector access
        @Override
        @ForceInline
        public final ByteVector zero() {
            // Dispatch on the concrete shape so the constant ZERO
            // loads from a compile-time-known class.
            if ((Class<?>) vectorType() == ByteMaxVector.class)
                return ByteMaxVector.ZERO;
            switch (vectorBitSize()) {
                case 64: return Byte64Vector.ZERO;
                case 128: return Byte128Vector.ZERO;
                case 256: return Byte256Vector.ZERO;
                case 512: return Byte512Vector.ZERO;
            }
            throw new AssertionError();
        }

        @Override
        @ForceInline
        public final ByteVector iota() {
            // Same shape dispatch as zero(), for the IOTA constant.
            if ((Class<?>) vectorType() == ByteMaxVector.class)
                return ByteMaxVector.IOTA;
            switch (vectorBitSize()) {
                case 64: return Byte64Vector.IOTA;
                case 128: return Byte128Vector.IOTA;
                case 256: return Byte256Vector.IOTA;
                case 512: return Byte512Vector.IOTA;
            }
            throw new AssertionError();
        }

        // Mask access
        @Override
        @ForceInline
        public final VectorMask<Byte> maskAll(boolean bit) {
            // Same shape dispatch, delegating to the per-shape mask.
            if ((Class<?>) vectorType() == ByteMaxVector.class)
                return ByteMaxVector.ByteMaxMask.maskAll(bit);
            switch (vectorBitSize()) {
                case 64: return Byte64Vector.Byte64Mask.maskAll(bit);
                case 128: return Byte128Vector.Byte128Mask.maskAll(bit);
                case 256: return Byte256Vector.Byte256Mask.maskAll(bit);
                case 512: return Byte512Vector.Byte512Mask.maskAll(bit);
            }
            throw new AssertionError();
        }
    }

    /**
     * Finds a species for an element type of {@code byte} and shape.
     *
     * @param s the shape
     * @return a species for an element type of {@code byte} and shape
     * @throws IllegalArgumentException if no such species exists for the shape
     */
    static ByteSpecies species(VectorShape s) {
        Objects.requireNonNull(s);
        switch (s) {
            case S_64_BIT: return (ByteSpecies) SPECIES_64;
            case S_128_BIT: return (ByteSpecies) SPECIES_128;
            case S_256_BIT: return (ByteSpecies) SPECIES_256;
            case S_512_BIT: return (ByteSpecies) SPECIES_512;
            case S_Max_BIT: return (ByteSpecies) SPECIES_MAX;
            default: throw new IllegalArgumentException("Bad shape: " + s);
        }
    }

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_64_BIT VectorShape.S_64_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_64
        = new ByteSpecies(VectorShape.S_64_BIT,
                          Byte64Vector.class,
                          Byte64Vector.Byte64Mask.class,
                          Byte64Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_128
        = new ByteSpecies(VectorShape.S_128_BIT,
                          Byte128Vector.class,
                          Byte128Vector.Byte128Mask.class,
                          Byte128Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_256
        = new ByteSpecies(VectorShape.S_256_BIT,
                          Byte256Vector.class,
                          Byte256Vector.Byte256Mask.class,
                          Byte256Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_512
        = new ByteSpecies(VectorShape.S_512_BIT,
                          Byte512Vector.class,
                          Byte512Vector.Byte512Mask.class,
                          Byte512Vector::new);

    /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */
    public static final VectorSpecies<Byte> SPECIES_MAX
        = new ByteSpecies(VectorShape.S_Max_BIT,
                          ByteMaxVector.class,
                          ByteMaxVector.ByteMaxMask.class,
                          ByteMaxVector::new);

    /**
     * Preferred species for {@link ByteVector}s.
     * A preferred species is a species of maximal bit-size for the platform.
     */
    public static final VectorSpecies<Byte> SPECIES_PREFERRED
        = (ByteSpecies) VectorSpecies.ofPreferred(byte.class);
}