< prev index next >

test/jdk/jdk/incubator/vector/benchmark/src/main/java/benchmark/crypto/Poly1305Bench.java

Print this page
rev 55894 : 8222897: [vector] Renaming of shift, rotate operations. Few other api changes.
Summary: Renaming of shift, rotate operations. Few other api changes.
Reviewed-by: jrose, briangoetz


  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.crypto;
  25 
  26 import org.openjdk.jmh.annotations.*;
  27 import jdk.incubator.vector.*;
  28 import java.util.Arrays;
  29 
  30 @State(Scope.Thread)
  31 @BenchmarkMode(Mode.Throughput)
  32 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  33 @Warmup(iterations = 3, time = 3)
  34 @Measurement(iterations = 8, time = 2)
  35 public class Poly1305Bench {
  36 
  37     @Param({"16384", "65536"})
  38     private int dataSize;  // length in bytes of the message buffer allocated in setup()
  39 
  40     private Poly1305Vector poly1305_S128 = makePoly1305(Vector.Shape.S_128_BIT);  // one instance per vector shape,
  41     private Poly1305Vector poly1305_S256 = makePoly1305(Vector.Shape.S_256_BIT);  // each built (and passed through
  42     private Poly1305Vector poly1305_S512 = makePoly1305(Vector.Shape.S_512_BIT);  // runKAT) by makePoly1305
  43 
  44     private byte[] in;                  // message buffer; allocated in setup()
  45     private byte[] out = new byte[16];  // third argument of every computeTag call
  46     private byte[] key = new byte[32];  // first argument of every computeTag call; zero-filled here
  47 
  48     private static Poly1305Vector makePoly1305(Vector.Shape shape) {
             // Builds a Poly1305Vector for the given vector shape, passes it to
             // runKAT (defined elsewhere in this file; presumably a known-answer
             // self-test -- confirm) and returns it. Used as the initializer of
             // the poly1305_S* fields.
  49         Poly1305Vector poly = new Poly1305Vector(shape);
  50         runKAT(poly);
  51         return poly;
  52     }
  53 
  54     @Setup
  55     public void setup() {
             // Allocates the message buffer at the size selected by the dataSize
             // @Param; the freshly created array is left zero-filled.
  56         in = new byte[dataSize];
  57     }
  58 
  59     @Benchmark
  60     public void auth128() {
             // One computeTag call on the instance built for Vector.Shape.S_128_BIT,
             // using the benchmark's key, in and out buffers.
  61         poly1305_S128.computeTag(key, in, out);
  62     }
  63 
  64     @Benchmark
  65     public void auth256() {
             // One computeTag call on the instance built for Vector.Shape.S_256_BIT,
             // using the benchmark's key, in and out buffers.
  66         poly1305_S256.computeTag(key, in, out);
  67     }
  68 
  69     @Benchmark
  70     public void auth512() {
             // One computeTag call on the instance built for Vector.Shape.S_512_BIT,
             // using the benchmark's key, in and out buffers.
  71         poly1305_S512.computeTag(key, in, out);
  72     }
  73 
  74     private static class Poly1305Vector {
  75 
  76         private static final int BITS_PER_LIMB = 26;                     // shift distance used by the carry/reduce steps
  77         private static final int LIMB_MASK = (1 << BITS_PER_LIMB) - 1;   // low BITS_PER_LIMB bits set
  78         private static final int KEY_LENGTH = 32;                        // key size in bytes
  79         private static final int RS_LENGTH = KEY_LENGTH / 2;             // size of each key half; the second half is read as "s"
  80 
  81         private final Vector.Species<Long> longSpecies;    // long species of the shape passed to the constructor
  82         private final Vector.Species<Integer> intSpecies;  // int species of half that bit size
  83         private final int vectorWidth;                     // longSpecies.length()
  84         private final int parBlockCount;                   // vectorWidth * 16; byte stride through the message
  85 
  86         private final LongVector.Shuffle<Long> inShuffle0;  // index pattern from makeInShuffle0()
  87         private final LongVector.Shuffle<Long> inShuffle1;  // index pattern from makeInShuffle1()
  88         private final IntVector.Mask<Long> inMask;  // NOTE(review): declared via IntVector.Mask but makeInMask() returns LongVector.Mask<Long> -- presumably the same nested type; confirm
  89 
  90         public Poly1305Vector(Vector.Shape shape) {
                 // Derives both species from the requested shape: a long species of
                 // that shape and an int species of half its bit size, so the int
                 // vectors have as many lanes as the long vectors
                 // (bits / 2 / 32 == bits / 64).
                 // vectorWidth is that lane count; parBlockCount (vectorWidth * 16)
                 // is used by the tag computation as its byte stride through the
                 // message.
                 // The shuffle/mask helpers read longSpecies and vectorWidth, so
                 // they are invoked only after those fields are assigned.
  91 
  92             this.longSpecies = Vector.Species.of(long.class, shape);
  93             int intSize = shape.bitSize() / 2;
  94             Vector.Shape intShape = Vector.Shape.forBitSize(intSize);
  95             this.intSpecies = Vector.Species.of(int.class, intShape);
  96             this.vectorWidth = longSpecies.length();
  97             this.parBlockCount = vectorWidth * 16;
  98 
  99             this.inShuffle0 = makeInShuffle0();
 100             this.inShuffle1 = makeInShuffle1();
 101             this.inMask = makeInMask();
 102         }
 103 
 104         private LongVector.Shuffle<Long> makeInShuffle0() {
                 // Builds a shuffle whose i-th index is (2 * i) % vectorWidth: the
                 // even lane numbers 0, 2, 4, ... repeated to fill vectorWidth
                 // entries (e.g. 0, 2, 0, 2 for four lanes).
                 // NOTE(review): presumably selects the first long of each 16-byte
                 // block when used with inMask in rearrange -- confirm against the
                 // rearrange semantics.
 105             int[] indexArr = new int[vectorWidth];
 106             for (int i = 0; i < indexArr.length; i++) {
 107                 indexArr[i] = (2 * i) % vectorWidth;
 108             }
 109             return LongVector.shuffleFromArray(longSpecies, indexArr, 0);
 110         }
 111         private LongVector.Shuffle<Long> makeInShuffle1() {
                 // Builds a shuffle whose i-th index is ((2 * i) % vectorWidth) + 1:
                 // the odd lane numbers 1, 3, 5, ... repeated to fill vectorWidth
                 // entries (e.g. 1, 3, 1, 3 for four lanes).
                 // NOTE(review): presumably selects the second long of each 16-byte
                 // block when used with inMask in rearrange -- confirm against the
                 // rearrange semantics.
 112             int[] indexArr = new int[vectorWidth];
 113             for (int i = 0; i < indexArr.length; i++) {
 114                 indexArr[i] = ((2 * i) % vectorWidth) + 1;
 115             }
 116             return LongVector.shuffleFromArray(longSpecies, indexArr, 0);
 117         }
 118         private LongVector.Mask<Long> makeInMask() {
                 // Builds a mask over longSpecies in which the upper half of the
                 // lanes (indices vectorWidth / 2 .. vectorWidth - 1) is set and the
                 // lower half is clear.
 119             boolean[] maskArr = new boolean[vectorWidth];
 120             for (int i = vectorWidth / 2; i < vectorWidth; i++) {
 121                 maskArr[i] = true;
 122             }
 123             return LongVector.maskFromArray(longSpecies, maskArr, 0);
 124         }
 125 
 126         private static int[] fromByteArray(byte[] buf) {
 127             int[] result = new int[5];
 128 
 129             result[0]
 130                     = (buf[0] & 0xFF)
 131                     + ((buf[1] & 0xFF) << 8)
 132                     + ((buf[2] & 0xFF) << 16)
 133                     + ((buf[3] & 0x03) << 24);
 134             result[1]
 135                     = ((buf[3] & 0xFF) >> 2)
 136                     + ((buf[4] & 0xFF) << 6)
 137                     + ((buf[5] & 0xFF) << 14)
 138                     + ((buf[6] & 0x0F) << 22);
 139             result[2]
 140                     = ((buf[6] & 0xFF) >> 4)
 141                     + ((buf[7] & 0xFF) << 4)
 142                     + ((buf[8] & 0xFF) << 12)
 143                     + ((buf[9] & 0x3F) << 20);


 281             IntVector rUp3_int = IntVector.broadcast(intSpecies, r[rUpIndex][3]);
 282             IntVector rUp4_int = IntVector.broadcast(intSpecies, r[rUpIndex][4]);
 283 
 284             IntVector r5Up1_int = rUp1_int.mul(5);
 285             IntVector r5Up2_int = rUp2_int.mul(5);
 286             IntVector r5Up3_int = rUp3_int.mul(5);
 287             IntVector r5Up4_int = rUp4_int.mul(5);
 288 
 289             LongVector longMsg0 = LongVector.fromByteArray(longSpecies, msg, 0);
 290             LongVector longMsg1 =
 291                 LongVector.fromByteArray(longSpecies, msg, vectorWidth * 8);
 292 
 293             LongVector inAlign0 =
 294                 longMsg0.rearrange(longMsg1, inShuffle0, inMask);
 295             LongVector inAlign1 =
 296                 longMsg0.rearrange(longMsg1, inShuffle1, inMask);
 297 
 298             IntVector a0 = (IntVector)
 299                 inAlign0.and(LIMB_MASK).cast(intSpecies);
 300             IntVector a1 = (IntVector)
 301                 inAlign0.shiftR(26).and(LIMB_MASK).cast(intSpecies);
 302             IntVector a2 = (IntVector)
 303                 inAlign0.shiftR(52).and(0xFFF).cast(intSpecies);
 304             a2 = a2.or(inAlign1.and(0x3FFF).shiftL(12).cast(intSpecies));
 305             IntVector a3 = (IntVector)
 306                 inAlign1.shiftR(14).and(LIMB_MASK).cast(intSpecies);
 307             IntVector a4 = (IntVector)
 308                 inAlign1.shiftR(40).and(0xFFFFFF).cast(intSpecies);
 309             a4 = a4.or(1 << 24);
 310 
 311             int numParBlocks = msg.length / parBlockCount - 1;
 312             for (int i = 0; i < numParBlocks; i++) {
 313 
 314                 // multiply and reduce
 315                 LongVector c0 = (LongVector)
 316                     a0.cast(longSpecies).mul(rUp0_int.cast(longSpecies))
 317                     .add(a1.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)))
 318                     .add(a2.cast(longSpecies).mul(r5Up3_int.cast(longSpecies)))
 319                     .add(a3.cast(longSpecies).mul(r5Up2_int.cast(longSpecies)))
 320                     .add(a4.cast(longSpecies).mul(r5Up1_int.cast(longSpecies)));
 321 
 322                 LongVector c1 = (LongVector)
 323                     a0.cast(longSpecies).mul(rUp1_int.cast(longSpecies))
 324                     .add(a1.cast(longSpecies).mul(rUp0_int.cast(longSpecies)))
 325                     .add(a2.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)))
 326                     .add(a3.cast(longSpecies).mul(r5Up3_int.cast(longSpecies)))
 327                     .add(a4.cast(longSpecies).mul(r5Up2_int.cast(longSpecies)));
 328 


 332                     .add(a2.cast(longSpecies).mul(rUp0_int.cast(longSpecies)))
 333                     .add(a3.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)))
 334                     .add(a4.cast(longSpecies).mul(r5Up3_int.cast(longSpecies)));
 335 
 336                 LongVector c3 = (LongVector)
 337                     a0.cast(longSpecies).mul(rUp3_int.cast(longSpecies))
 338                     .add(a1.cast(longSpecies).mul(rUp2_int.cast(longSpecies)))
 339                     .add(a2.cast(longSpecies).mul(rUp1_int.cast(longSpecies)))
 340                     .add(a3.cast(longSpecies).mul(rUp0_int.cast(longSpecies)))
 341                     .add(a4.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)));
 342 
 343                 LongVector c4 = (LongVector)
 344                     a0.cast(longSpecies).mul(rUp4_int.cast(longSpecies))
 345                     .add(a1.cast(longSpecies).mul(rUp3_int.cast(longSpecies)))
 346                     .add(a2.cast(longSpecies).mul(rUp2_int.cast(longSpecies)))
 347                     .add(a3.cast(longSpecies).mul(rUp1_int.cast(longSpecies)))
 348                     .add(a4.cast(longSpecies).mul(rUp0_int.cast(longSpecies)));
 349 
 350                 // carry/reduce
 351                 // Note: this carry/reduce sequence might not be correct
 352                 c4 = c4.add(c3.shiftR(BITS_PER_LIMB));
 353                 c3 = c3.and(LIMB_MASK);
 354                 c0 = c0.add(c4.shiftR(BITS_PER_LIMB).mul(5));
 355                 c4 = c4.and(LIMB_MASK);
 356                 c1 = c1.add(c0.shiftR(BITS_PER_LIMB));
 357                 c0 = c0.and(LIMB_MASK);
 358                 c2 = c2.add(c1.shiftR(BITS_PER_LIMB));
 359                 c1 = c1.and(LIMB_MASK);
 360                 c3 = c3.add(c2.shiftR(BITS_PER_LIMB));
 361                 c2 = c2.and(LIMB_MASK);
 362                 c4 = c4.add(c3.shiftR(BITS_PER_LIMB));
 363                 c3 = c3.and(LIMB_MASK);
 364 
 365                 a0 = (IntVector) c0.cast(intSpecies);
 366                 a1 = (IntVector) c1.cast(intSpecies);
 367                 a2 = (IntVector) c2.cast(intSpecies);
 368                 a3 = (IntVector) c3.cast(intSpecies);
 369                 a4 = (IntVector) c4.cast(intSpecies);
 370 
 371                 // fromByteArray and add next part of message
 372                 int start = parBlockCount * (i + 1);
 373 
 374                 longMsg0 = LongVector.fromByteArray(longSpecies, msg, start);
 375                 longMsg1 = LongVector.fromByteArray(longSpecies, msg,
 376                     start + vectorWidth * 8);
 377 
 378                 inAlign0 = longMsg0.rearrange(longMsg1, inShuffle0, inMask);
 379                 inAlign1 = longMsg0.rearrange(longMsg1, inShuffle1, inMask);
 380 
 381                 IntVector in0 = (IntVector)
 382                     inAlign0.and(LIMB_MASK).cast(intSpecies);
 383                 IntVector in1 = (IntVector)
 384                     inAlign0.shiftR(26).and(LIMB_MASK).cast(intSpecies);
 385                 IntVector in2 = (IntVector)
 386                     inAlign0.shiftR(52).and(0xFFF).cast(intSpecies);
 387                 in2 = in2.or(inAlign1.and(0x3FFF).shiftL(12).cast(intSpecies));
 388                 IntVector in3 = (IntVector)
 389                     inAlign1.shiftR(14).and(LIMB_MASK).cast(intSpecies);
 390                 IntVector in4 = (IntVector)
 391                     inAlign1.shiftR(40).and(0xFFFFFF).cast(intSpecies);
 392                 in4 = in4.or(1 << 24);
 393 
 394                 a0 = a0.add(in0);
 395                 a1 = a1.add(in1);
 396                 a2 = a2.add(in2);
 397                 a3 = a3.add(in3);
 398                 a4 = a4.add(in4);
 399             }
 400 
 401             // multiply by powers of r
 402             long[] rTemp = new long[vectorWidth];
 403             LongVector rFin0 = rPowerVec(r, rTemp, rUpIndex, 0);
 404             LongVector rFin1 = rPowerVec(r, rTemp, rUpIndex, 1);
 405             LongVector rFin2 = rPowerVec(r, rTemp, rUpIndex, 2);
 406             LongVector rFin3 = rPowerVec(r, rTemp, rUpIndex, 3);
 407             LongVector rFin4 = rPowerVec(r, rTemp, rUpIndex, 4);
 408 
 409             LongVector r5Fin_1 = rFin1.mul(5);
 410             LongVector r5Fin_2 = rFin2.mul(5);
 411             LongVector r5Fin_3 = rFin3.mul(5);


 420                 .add(a1.cast(longSpecies).mul(rFin0))
 421                 .add(a2.cast(longSpecies).mul(r5Fin_4))
 422                 .add(a3.cast(longSpecies).mul(r5Fin_3))
 423                 .add(a4.cast(longSpecies).mul(r5Fin_2));
 424             LongVector c2 = (LongVector) a0.cast(longSpecies).mul(rFin2)
 425                 .add(a1.cast(longSpecies).mul(rFin1))
 426                 .add(a2.cast(longSpecies).mul(rFin0))
 427                 .add(a3.cast(longSpecies).mul(r5Fin_4))
 428                 .add(a4.cast(longSpecies).mul(r5Fin_3));
 429             LongVector c3 = (LongVector) a0.cast(longSpecies).mul(rFin3)
 430                 .add(a1.cast(longSpecies).mul(rFin2))
 431                 .add(a2.cast(longSpecies).mul(rFin1))
 432                 .add(a3.cast(longSpecies).mul(rFin0))
 433                 .add(a4.cast(longSpecies).mul(r5Fin_4));
 434             LongVector c4 = (LongVector) a0.cast(longSpecies).mul(rFin4)
 435                 .add(a1.cast(longSpecies).mul(rFin3))
 436                 .add(a2.cast(longSpecies).mul(rFin2))
 437                 .add(a3.cast(longSpecies).mul(rFin1))
 438                 .add(a4.cast(longSpecies).mul(rFin0));
 439 
 440             c4 = c4.add(c3.shiftR(BITS_PER_LIMB));
 441             c3 = c3.and(LIMB_MASK);
 442             c0 = c0.add(c4.shiftR(BITS_PER_LIMB).mul(5));
 443             c4 = c4.and(LIMB_MASK);
 444             c1 = c1.add(c0.shiftR(BITS_PER_LIMB));
 445             c0 = c0.and(LIMB_MASK);
 446             c2 = c2.add(c1.shiftR(BITS_PER_LIMB));
 447             c1 = c1.and(LIMB_MASK);
 448             c3 = c3.add(c2.shiftR(BITS_PER_LIMB));
 449             c2 = c2.and(LIMB_MASK);
 450             c4 = c4.add(c3.shiftR(BITS_PER_LIMB));
 451             c3 = c3.and(LIMB_MASK);
 452 
 453             a0 = (IntVector) c0.cast(intSpecies);
 454             a1 = (IntVector) c1.cast(intSpecies);
 455             a2 = (IntVector) c2.cast(intSpecies);
 456             a3 = (IntVector) c3.cast(intSpecies);
 457             a4 = (IntVector) c4.cast(intSpecies);
 458 
 459             // collect lanes and calculate tag
 460             long a0Fin = a0.addAll();
 461             long a1Fin = a1.addAll();
 462             long a2Fin = a2.addAll();
 463             long a3Fin = a3.addAll();
 464             long a4Fin = a4.addAll();
 465 
 466             // carry/reduce the result
 467             a4Fin = a4Fin + (a3Fin >>> BITS_PER_LIMB);
 468             a3Fin = a3Fin & LIMB_MASK;
 469             a0Fin = a0Fin + ((a4Fin >>> BITS_PER_LIMB) * 5);
 470             a4Fin = a4Fin & LIMB_MASK;
 471             a1Fin = a1Fin + (a0Fin >>> BITS_PER_LIMB);
 472             a0Fin = a0Fin & LIMB_MASK;
 473             a2Fin = a2Fin + (a1Fin >>> BITS_PER_LIMB);
 474             a1Fin = a1Fin & LIMB_MASK;
 475             a3Fin = a3Fin + (a2Fin >>> BITS_PER_LIMB);
 476             a2Fin = a2Fin & LIMB_MASK;
 477             a4Fin = a4Fin + (a3Fin >>> BITS_PER_LIMB);
 478             a3Fin = a3Fin & LIMB_MASK;
 479 
 480             byte[] s_arr =
 481                 Arrays.copyOfRange(keyBytes, RS_LENGTH, 2 * RS_LENGTH);
 482             int[] s = fromByteArray(s_arr);
 483 
 484             // Add in the s-half of the key to the accumulator




  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.crypto;
  25 
  26 import org.openjdk.jmh.annotations.*;
  27 import jdk.incubator.vector.*;
  28 import java.util.Arrays;
  29 
  30 @State(Scope.Thread)
  31 @BenchmarkMode(Mode.Throughput)
  32 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  33 @Warmup(iterations = 3, time = 3)
  34 @Measurement(iterations = 8, time = 2)
  35 public class Poly1305Bench {
  36 
  37     @Param({"16384", "65536"})
  38     private int dataSize;  // length in bytes of the message buffer allocated in setup()
  39 
  40     private Poly1305Vector poly1305_S128 = makePoly1305(VectorShape.S_128_BIT);  // one instance per vector shape,
  41     private Poly1305Vector poly1305_S256 = makePoly1305(VectorShape.S_256_BIT);  // each built (and passed through
  42     private Poly1305Vector poly1305_S512 = makePoly1305(VectorShape.S_512_BIT);  // runKAT) by makePoly1305
  43 
  44     private byte[] in;                  // message buffer; allocated in setup()
  45     private byte[] out = new byte[16];  // third argument of every computeTag call
  46     private byte[] key = new byte[32];  // first argument of every computeTag call; zero-filled here
  47 
  48     private static Poly1305Vector makePoly1305(VectorShape shape) {
             // Builds a Poly1305Vector for the given vector shape, passes it to
             // runKAT (defined elsewhere in this file; presumably a known-answer
             // self-test -- confirm) and returns it. Used as the initializer of
             // the poly1305_S* fields.
  49         Poly1305Vector poly = new Poly1305Vector(shape);
  50         runKAT(poly);
  51         return poly;
  52     }
  53 
  54     @Setup
  55     public void setup() {
             // Allocates the message buffer at the size selected by the dataSize
             // @Param; the freshly created array is left zero-filled.
  56         in = new byte[dataSize];
  57     }
  58 
  59     @Benchmark
  60     public void auth128() {
             // One computeTag call on the instance built for VectorShape.S_128_BIT,
             // using the benchmark's key, in and out buffers.
  61         poly1305_S128.computeTag(key, in, out);
  62     }
  63 
  64     @Benchmark
  65     public void auth256() {
             // One computeTag call on the instance built for VectorShape.S_256_BIT,
             // using the benchmark's key, in and out buffers.
  66         poly1305_S256.computeTag(key, in, out);
  67     }
  68 
  69     @Benchmark
  70     public void auth512() {
             // One computeTag call on the instance built for VectorShape.S_512_BIT,
             // using the benchmark's key, in and out buffers.
  71         poly1305_S512.computeTag(key, in, out);
  72     }
  73 
  74     private static class Poly1305Vector {
  75 
  76         private static final int BITS_PER_LIMB = 26;                     // shift distance used by the carry/reduce steps
  77         private static final int LIMB_MASK = (1 << BITS_PER_LIMB) - 1;   // low BITS_PER_LIMB bits set
  78         private static final int KEY_LENGTH = 32;                        // key size in bytes
  79         private static final int RS_LENGTH = KEY_LENGTH / 2;             // size of each key half; the second half is read as "s"
  80 
  81         private final VectorSpecies<Long> longSpecies;    // long species of the shape passed to the constructor
  82         private final VectorSpecies<Integer> intSpecies;  // int species of half that bit size
  83         private final int vectorWidth;                    // longSpecies.length()
  84         private final int parBlockCount;                  // vectorWidth * 16; byte stride through the message
  85 
  86         private final VectorShuffle<Long> inShuffle0;  // index pattern from makeInShuffle0()
  87         private final VectorShuffle<Long> inShuffle1;  // index pattern from makeInShuffle1()
  88         private final VectorMask<Long> inMask;         // upper-half lane mask from makeInMask()
  89 
  90         public Poly1305Vector(VectorShape shape) {
                 // Derives both species from the requested shape: a long species of
                 // that shape and an int species of half its bit size, so the int
                 // vectors have as many lanes as the long vectors
                 // (bits / 2 / 32 == bits / 64).
                 // vectorWidth is that lane count; parBlockCount (vectorWidth * 16)
                 // is used by the tag computation as its byte stride through the
                 // message.
                 // The shuffle/mask helpers read longSpecies and vectorWidth, so
                 // they are invoked only after those fields are assigned.
  91 
  92             this.longSpecies = VectorSpecies.of(long.class, shape);
  93             int intSize = shape.bitSize() / 2;
  94             VectorShape intShape = VectorShape.forBitSize(intSize);
  95             this.intSpecies = VectorSpecies.of(int.class, intShape);
  96             this.vectorWidth = longSpecies.length();
  97             this.parBlockCount = vectorWidth * 16;
  98 
  99             this.inShuffle0 = makeInShuffle0();
 100             this.inShuffle1 = makeInShuffle1();
 101             this.inMask = makeInMask();
 102         }
 103 
 104         private VectorShuffle<Long> makeInShuffle0() {
                 // Builds a shuffle whose i-th index is (2 * i) % vectorWidth: the
                 // even lane numbers 0, 2, 4, ... repeated to fill vectorWidth
                 // entries (e.g. 0, 2, 0, 2 for four lanes).
                 // NOTE(review): presumably selects the first long of each 16-byte
                 // block when used with inMask in rearrange -- confirm against the
                 // rearrange semantics.
 105             int[] indexArr = new int[vectorWidth];
 106             for (int i = 0; i < indexArr.length; i++) {
 107                 indexArr[i] = (2 * i) % vectorWidth;
 108             }
 109             return VectorShuffle.fromArray(longSpecies, indexArr, 0);
 110         }
 111         private VectorShuffle<Long> makeInShuffle1() {
                 // Builds a shuffle whose i-th index is ((2 * i) % vectorWidth) + 1:
                 // the odd lane numbers 1, 3, 5, ... repeated to fill vectorWidth
                 // entries (e.g. 1, 3, 1, 3 for four lanes).
                 // NOTE(review): presumably selects the second long of each 16-byte
                 // block when used with inMask in rearrange -- confirm against the
                 // rearrange semantics.
 112             int[] indexArr = new int[vectorWidth];
 113             for (int i = 0; i < indexArr.length; i++) {
 114                 indexArr[i] = ((2 * i) % vectorWidth) + 1;
 115             }
 116             return VectorShuffle.fromArray(longSpecies, indexArr, 0);
 117         }
 118         private VectorMask<Long> makeInMask() {
                 // Builds a mask over longSpecies in which the upper half of the
                 // lanes (indices vectorWidth / 2 .. vectorWidth - 1) is set and the
                 // lower half is clear.
 119             boolean[] maskArr = new boolean[vectorWidth];
 120             for (int i = vectorWidth / 2; i < vectorWidth; i++) {
 121                 maskArr[i] = true;
 122             }
 123             return VectorMask.fromArray(longSpecies, maskArr, 0);
 124         }
 125 
 126         private static int[] fromByteArray(byte[] buf) {
 127             int[] result = new int[5];
 128 
 129             result[0]
 130                     = (buf[0] & 0xFF)
 131                     + ((buf[1] & 0xFF) << 8)
 132                     + ((buf[2] & 0xFF) << 16)
 133                     + ((buf[3] & 0x03) << 24);
 134             result[1]
 135                     = ((buf[3] & 0xFF) >> 2)
 136                     + ((buf[4] & 0xFF) << 6)
 137                     + ((buf[5] & 0xFF) << 14)
 138                     + ((buf[6] & 0x0F) << 22);
 139             result[2]
 140                     = ((buf[6] & 0xFF) >> 4)
 141                     + ((buf[7] & 0xFF) << 4)
 142                     + ((buf[8] & 0xFF) << 12)
 143                     + ((buf[9] & 0x3F) << 20);


 281             IntVector rUp3_int = IntVector.broadcast(intSpecies, r[rUpIndex][3]);
 282             IntVector rUp4_int = IntVector.broadcast(intSpecies, r[rUpIndex][4]);
 283 
 284             IntVector r5Up1_int = rUp1_int.mul(5);
 285             IntVector r5Up2_int = rUp2_int.mul(5);
 286             IntVector r5Up3_int = rUp3_int.mul(5);
 287             IntVector r5Up4_int = rUp4_int.mul(5);
 288 
 289             LongVector longMsg0 = LongVector.fromByteArray(longSpecies, msg, 0);
 290             LongVector longMsg1 =
 291                 LongVector.fromByteArray(longSpecies, msg, vectorWidth * 8);
 292 
 293             LongVector inAlign0 =
 294                 longMsg0.rearrange(longMsg1, inShuffle0, inMask);
 295             LongVector inAlign1 =
 296                 longMsg0.rearrange(longMsg1, inShuffle1, inMask);
 297 
 298             IntVector a0 = (IntVector)
 299                 inAlign0.and(LIMB_MASK).cast(intSpecies);
 300             IntVector a1 = (IntVector)
 301                 inAlign0.shiftRight(26).and(LIMB_MASK).cast(intSpecies);
 302             IntVector a2 = (IntVector)
 303                 inAlign0.shiftRight(52).and(0xFFF).cast(intSpecies);
 304             a2 = a2.or(inAlign1.and(0x3FFF).shiftLeft(12).cast(intSpecies));
 305             IntVector a3 = (IntVector)
 306                 inAlign1.shiftRight(14).and(LIMB_MASK).cast(intSpecies);
 307             IntVector a4 = (IntVector)
 308                 inAlign1.shiftRight(40).and(0xFFFFFF).cast(intSpecies);
 309             a4 = a4.or(1 << 24);
 310 
 311             int numParBlocks = msg.length / parBlockCount - 1;
 312             for (int i = 0; i < numParBlocks; i++) {
 313 
 314                 // multiply and reduce
 315                 LongVector c0 = (LongVector)
 316                     a0.cast(longSpecies).mul(rUp0_int.cast(longSpecies))
 317                     .add(a1.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)))
 318                     .add(a2.cast(longSpecies).mul(r5Up3_int.cast(longSpecies)))
 319                     .add(a3.cast(longSpecies).mul(r5Up2_int.cast(longSpecies)))
 320                     .add(a4.cast(longSpecies).mul(r5Up1_int.cast(longSpecies)));
 321 
 322                 LongVector c1 = (LongVector)
 323                     a0.cast(longSpecies).mul(rUp1_int.cast(longSpecies))
 324                     .add(a1.cast(longSpecies).mul(rUp0_int.cast(longSpecies)))
 325                     .add(a2.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)))
 326                     .add(a3.cast(longSpecies).mul(r5Up3_int.cast(longSpecies)))
 327                     .add(a4.cast(longSpecies).mul(r5Up2_int.cast(longSpecies)));
 328 


 332                     .add(a2.cast(longSpecies).mul(rUp0_int.cast(longSpecies)))
 333                     .add(a3.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)))
 334                     .add(a4.cast(longSpecies).mul(r5Up3_int.cast(longSpecies)));
 335 
 336                 LongVector c3 = (LongVector)
 337                     a0.cast(longSpecies).mul(rUp3_int.cast(longSpecies))
 338                     .add(a1.cast(longSpecies).mul(rUp2_int.cast(longSpecies)))
 339                     .add(a2.cast(longSpecies).mul(rUp1_int.cast(longSpecies)))
 340                     .add(a3.cast(longSpecies).mul(rUp0_int.cast(longSpecies)))
 341                     .add(a4.cast(longSpecies).mul(r5Up4_int.cast(longSpecies)));
 342 
 343                 LongVector c4 = (LongVector)
 344                     a0.cast(longSpecies).mul(rUp4_int.cast(longSpecies))
 345                     .add(a1.cast(longSpecies).mul(rUp3_int.cast(longSpecies)))
 346                     .add(a2.cast(longSpecies).mul(rUp2_int.cast(longSpecies)))
 347                     .add(a3.cast(longSpecies).mul(rUp1_int.cast(longSpecies)))
 348                     .add(a4.cast(longSpecies).mul(rUp0_int.cast(longSpecies)));
 349 
 350                 // carry/reduce
 351                 // Note: this carry/reduce sequence might not be correct
 352                 c4 = c4.add(c3.shiftRight(BITS_PER_LIMB));
 353                 c3 = c3.and(LIMB_MASK);
 354                 c0 = c0.add(c4.shiftRight(BITS_PER_LIMB).mul(5));
 355                 c4 = c4.and(LIMB_MASK);
 356                 c1 = c1.add(c0.shiftRight(BITS_PER_LIMB));
 357                 c0 = c0.and(LIMB_MASK);
 358                 c2 = c2.add(c1.shiftRight(BITS_PER_LIMB));
 359                 c1 = c1.and(LIMB_MASK);
 360                 c3 = c3.add(c2.shiftRight(BITS_PER_LIMB));
 361                 c2 = c2.and(LIMB_MASK);
 362                 c4 = c4.add(c3.shiftRight(BITS_PER_LIMB));
 363                 c3 = c3.and(LIMB_MASK);
 364 
 365                 a0 = (IntVector) c0.cast(intSpecies);
 366                 a1 = (IntVector) c1.cast(intSpecies);
 367                 a2 = (IntVector) c2.cast(intSpecies);
 368                 a3 = (IntVector) c3.cast(intSpecies);
 369                 a4 = (IntVector) c4.cast(intSpecies);
 370 
 371                 // fromByteArray and add next part of message
 372                 int start = parBlockCount * (i + 1);
 373 
 374                 longMsg0 = LongVector.fromByteArray(longSpecies, msg, start);
 375                 longMsg1 = LongVector.fromByteArray(longSpecies, msg,
 376                     start + vectorWidth * 8);
 377 
 378                 inAlign0 = longMsg0.rearrange(longMsg1, inShuffle0, inMask);
 379                 inAlign1 = longMsg0.rearrange(longMsg1, inShuffle1, inMask);
 380 
 381                 IntVector in0 = (IntVector)
 382                     inAlign0.and(LIMB_MASK).cast(intSpecies);
 383                 IntVector in1 = (IntVector)
 384                     inAlign0.shiftRight(26).and(LIMB_MASK).cast(intSpecies);
 385                 IntVector in2 = (IntVector)
 386                     inAlign0.shiftRight(52).and(0xFFF).cast(intSpecies);
 387                 in2 = in2.or(inAlign1.and(0x3FFF).shiftLeft(12).cast(intSpecies));
 388                 IntVector in3 = (IntVector)
 389                     inAlign1.shiftRight(14).and(LIMB_MASK).cast(intSpecies);
 390                 IntVector in4 = (IntVector)
 391                     inAlign1.shiftRight(40).and(0xFFFFFF).cast(intSpecies);
 392                 in4 = in4.or(1 << 24);
 393 
 394                 a0 = a0.add(in0);
 395                 a1 = a1.add(in1);
 396                 a2 = a2.add(in2);
 397                 a3 = a3.add(in3);
 398                 a4 = a4.add(in4);
 399             }
 400 
 401             // multiply by powers of r
 402             long[] rTemp = new long[vectorWidth];
 403             LongVector rFin0 = rPowerVec(r, rTemp, rUpIndex, 0);
 404             LongVector rFin1 = rPowerVec(r, rTemp, rUpIndex, 1);
 405             LongVector rFin2 = rPowerVec(r, rTemp, rUpIndex, 2);
 406             LongVector rFin3 = rPowerVec(r, rTemp, rUpIndex, 3);
 407             LongVector rFin4 = rPowerVec(r, rTemp, rUpIndex, 4);
 408 
 409             LongVector r5Fin_1 = rFin1.mul(5);
 410             LongVector r5Fin_2 = rFin2.mul(5);
 411             LongVector r5Fin_3 = rFin3.mul(5);


 420                 .add(a1.cast(longSpecies).mul(rFin0))
 421                 .add(a2.cast(longSpecies).mul(r5Fin_4))
 422                 .add(a3.cast(longSpecies).mul(r5Fin_3))
 423                 .add(a4.cast(longSpecies).mul(r5Fin_2));
 424             LongVector c2 = (LongVector) a0.cast(longSpecies).mul(rFin2)
 425                 .add(a1.cast(longSpecies).mul(rFin1))
 426                 .add(a2.cast(longSpecies).mul(rFin0))
 427                 .add(a3.cast(longSpecies).mul(r5Fin_4))
 428                 .add(a4.cast(longSpecies).mul(r5Fin_3));
 429             LongVector c3 = (LongVector) a0.cast(longSpecies).mul(rFin3)
 430                 .add(a1.cast(longSpecies).mul(rFin2))
 431                 .add(a2.cast(longSpecies).mul(rFin1))
 432                 .add(a3.cast(longSpecies).mul(rFin0))
 433                 .add(a4.cast(longSpecies).mul(r5Fin_4));
 434             LongVector c4 = (LongVector) a0.cast(longSpecies).mul(rFin4)
 435                 .add(a1.cast(longSpecies).mul(rFin3))
 436                 .add(a2.cast(longSpecies).mul(rFin2))
 437                 .add(a3.cast(longSpecies).mul(rFin1))
 438                 .add(a4.cast(longSpecies).mul(rFin0));
 439 
 440             c4 = c4.add(c3.shiftRight(BITS_PER_LIMB));
 441             c3 = c3.and(LIMB_MASK);
 442             c0 = c0.add(c4.shiftRight(BITS_PER_LIMB).mul(5));
 443             c4 = c4.and(LIMB_MASK);
 444             c1 = c1.add(c0.shiftRight(BITS_PER_LIMB));
 445             c0 = c0.and(LIMB_MASK);
 446             c2 = c2.add(c1.shiftRight(BITS_PER_LIMB));
 447             c1 = c1.and(LIMB_MASK);
 448             c3 = c3.add(c2.shiftRight(BITS_PER_LIMB));
 449             c2 = c2.and(LIMB_MASK);
 450             c4 = c4.add(c3.shiftRight(BITS_PER_LIMB));
 451             c3 = c3.and(LIMB_MASK);
 452 
 453             a0 = (IntVector) c0.cast(intSpecies);
 454             a1 = (IntVector) c1.cast(intSpecies);
 455             a2 = (IntVector) c2.cast(intSpecies);
 456             a3 = (IntVector) c3.cast(intSpecies);
 457             a4 = (IntVector) c4.cast(intSpecies);
 458 
 459             // collect lanes and calculate tag
 460             long a0Fin = a0.addLanes();
 461             long a1Fin = a1.addLanes();
 462             long a2Fin = a2.addLanes();
 463             long a3Fin = a3.addLanes();
 464             long a4Fin = a4.addLanes();
 465 
 466             // carry/reduce the result
 467             a4Fin = a4Fin + (a3Fin >>> BITS_PER_LIMB);
 468             a3Fin = a3Fin & LIMB_MASK;
 469             a0Fin = a0Fin + ((a4Fin >>> BITS_PER_LIMB) * 5);
 470             a4Fin = a4Fin & LIMB_MASK;
 471             a1Fin = a1Fin + (a0Fin >>> BITS_PER_LIMB);
 472             a0Fin = a0Fin & LIMB_MASK;
 473             a2Fin = a2Fin + (a1Fin >>> BITS_PER_LIMB);
 474             a1Fin = a1Fin & LIMB_MASK;
 475             a3Fin = a3Fin + (a2Fin >>> BITS_PER_LIMB);
 476             a2Fin = a2Fin & LIMB_MASK;
 477             a4Fin = a4Fin + (a3Fin >>> BITS_PER_LIMB);
 478             a3Fin = a3Fin & LIMB_MASK;
 479 
 480             byte[] s_arr =
 481                 Arrays.copyOfRange(keyBytes, RS_LENGTH, 2 * RS_LENGTH);
 482             int[] s = fromByteArray(s_arr);
 483 
 484             // Add in the s-half of the key to the accumulator


< prev index next >