jdk Sdiff src/java.base/share/classes/java/math

src/java.base/share/classes/java/math/BigInteger.java

rev 12150 : 8130150: Implement BigInteger.montgomeryMultiply intrinsic
Summary: Add montgomeryMultiply intrinsics
Reviewed-by: kvn

 245     static final int BURNIKEL_ZIEGLER_OFFSET = 40;
 246 
 247     /**
 248      * The threshold value for using Schoenhage recursive base conversion. If
 249      * the number of ints in the number are larger than this value,
 250      * the Schoenhage algorithm will be used.  In practice, it appears that the
 251      * Schoenhage routine is faster for any threshold down to 2, and is
 252      * relatively flat for thresholds between 2-25, so this choice may be
 253      * varied within this range for very small effect.
 254      */
 255     private static final int SCHOENHAGE_BASE_CONVERSION_THRESHOLD = 20;
 256 
 257     /**
 258      * The threshold value for using squaring code to perform multiplication
 259      * of a {@code BigInteger} instance by itself.  If the number of ints in
 260      * the number are larger than this value, {@code multiply(this)} will
 261      * return {@code square()}.
 262      */
 263     private static final int MULTIPLY_SQUARE_THRESHOLD = 20;
 264 









 265     // Constructors
 266 
 267     /**
 268      * Translates a byte sub-array containing the two's-complement binary
 269      * representation of a BigInteger into a BigInteger.  The sub-array is
 270      * specified via an offset into the array and a length.  The sub-array is
 271      * assumed to be in <i>big-endian</i> byte-order: the most significant
 272      * byte is the element at index {@code off}.  The {@code val} array is
 273      * assumed to be unchanged for the duration of the constructor call.
 274      *
 275      * An {@code IndexOutOfBoundsException} is thrown if the length of the array
 276      * {@code val} is non-zero and either {@code off} is negative, {@code len}
 277      * is negative, or {@code off+len} is greater than the length of
 278      * {@code val}.
 279      *
 280      * @param  val byte array containing a sub-array which is the big-endian
 281      *         two's-complement binary representation of a BigInteger.
 282      * @param  off the start offset of the binary representation.
 283      * @param  len the number of bytes to use.
 284      * @throws NumberFormatException {@code val} is zero bytes long.

1622         if (dh != 0L) {
1623             carry = 0;
1624             rstart = rmag.length - 2;
1625             for (int i = xlen - 1; i >= 0; i--) {
1626                 long product = (value[i] & LONG_MASK) * dh +
1627                     (rmag[rstart] & LONG_MASK) + carry;
1628                 rmag[rstart--] = (int)product;
1629                 carry = product >>> 32;
1630             }
1631             rmag[0] = (int)carry;
1632         }
1633         if (carry == 0L)
1634             rmag = java.util.Arrays.copyOfRange(rmag, 1, rmag.length);
1635         return new BigInteger(rmag, rsign);
1636     }
1637 
1638     /**
1639      * Multiplies int arrays x and y to the specified lengths and places
1640      * the result into z. There will be no leading zeros in the resultant array.
1641      */
1642     private int[] multiplyToLen(int[] x, int xlen, int[] y, int ylen, int[] z) {
1643         int xstart = xlen - 1;
1644         int ystart = ylen - 1;
1645 
1646         if (z == null || z.length < (xlen+ ylen))
1647             z = new int[xlen+ylen];
1648 
1649         long carry = 0;
1650         for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) {
1651             long product = (y[j] & LONG_MASK) *
1652                            (x[xstart] & LONG_MASK) + carry;
1653             z[k] = (int)product;
1654             carry = product >>> 32;
1655         }
1656         z[xstart] = (int)carry;
1657 
1658         for (int i = xstart-1; i >= 0; i--) {
1659             carry = 0;
1660             for (int j=ystart, k=ystart+1+i; j >= 0; j--, k--) {
1661                 long product = (y[j] & LONG_MASK) *
1662                                (x[i] & LONG_MASK) +

2584             // Combine results using Chinese Remainder Theorem
2585             BigInteger y1 = m2.modInverse(m1);
2586             BigInteger y2 = m1.modInverse(m2);
2587 
2588             if (m.mag.length < MAX_MAG_LENGTH / 2) {
2589                 result = a1.multiply(m2).multiply(y1).add(a2.multiply(m1).multiply(y2)).mod(m);
2590             } else {
2591                 MutableBigInteger t1 = new MutableBigInteger();
2592                 new MutableBigInteger(a1.multiply(m2)).multiply(new MutableBigInteger(y1), t1);
2593                 MutableBigInteger t2 = new MutableBigInteger();
2594                 new MutableBigInteger(a2.multiply(m1)).multiply(new MutableBigInteger(y2), t2);
2595                 t1.add(t2);
2596                 MutableBigInteger q = new MutableBigInteger();
2597                 result = t1.divide(new MutableBigInteger(m), q).toBigInteger();
2598             }
2599         }
2600 
2601         return (invertResult ? result.modInverse(m) : result);
2602     }
2603 





































































2604     static int[] bnExpModThreshTable = {7, 25, 81, 241, 673, 1793,
2605                                                 Integer.MAX_VALUE}; // Sentinel
2606 
2607     /**
2608      * Returns a BigInteger whose value is x to the power of y mod z.
2609      * Assumes: z is odd && x < z.
2610      */
2611     private BigInteger oddModPow(BigInteger y, BigInteger z) {
2612     /*
2613      * The algorithm is adapted from Colin Plumb's C library.
2614      *
2615      * The window algorithm:
2616      * The idea is to keep a running product of b1 = n^(high-order bits of exp)
2617      * and then keep appending exponent bits to it.  The following patterns
2618      * apply to a 3-bit window (k = 3):
2619      * To append   0: square
2620      * To append   1: square, multiply by n^1
2621      * To append  10: square, multiply by n^1, square
2622      * To append  11: square, square, multiply by n^3
2623      * To append 100: square, multiply by n^1, square, square

2662      * 1 followed by k-1 0 bits, so it again only requires k-2
2663      * squarings, not k-1.  The average of these is 1.  Add that
2664      * to the one squaring we have to do to compute the table,
2665      * and you'll see that a k-bit window saves k-2 squarings
2666      * as well as reducing the multiplies.  (It actually doesn't
2667      * hurt in the case k = 1, either.)
2668      */
2669         // Special case for exponent of one
2670         if (y.equals(ONE))
2671             return this;
2672 
2673         // Special case for base of zero
2674         if (signum == 0)
2675             return ZERO;
2676 
2677         int[] base = mag.clone();
2678         int[] exp = y.mag;
2679         int[] mod = z.mag;
2680         int modLen = mod.length;
2681 











2682         // Select an appropriate window size
2683         int wbits = 0;
2684         int ebits = bitLength(exp, exp.length);
2685         // if exponent is 65537 (0x10001), use minimum window size
2686         if ((ebits != 17) || (exp[0] != 65537)) {
2687             while (ebits > bnExpModThreshTable[wbits]) {
2688                 wbits++;
2689             }
2690         }
2691 
2692         // Calculate appropriate table size
2693         int tblmask = 1 << wbits;
2694 
2695         // Allocate table for precomputed odd powers of base in Montgomery form
2696         int[][] table = new int[tblmask][];
2697         for (int i=0; i < tblmask; i++)
2698             table[i] = new int[modLen];
2699 
2700         // Compute the modular inverse
2701         int inv = -MutableBigInteger.inverseMod32(mod[modLen-1]);


2702 
2703         // Convert base to Montgomery form
2704         int[] a = leftShift(base, base.length, modLen << 5);
2705 
2706         MutableBigInteger q = new MutableBigInteger(),
2707                           a2 = new MutableBigInteger(a),
2708                           b2 = new MutableBigInteger(mod);


2709 
2710         MutableBigInteger r= a2.divide(b2, q);
2711         table[0] = r.toIntArray();
2712 
2713         // Pad table[0] with leading zeros so its length is at least modLen
2714         if (table[0].length < modLen) {
2715            int offset = modLen - table[0].length;
2716            int[] t2 = new int[modLen];
2717            for (int i=0; i < table[0].length; i++)
2718                t2[i+offset] = table[0][i];
2719            table[0] = t2;
2720         }
2721 
2722         // Set b to the square of the base
2723         int[] b = squareToLen(table[0], modLen, null);
2724         b = montReduce(b, mod, modLen, inv);
2725 
2726         // Set t to high half of b
2727         int[] t = Arrays.copyOf(b, modLen);
2728 
2729         // Fill in the table with odd powers of the base
2730         for (int i=1; i < tblmask; i++) {
2731             int[] prod = multiplyToLen(t, modLen, table[i-1], modLen, null);
2732             table[i] = montReduce(prod, mod, modLen, inv);
2733         }
2734 
2735         // Pre load the window that slides over the exponent
2736         int bitpos = 1 << ((ebits-1) & (32-1));
2737 
2738         int buf = 0;
2739         int elen = exp.length;
2740         int eIndex = 0;
2741         for (int i = 0; i <= wbits; i++) {
2742             buf = (buf << 1) | (((exp[eIndex] & bitpos) != 0)?1:0);
2743             bitpos >>>= 1;
2744             if (bitpos == 0) {
2745                 eIndex++;
2746                 bitpos = 1 << (32-1);
2747                 elen--;
2748             }
2749         }
2750 
2751         int multpos = ebits;
2752

2783             }
2784 
2785             // Examine the window for pending multiplies
2786             if ((buf & tblmask) != 0) {
2787                 multpos = ebits - wbits;
2788                 while ((buf & 1) == 0) {
2789                     buf >>>= 1;
2790                     multpos++;
2791                 }
2792                 mult = table[buf >>> 1];
2793                 buf = 0;
2794             }
2795 
2796             // Perform multiply
2797             if (ebits == multpos) {
2798                 if (isone) {
2799                     b = mult.clone();
2800                     isone = false;
2801                 } else {
2802                     t = b;
2803                     a = multiplyToLen(t, modLen, mult, modLen, a);
2804                     a = montReduce(a, mod, modLen, inv);
2805                     t = a; a = b; b = t;
2806                 }
2807             }
2808 
2809             // Check if done
2810             if (ebits == 0)
2811                 break;
2812 
2813             // Square the input
2814             if (!isone) {
2815                 t = b;
2816                 a = squareToLen(t, modLen, a);
2817                 a = montReduce(a, mod, modLen, inv);
2818                 t = a; a = b; b = t;
2819             }
2820         }
2821 
2822         // Convert result out of Montgomery form and return
2823         int[] t2 = new int[2*modLen];
2824         System.arraycopy(b, 0, t2, modLen, modLen);
2825 
2826         b = montReduce(t2, mod, modLen, inv);
2827 
2828         t2 = Arrays.copyOf(b, modLen);
2829 
2830         return new BigInteger(1, t2);
2831     }
2832 
2833     /**
2834      * Montgomery reduce n, modulo mod.  This reduces modulo mod and divides
2835      * by 2^(32*mlen). Adapted from Colin Plumb's C library.
2836      */
2837     private static int[] montReduce(int[] n, int[] mod, int mlen, int inv) {
2838         int c=0;
2839         int len = mlen;
2840         int offset=0;
2841 
2842         do {
2843             int nEnd = n[n.length-1-offset];
2844             int carry = mulAdd(n, mod, offset, mlen, inv * nEnd);
2845             c += addOne(n, offset, mlen, carry);
2846             offset++;

 245     static final int BURNIKEL_ZIEGLER_OFFSET = 40;
 246 
 247     /**
 248      * The threshold value for using Schoenhage recursive base conversion. If
 249      * the number of ints in the number are larger than this value,
 250      * the Schoenhage algorithm will be used.  In practice, it appears that the
 251      * Schoenhage routine is faster for any threshold down to 2, and is
 252      * relatively flat for thresholds between 2-25, so this choice may be
 253      * varied within this range for very small effect.
 254      */
 255     private static final int SCHOENHAGE_BASE_CONVERSION_THRESHOLD = 20;
 256 
 257     /**
 258      * The threshold value for using squaring code to perform multiplication
 259      * of a {@code BigInteger} instance by itself.  If the number of ints in
 260      * the number are larger than this value, {@code multiply(this)} will
 261      * return {@code square()}.
 262      */
 263     private static final int MULTIPLY_SQUARE_THRESHOLD = 20;
 264 
 265     /**
 266      * The threshold for using an intrinsic version of
 267      * implMontgomeryXXX to perform Montgomery multiplication.  If the
 268      * number of ints in the number is more than this value we do not
 269      * use the intrinsic.
 270      */
 271     private static final int MONTGOMERY_INTRINSIC_THRESHOLD = 512;
 272 
 273 
 274     // Constructors
 275 
 276     /**
 277      * Translates a byte sub-array containing the two's-complement binary
 278      * representation of a BigInteger into a BigInteger.  The sub-array is
 279      * specified via an offset into the array and a length.  The sub-array is
 280      * assumed to be in <i>big-endian</i> byte-order: the most significant
 281      * byte is the element at index {@code off}.  The {@code val} array is
 282      * assumed to be unchanged for the duration of the constructor call.
 283      *
 284      * An {@code IndexOutOfBoundsException} is thrown if the length of the array
 285      * {@code val} is non-zero and either {@code off} is negative, {@code len}
 286      * is negative, or {@code off+len} is greater than the length of
 287      * {@code val}.
 288      *
 289      * @param  val byte array containing a sub-array which is the big-endian
 290      *         two's-complement binary representation of a BigInteger.
 291      * @param  off the start offset of the binary representation.
 292      * @param  len the number of bytes to use.
 293      * @throws NumberFormatException {@code val} is zero bytes long.

1631         if (dh != 0L) {
1632             carry = 0;
1633             rstart = rmag.length - 2;
1634             for (int i = xlen - 1; i >= 0; i--) {
1635                 long product = (value[i] & LONG_MASK) * dh +
1636                     (rmag[rstart] & LONG_MASK) + carry;
1637                 rmag[rstart--] = (int)product;
1638                 carry = product >>> 32;
1639             }
1640             rmag[0] = (int)carry;
1641         }
1642         if (carry == 0L)
1643             rmag = java.util.Arrays.copyOfRange(rmag, 1, rmag.length);
1644         return new BigInteger(rmag, rsign);
1645     }
1646 
1647     /**
1648      * Multiplies int arrays x and y to the specified lengths and places
1649      * the result into z. There will be no leading zeros in the resultant array.
1650      */
1651     private static int[] multiplyToLen(int[] x, int xlen, int[] y, int ylen, int[] z) {
1652         int xstart = xlen - 1;
1653         int ystart = ylen - 1;
1654 
1655         if (z == null || z.length < (xlen+ ylen))
1656             z = new int[xlen+ylen];
1657 
1658         long carry = 0;
1659         for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) {
1660             long product = (y[j] & LONG_MASK) *
1661                            (x[xstart] & LONG_MASK) + carry;
1662             z[k] = (int)product;
1663             carry = product >>> 32;
1664         }
1665         z[xstart] = (int)carry;
1666 
1667         for (int i = xstart-1; i >= 0; i--) {
1668             carry = 0;
1669             for (int j=ystart, k=ystart+1+i; j >= 0; j--, k--) {
1670                 long product = (y[j] & LONG_MASK) *
1671                                (x[i] & LONG_MASK) +

2593             // Combine results using Chinese Remainder Theorem
2594             BigInteger y1 = m2.modInverse(m1);
2595             BigInteger y2 = m1.modInverse(m2);
2596 
2597             if (m.mag.length < MAX_MAG_LENGTH / 2) {
2598                 result = a1.multiply(m2).multiply(y1).add(a2.multiply(m1).multiply(y2)).mod(m);
2599             } else {
2600                 MutableBigInteger t1 = new MutableBigInteger();
2601                 new MutableBigInteger(a1.multiply(m2)).multiply(new MutableBigInteger(y1), t1);
2602                 MutableBigInteger t2 = new MutableBigInteger();
2603                 new MutableBigInteger(a2.multiply(m1)).multiply(new MutableBigInteger(y2), t2);
2604                 t1.add(t2);
2605                 MutableBigInteger q = new MutableBigInteger();
2606                 result = t1.divide(new MutableBigInteger(m), q).toBigInteger();
2607             }
2608         }
2609 
2610         return (invertResult ? result.modInverse(m) : result);
2611     }
2612 
2613     // Montgomery multiplication.  These are wrappers for
2614     // implMontgomeryXX routines which are expected to be replaced by
2615     // virtual machine intrinsics.  We don't use the intrinsics for
2616     // very large operands: MONTGOMERY_INTRINSIC_THRESHOLD should be
2617     // larger than any reasonable crypto key.
2618     private static int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv,
2619                                             int[] product) {
2620         implMontgomeryMultiplyChecks(a, b, n, len, product);
2621         if (len > MONTGOMERY_INTRINSIC_THRESHOLD) {
2622             // Very long argument: do not use an intrinsic
2623             product = multiplyToLen(a, len, b, len, product);
2624             return montReduce(product, n, len, (int)inv);
2625         } else {
2626             return implMontgomeryMultiply(a, b, n, len, inv, materialize(product, len));
2627         }
2628     }
2629     private static int[] montgomerySquare(int[] a, int[] n, int len, long inv,
2630                                           int[] product) {
2631         implMontgomeryMultiplyChecks(a, a, n, len, product);
2632         if (len > MONTGOMERY_INTRINSIC_THRESHOLD) {
2633             // Very long argument: do not use an intrinsic
2634             product = squareToLen(a, len, product);
2635             return montReduce(product, n, len, (int)inv);
2636         } else {
2637             return implMontgomerySquare(a, n, len, inv, materialize(product, len));
2638         }
2639     }
2640 
2641     // Range-check everything.
2642     private static void implMontgomeryMultiplyChecks
2643         (int[] a, int[] b, int[] n, int len, int[] product) throws RuntimeException {
2644         if (len % 2 != 0) {
2645             throw new IllegalArgumentException("input array length must be even: " + len);
2646         }
2647 
2648         if (len < 1) {
2649             throw new IllegalArgumentException("invalid input length: " + len);
2650         }
2651 
2652         if (len > a.length ||
2653             len > b.length ||
2654             len > n.length ||
2655             (product != null && len > product.length)) {
2656             throw new IllegalArgumentException("input array length out of bound: " + len);
2657         }
2658     }
2659 
2660     // Make sure that the int array z (which is expected to contain
2661     // the result of a Montgomery multiplication) is present and
2662     // sufficiently large.
2663     private static int[] materialize(int[] z, int len) {
2664          if (z == null || z.length < len)
2665              z = new int[len];
2666          return z;
2667     }
2668 
2669     // These methods are intended to be be replaced by virtual machine
2670     // intrinsics.
2671     private static int[] implMontgomeryMultiply(int[] a, int[] b, int[] n, int len,
2672                                          long inv, int[] product) {
2673         product = multiplyToLen(a, len, b, len, product);
2674         return montReduce(product, n, len, (int)inv);
2675     }
2676     private static int[] implMontgomerySquare(int[] a, int[] n, int len,
2677                                        long inv, int[] product) {
2678         product = squareToLen(a, len, product);
2679         return montReduce(product, n, len, (int)inv);
2680     }
2681 
2682     static int[] bnExpModThreshTable = {7, 25, 81, 241, 673, 1793,
2683                                                 Integer.MAX_VALUE}; // Sentinel
2684 
2685     /**
2686      * Returns a BigInteger whose value is x to the power of y mod z.
2687      * Assumes: z is odd && x < z.
2688      */
2689     private BigInteger oddModPow(BigInteger y, BigInteger z) {
2690     /*
2691      * The algorithm is adapted from Colin Plumb's C library.
2692      *
2693      * The window algorithm:
2694      * The idea is to keep a running product of b1 = n^(high-order bits of exp)
2695      * and then keep appending exponent bits to it.  The following patterns
2696      * apply to a 3-bit window (k = 3):
2697      * To append   0: square
2698      * To append   1: square, multiply by n^1
2699      * To append  10: square, multiply by n^1, square
2700      * To append  11: square, square, multiply by n^3
2701      * To append 100: square, multiply by n^1, square, square

2740      * 1 followed by k-1 0 bits, so it again only requires k-2
2741      * squarings, not k-1.  The average of these is 1.  Add that
2742      * to the one squaring we have to do to compute the table,
2743      * and you'll see that a k-bit window saves k-2 squarings
2744      * as well as reducing the multiplies.  (It actually doesn't
2745      * hurt in the case k = 1, either.)
2746      */
2747         // Special case for exponent of one
2748         if (y.equals(ONE))
2749             return this;
2750 
2751         // Special case for base of zero
2752         if (signum == 0)
2753             return ZERO;
2754 
2755         int[] base = mag.clone();
2756         int[] exp = y.mag;
2757         int[] mod = z.mag;
2758         int modLen = mod.length;
2759 
2760         // Make modLen even. It is conventional to use a cryptographic
2761         // modulus that is 512, 768, 1024, or 2048 bits, so this code
2762         // will not normally be executed. However, it is necessary for
2763         // the correct functioning of the HotSpot intrinsics.
2764         if ((modLen & 1) != 0) {
2765             int[] x = new int[modLen + 1];
2766             System.arraycopy(mod, 0, x, 1, modLen);
2767             mod = x;
2768             modLen++;
2769         }
2770 
2771         // Select an appropriate window size
2772         int wbits = 0;
2773         int ebits = bitLength(exp, exp.length);
2774         // if exponent is 65537 (0x10001), use minimum window size
2775         if ((ebits != 17) || (exp[0] != 65537)) {
2776             while (ebits > bnExpModThreshTable[wbits]) {
2777                 wbits++;
2778             }
2779         }
2780 
2781         // Calculate appropriate table size
2782         int tblmask = 1 << wbits;
2783 
2784         // Allocate table for precomputed odd powers of base in Montgomery form
2785         int[][] table = new int[tblmask][];
2786         for (int i=0; i < tblmask; i++)
2787             table[i] = new int[modLen];
2788 
2789         // Compute the modular inverse of the least significant 64-bit
2790         // digit of the modulus
2791         long n0 = (mod[modLen-1] & LONG_MASK) + ((mod[modLen-2] & LONG_MASK) << 32);
2792         long inv = -MutableBigInteger.inverseMod64(n0);
2793 
2794         // Convert base to Montgomery form
2795         int[] a = leftShift(base, base.length, modLen << 5);
2796 
2797         MutableBigInteger q = new MutableBigInteger(),
2798                           a2 = new MutableBigInteger(a),
2799                           b2 = new MutableBigInteger(mod);
2800         b2.normalize(); // MutableBigInteger.divide() assumes that its
2801                         // divisor is in normal form.
2802 
2803         MutableBigInteger r= a2.divide(b2, q);
2804         table[0] = r.toIntArray();
2805 
2806         // Pad table[0] with leading zeros so its length is at least modLen
2807         if (table[0].length < modLen) {
2808            int offset = modLen - table[0].length;
2809            int[] t2 = new int[modLen];
2810            System.arraycopy(table[0], 0, t2, offset, table[0].length);

2811            table[0] = t2;
2812         }
2813 
2814         // Set b to the square of the base
2815         int[] b = montgomerySquare(table[0], mod, modLen, inv, null);

2816 
2817         // Set t to high half of b
2818         int[] t = Arrays.copyOf(b, modLen);
2819 
2820         // Fill in the table with odd powers of the base
2821         for (int i=1; i < tblmask; i++) {
2822             table[i] = montgomeryMultiply(t, table[i-1], mod, modLen, inv, null);

2823         }
2824 
2825         // Pre load the window that slides over the exponent
2826         int bitpos = 1 << ((ebits-1) & (32-1));
2827 
2828         int buf = 0;
2829         int elen = exp.length;
2830         int eIndex = 0;
2831         for (int i = 0; i <= wbits; i++) {
2832             buf = (buf << 1) | (((exp[eIndex] & bitpos) != 0)?1:0);
2833             bitpos >>>= 1;
2834             if (bitpos == 0) {
2835                 eIndex++;
2836                 bitpos = 1 << (32-1);
2837                 elen--;
2838             }
2839         }
2840 
2841         int multpos = ebits;
2842

2873             }
2874 
2875             // Examine the window for pending multiplies
2876             if ((buf & tblmask) != 0) {
2877                 multpos = ebits - wbits;
2878                 while ((buf & 1) == 0) {
2879                     buf >>>= 1;
2880                     multpos++;
2881                 }
2882                 mult = table[buf >>> 1];
2883                 buf = 0;
2884             }
2885 
2886             // Perform multiply
2887             if (ebits == multpos) {
2888                 if (isone) {
2889                     b = mult.clone();
2890                     isone = false;
2891                 } else {
2892                     t = b;
2893                     a = montgomeryMultiply(t, mult, mod, modLen, inv, a);

2894                     t = a; a = b; b = t;
2895                 }
2896             }
2897 
2898             // Check if done
2899             if (ebits == 0)
2900                 break;
2901 
2902             // Square the input
2903             if (!isone) {
2904                 t = b;
2905                 a = montgomerySquare(t, mod, modLen, inv, a);

2906                 t = a; a = b; b = t;
2907             }
2908         }
2909 
2910         // Convert result out of Montgomery form and return
2911         int[] t2 = new int[2*modLen];
2912         System.arraycopy(b, 0, t2, modLen, modLen);
2913 
2914         b = montReduce(t2, mod, modLen, (int)inv);
2915 
2916         t2 = Arrays.copyOf(b, modLen);
2917 
2918         return new BigInteger(1, t2);
2919     }
2920 
2921     /**
2922      * Montgomery reduce n, modulo mod.  This reduces modulo mod and divides
2923      * by 2^(32*mlen). Adapted from Colin Plumb's C library.
2924      */
2925     private static int[] montReduce(int[] n, int[] mod, int mlen, int inv) {
2926         int c=0;
2927         int len = mlen;
2928         int offset=0;
2929 
2930         do {
2931             int nEnd = n[n.length-1-offset];
2932             int carry = mulAdd(n, mod, offset, mlen, inv * nEnd);
2933             c += addOne(n, offset, mlen, carry);
2934             offset++;

< prev index next >