< prev index next >

src/java.base/share/classes/java/math/BigInteger.java

Print this page
rev 12150 : 8046943: Leverage CPU Instructions for GHASH and RSA
Summary: Add montgomeryMultiply intrinsics
Reviewed-by: kvn


2587 
2588             if (m.mag.length < MAX_MAG_LENGTH / 2) {
2589                 result = a1.multiply(m2).multiply(y1).add(a2.multiply(m1).multiply(y2)).mod(m);
2590             } else {
2591                 MutableBigInteger t1 = new MutableBigInteger();
2592                 new MutableBigInteger(a1.multiply(m2)).multiply(new MutableBigInteger(y1), t1);
2593                 MutableBigInteger t2 = new MutableBigInteger();
2594                 new MutableBigInteger(a2.multiply(m1)).multiply(new MutableBigInteger(y2), t2);
2595                 t1.add(t2);
2596                 MutableBigInteger q = new MutableBigInteger();
2597                 result = t1.divide(new MutableBigInteger(m), q).toBigInteger();
2598             }
2599         }
2600 
2601         return (invertResult ? result.modInverse(m) : result);
2602     }
2603 
2604     static int[] bnExpModThreshTable = {7, 25, 81, 241, 673, 1793,
2605                                                 Integer.MAX_VALUE}; // Sentinel
2606 












2607     /**
2608      * Returns a BigInteger whose value is x to the power of y mod z.
2609      * Assumes: z is odd && x < z.
2610      */
2611     private BigInteger oddModPow(BigInteger y, BigInteger z) {
2612     /*
2613      * The algorithm is adapted from Colin Plumb's C library.
2614      *
2615      * The window algorithm:
2616      * The idea is to keep a running product of b1 = n^(high-order bits of exp)
2617      * and then keep appending exponent bits to it.  The following patterns
2618      * apply to a 3-bit window (k = 3):
2619      * To append   0: square
2620      * To append   1: square, multiply by n^1
2621      * To append  10: square, multiply by n^1, square
2622      * To append  11: square, square, multiply by n^3
2623      * To append 100: square, multiply by n^1, square, square
2624      * To append 101: square, square, square, multiply by n^5
2625      * To append 110: square, square, multiply by n^3, square
2626      * To append 111: square, square, square, multiply by n^7


2662      * 1 followed by k-1 0 bits, so it again only requires k-2
2663      * squarings, not k-1.  The average of these is 1.  Add that
2664      * to the one squaring we have to do to compute the table,
2665      * and you'll see that a k-bit window saves k-2 squarings
2666      * as well as reducing the multiplies.  (It actually doesn't
2667      * hurt in the case k = 1, either.)
2668      */
2669         // Special case for exponent of one
2670         if (y.equals(ONE))
2671             return this;
2672 
2673         // Special case for base of zero
2674         if (signum == 0)
2675             return ZERO;
2676 
2677         int[] base = mag.clone();
2678         int[] exp = y.mag;
2679         int[] mod = z.mag;
2680         int modLen = mod.length;
2681 











2682         // Select an appropriate window size
2683         int wbits = 0;
2684         int ebits = bitLength(exp, exp.length);
2685         // if exponent is 65537 (0x10001), use minimum window size
2686         if ((ebits != 17) || (exp[0] != 65537)) {
2687             while (ebits > bnExpModThreshTable[wbits]) {
2688                 wbits++;
2689             }
2690         }
2691 
2692         // Calculate appropriate table size
2693         int tblmask = 1 << wbits;
2694 
2695         // Allocate table for precomputed odd powers of base in Montgomery form
2696         int[][] table = new int[tblmask][];
2697         for (int i=0; i < tblmask; i++)
2698             table[i] = new int[modLen];
2699 
2700         // Compute the modular inverse
2701         int inv = -MutableBigInteger.inverseMod32(mod[modLen-1]);


2702 
2703         // Convert base to Montgomery form
2704         int[] a = leftShift(base, base.length, modLen << 5);
2705 
2706         MutableBigInteger q = new MutableBigInteger(),
2707                           a2 = new MutableBigInteger(a),
2708                           b2 = new MutableBigInteger(mod);


2709 
2710         MutableBigInteger r= a2.divide(b2, q);
2711         table[0] = r.toIntArray();
2712 
2713         // Pad table[0] with leading zeros so its length is at least modLen
2714         if (table[0].length < modLen) {
2715            int offset = modLen - table[0].length;
2716            int[] t2 = new int[modLen];
2717            for (int i=0; i < table[0].length; i++)
2718                t2[i+offset] = table[0][i];
2719            table[0] = t2;
2720         }
2721 
2722         // Set b to the square of the base
2723         int[] b = squareToLen(table[0], modLen, null);
2724         b = montReduce(b, mod, modLen, inv);
2725 
2726         // Set t to high half of b
2727         int[] t = Arrays.copyOf(b, modLen);
2728 
2729         // Fill in the table with odd powers of the base
2730         for (int i=1; i < tblmask; i++) {
2731             int[] prod = multiplyToLen(t, modLen, table[i-1], modLen, null);
2732             table[i] = montReduce(prod, mod, modLen, inv);
2733         }
2734 
2735         // Pre load the window that slides over the exponent
2736         int bitpos = 1 << ((ebits-1) & (32-1));
2737 
2738         int buf = 0;
2739         int elen = exp.length;
2740         int eIndex = 0;
2741         for (int i = 0; i <= wbits; i++) {
2742             buf = (buf << 1) | (((exp[eIndex] & bitpos) != 0)?1:0);
2743             bitpos >>>= 1;
2744             if (bitpos == 0) {
2745                 eIndex++;
2746                 bitpos = 1 << (32-1);
2747                 elen--;
2748             }
2749         }
2750 
2751         int multpos = ebits;
2752 


2783             }
2784 
2785             // Examine the window for pending multiplies
2786             if ((buf & tblmask) != 0) {
2787                 multpos = ebits - wbits;
2788                 while ((buf & 1) == 0) {
2789                     buf >>>= 1;
2790                     multpos++;
2791                 }
2792                 mult = table[buf >>> 1];
2793                 buf = 0;
2794             }
2795 
2796             // Perform multiply
2797             if (ebits == multpos) {
2798                 if (isone) {
2799                     b = mult.clone();
2800                     isone = false;
2801                 } else {
2802                     t = b;
2803                     a = multiplyToLen(t, modLen, mult, modLen, a);
2804                     a = montReduce(a, mod, modLen, inv);
2805                     t = a; a = b; b = t;
2806                 }
2807             }
2808 
2809             // Check if done
2810             if (ebits == 0)
2811                 break;
2812 
2813             // Square the input
2814             if (!isone) {
2815                 t = b;
2816                 a = squareToLen(t, modLen, a);
2817                 a = montReduce(a, mod, modLen, inv);
2818                 t = a; a = b; b = t;
2819             }
2820         }
2821 
2822         // Convert result out of Montgomery form and return
2823         int[] t2 = new int[2*modLen];
2824         System.arraycopy(b, 0, t2, modLen, modLen);
2825 
2826         b = montReduce(t2, mod, modLen, inv);
2827 
2828         t2 = Arrays.copyOf(b, modLen);
2829 
2830         return new BigInteger(1, t2);
2831     }
2832 
2833     /**
2834      * Montgomery reduce n, modulo mod.  This reduces modulo mod and divides
2835      * by 2^(32*mlen). Adapted from Colin Plumb's C library.
2836      */
2837     private static int[] montReduce(int[] n, int[] mod, int mlen, int inv) {
2838         int c=0;
2839         int len = mlen;
2840         int offset=0;
2841 
2842         do {
2843             int nEnd = n[n.length-1-offset];
2844             int carry = mulAdd(n, mod, offset, mlen, inv * nEnd);
2845             c += addOne(n, offset, mlen, carry);
2846             offset++;




2587 
2588             if (m.mag.length < MAX_MAG_LENGTH / 2) {
2589                 result = a1.multiply(m2).multiply(y1).add(a2.multiply(m1).multiply(y2)).mod(m);
2590             } else {
2591                 MutableBigInteger t1 = new MutableBigInteger();
2592                 new MutableBigInteger(a1.multiply(m2)).multiply(new MutableBigInteger(y1), t1);
2593                 MutableBigInteger t2 = new MutableBigInteger();
2594                 new MutableBigInteger(a2.multiply(m1)).multiply(new MutableBigInteger(y2), t2);
2595                 t1.add(t2);
2596                 MutableBigInteger q = new MutableBigInteger();
2597                 result = t1.divide(new MutableBigInteger(m), q).toBigInteger();
2598             }
2599         }
2600 
2601         return (invertResult ? result.modInverse(m) : result);
2602     }
2603 
2604     static int[] bnExpModThreshTable = {7, 25, 81, 241, 673, 1793,
2605                                                 Integer.MAX_VALUE}; // Sentinel
2606     
2607     private int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv,
2608                                      int[] product) {
2609         product = multiplyToLen(a, len, b, len, product);
2610         return montReduce(product, n, len, (int)inv);
2611     }
2612 
2613     private int[] montgomerySquare(int[] a, int[] n, int len, long inv,
2614                                      int[] product) {
2615         product = squareToLen(a, len, product);
2616         return montReduce(product, n, len, (int)inv);
2617     }
2618 
2619     /**
2620      * Returns a BigInteger whose value is x to the power of y mod z.
2621      * Assumes: z is odd && x < z.
2622      */
2623     private BigInteger oddModPow(BigInteger y, BigInteger z) {
2624     /*
2625      * The algorithm is adapted from Colin Plumb's C library.
2626      *
2627      * The window algorithm:
2628      * The idea is to keep a running product of b1 = n^(high-order bits of exp)
2629      * and then keep appending exponent bits to it.  The following patterns
2630      * apply to a 3-bit window (k = 3):
2631      * To append   0: square
2632      * To append   1: square, multiply by n^1
2633      * To append  10: square, multiply by n^1, square
2634      * To append  11: square, square, multiply by n^3
2635      * To append 100: square, multiply by n^1, square, square
2636      * To append 101: square, square, square, multiply by n^5
2637      * To append 110: square, square, multiply by n^3, square
2638      * To append 111: square, square, square, multiply by n^7


2674      * 1 followed by k-1 0 bits, so it again only requires k-2
2675      * squarings, not k-1.  The average of these is 1.  Add that
2676      * to the one squaring we have to do to compute the table,
2677      * and you'll see that a k-bit window saves k-2 squarings
2678      * as well as reducing the multiplies.  (It actually doesn't
2679      * hurt in the case k = 1, either.)
2680      */
2681         // Special case for exponent of one
2682         if (y.equals(ONE))
2683             return this;
2684 
2685         // Special case for base of zero
2686         if (signum == 0)
2687             return ZERO;
2688 
2689         int[] base = mag.clone();
2690         int[] exp = y.mag;
2691         int[] mod = z.mag;
2692         int modLen = mod.length;
2693 
2694         // Make modLen even. It is conventional to use a cryptographic
2695         // modulus that is 512, 768, 1024, or 2048 bits, so this code
2696         // will not normally be executed. However, it is necessary for
2697         // the correct functioning of the HotSpot intrinsics.
2698         if ((modLen & 1) != 0) {
2699             int[] x = new int[modLen + 1];
2700             System.arraycopy(mod, 0, x, 1, modLen);
2701             mod = x;
2702             modLen++;
2703         }
2704                 
2705         // Select an appropriate window size
2706         int wbits = 0;
2707         int ebits = bitLength(exp, exp.length);
2708         // if exponent is 65537 (0x10001), use minimum window size
2709         if ((ebits != 17) || (exp[0] != 65537)) {
2710             while (ebits > bnExpModThreshTable[wbits]) {
2711                 wbits++;
2712             }
2713         }
2714 
2715         // Calculate appropriate table size
2716         int tblmask = 1 << wbits;
2717 
2718         // Allocate table for precomputed odd powers of base in Montgomery form
2719         int[][] table = new int[tblmask][];
2720         for (int i=0; i < tblmask; i++)
2721             table[i] = new int[modLen];
2722 
2723         // Compute the modular inverse of the least significant 64-bit
2724         // digit of the modulus
2725         long n0 = (mod[modLen-1] & LONG_MASK) + ((mod[modLen-2] & LONG_MASK) << 32);
2726         long inv = -MutableBigInteger.inverseMod64(n0);
2727 
2728         // Convert base to Montgomery form
2729         int[] a = leftShift(base, base.length, modLen << 5);
2730 
2731         MutableBigInteger q = new MutableBigInteger(),
2732                           a2 = new MutableBigInteger(a),
2733                           b2 = new MutableBigInteger(mod);
2734         b2.normalize(); // MutableBigInteger.divide() assumes that its
2735                         // divisor is in normal form.
2736 
2737         MutableBigInteger r= a2.divide(b2, q);
2738         table[0] = r.toIntArray();
2739 
2740         // Pad table[0] with leading zeros so its length is at least modLen
2741         if (table[0].length < modLen) {
2742            int offset = modLen - table[0].length;
2743            int[] t2 = new int[modLen];
2744            System.arraycopy(table[0], 0, t2, offset, table[0].length);

2745            table[0] = t2;
2746         }
2747 
2748         // Set b to the square of the base
2749         int[] b = montgomerySquare(table[0], mod, modLen, inv, null);

2750 
2751         // Set t to high half of b
2752         int[] t = Arrays.copyOf(b, modLen);
2753 
2754         // Fill in the table with odd powers of the base
2755         for (int i=1; i < tblmask; i++) {
2756             table[i] = montgomeryMultiply(t, table[i-1], mod, modLen, inv, null);

2757         }
2758 
2759         // Pre load the window that slides over the exponent
2760         int bitpos = 1 << ((ebits-1) & (32-1));
2761 
2762         int buf = 0;
2763         int elen = exp.length;
2764         int eIndex = 0;
2765         for (int i = 0; i <= wbits; i++) {
2766             buf = (buf << 1) | (((exp[eIndex] & bitpos) != 0)?1:0);
2767             bitpos >>>= 1;
2768             if (bitpos == 0) {
2769                 eIndex++;
2770                 bitpos = 1 << (32-1);
2771                 elen--;
2772             }
2773         }
2774 
2775         int multpos = ebits;
2776 


2807             }
2808 
2809             // Examine the window for pending multiplies
2810             if ((buf & tblmask) != 0) {
2811                 multpos = ebits - wbits;
2812                 while ((buf & 1) == 0) {
2813                     buf >>>= 1;
2814                     multpos++;
2815                 }
2816                 mult = table[buf >>> 1];
2817                 buf = 0;
2818             }
2819 
2820             // Perform multiply
2821             if (ebits == multpos) {
2822                 if (isone) {
2823                     b = mult.clone();
2824                     isone = false;
2825                 } else {
2826                     t = b;
2827                     a = montgomeryMultiply(t, mult, mod, modLen, inv, a);

2828                     t = a; a = b; b = t;
2829                 }
2830             }
2831 
2832             // Check if done
2833             if (ebits == 0)
2834                 break;
2835 
2836             // Square the input
2837             if (!isone) {
2838                 t = b;
2839                 a = montgomerySquare(t, mod, modLen, inv, a);

2840                 t = a; a = b; b = t;
2841             }
2842         }
2843 
2844         // Convert result out of Montgomery form and return
2845         int[] t2 = new int[2*modLen];
2846         System.arraycopy(b, 0, t2, modLen, modLen);
2847 
2848         b = montReduce(t2, mod, modLen, (int)inv);
2849 
2850         t2 = Arrays.copyOf(b, modLen);
2851 
2852         return new BigInteger(1, t2);
2853     }
2854 
2855     /**
2856      * Montgomery reduce n, modulo mod.  This reduces modulo mod and divides
2857      * by 2^(32*mlen). Adapted from Colin Plumb's C library.
2858      */
2859     private static int[] montReduce(int[] n, int[] mod, int mlen, int inv) {
2860         int c=0;
2861         int len = mlen;
2862         int offset=0;
2863 
2864         do {
2865             int nEnd = n[n.length-1-offset];
2866             int carry = mulAdd(n, mod, offset, mlen, inv * nEnd);
2867             c += addOne(n, offset, mlen, carry);
2868             offset++;


< prev index next >