Old src/jdk.crypto.ec/share/native/libsunec/impl/mpi.c

   1 /*
   2  * Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * Use is subject to license terms.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public License
  16  * along with this library; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* *********************************************************************
  25  *
  26  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
  27  *
  28  * The Initial Developer of the Original Code is
  29  * Michael J. Fromberger.
  30  * Portions created by the Initial Developer are Copyright (C) 1998
  31  * the Initial Developer. All Rights Reserved.
  32  *
  33  * Contributor(s):
  34  *   Netscape Communications Corporation
  35  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
  36  *
  37  * Last Modified Date from the Original Code: June 2014
  38  *********************************************************************** */
  39 
  40 /*  Arbitrary precision integer arithmetic library */
  41 
  42 #include "mpi-priv.h"
  43 #if defined(OSF1)
  44 #include <c_asm.h>
  45 #endif
  46 
  47 #if MP_LOGTAB
  48 /*
  49   A table of the logs of 2 for various bases (the 0 and 1 entries of
  50   this table are meaningless and should not be referenced).
  51 
  52   This table is used to compute output lengths for the mp_toradix()
  53   function.  Since a number n in radix r takes up about log_r(n)
  54   digits, we estimate the output size by taking the least integer
  55   greater than log_r(n), where:
  56 
  57   log_r(n) = log_2(n) * log_r(2)
  58 
  59   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
  60   which are the output bases supported.
  61  */
  62 #include "logtab.h"
  63 #endif
  64 
  65 /* {{{ Constant strings */
  66 
  67 /* Constant strings returned by mp_strerror() */
  68 static const char *mp_err_string[] = {
  69   "unknown result code",     /* say what?            */
  70   "boolean true",            /* MP_OKAY, MP_YES      */
  71   "boolean false",           /* MP_NO                */
  72   "out of memory",           /* MP_MEM               */
  73   "argument out of range",   /* MP_RANGE             */
  74   "invalid input parameter", /* MP_BADARG            */
  75   "result is undefined"      /* MP_UNDEF             */
  76 };
  77 
  78 /* Value to digit maps for radix conversion   */
  79 
  80 /* s_dmap_1 - standard digits and letters */
  81 static const char *s_dmap_1 =
  82   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
  83 
  84 /* }}} */
  85 
  86 unsigned long mp_allocs;
  87 unsigned long mp_frees;
  88 unsigned long mp_copies;
  89 
  90 /* {{{ Default precision manipulation */
  91 
  92 /* Default precision for newly created mp_int's      */
  93 static mp_size s_mp_defprec = MP_DEFPREC;
  94 
  95 mp_size mp_get_prec(void)
  96 {
  97   return s_mp_defprec;
  98 
  99 } /* end mp_get_prec() */
 100 
 101 void         mp_set_prec(mp_size prec)
 102 {
 103   if(prec == 0)
 104     s_mp_defprec = MP_DEFPREC;
 105   else
 106     s_mp_defprec = prec;
 107 
 108 } /* end mp_set_prec() */
 109 
 110 /* }}} */
 111 
 112 /*------------------------------------------------------------------------*/
 113 /* {{{ mp_init(mp, kmflag) */
 114 
 115 /*
 116   mp_init(mp, kmflag)
 117 
 118   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
 119   MP_MEM if memory could not be allocated for the structure.
 120  */
 121 
 122 mp_err mp_init(mp_int *mp, int kmflag)
 123 {
 124   return mp_init_size(mp, s_mp_defprec, kmflag);
 125 
 126 } /* end mp_init() */
 127 
 128 /* }}} */
 129 
 130 /* {{{ mp_init_size(mp, prec, kmflag) */
 131 
 132 /*
 133   mp_init_size(mp, prec, kmflag)
 134 
 135   Initialize a new zero-valued mp_int with at least the given
 136   precision; returns MP_OKAY if successful, or MP_MEM if memory could
 137   not be allocated for the structure.
 138  */
 139 
 140 mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
 141 {
 142   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
 143 
 144   prec = MP_ROUNDUP(prec, s_mp_defprec);
 145   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
 146     return MP_MEM;
 147 
 148   SIGN(mp) = ZPOS;
 149   USED(mp) = 1;
 150   ALLOC(mp) = prec;
 151 
 152   return MP_OKAY;
 153 
 154 } /* end mp_init_size() */
 155 
 156 /* }}} */
 157 
 158 /* {{{ mp_init_copy(mp, from) */
 159 
 160 /*
 161   mp_init_copy(mp, from)
 162 
 163   Initialize mp as an exact copy of from.  Returns MP_OKAY if
 164   successful, MP_MEM if memory could not be allocated for the new
 165   structure.
 166  */
 167 
 168 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
 169 {
 170   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
 171 
 172   if(mp == from)
 173     return MP_OKAY;
 174 
 175   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 176     return MP_MEM;
 177 
 178   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
 179   USED(mp) = USED(from);
 180   ALLOC(mp) = ALLOC(from);
 181   SIGN(mp) = SIGN(from);
 182 
 183 #ifndef _WIN32
 184   FLAG(mp) = FLAG(from);
 185 #endif /* _WIN32 */
 186 
 187   return MP_OKAY;
 188 
 189 } /* end mp_init_copy() */
 190 
 191 /* }}} */
 192 
 193 /* {{{ mp_copy(from, to) */
 194 
 195 /*
 196   mp_copy(from, to)
 197 
 198   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
 199   'to' has already been initialized (if not, use mp_init_copy()
 200   instead). If 'from' and 'to' are identical, nothing happens.
 201  */
 202 
 203 mp_err mp_copy(const mp_int *from, mp_int *to)
 204 {
 205   ARGCHK(from != NULL && to != NULL, MP_BADARG);
 206 
 207   if(from == to)
 208     return MP_OKAY;
 209 
 210   ++mp_copies;
 211   { /* copy */
 212     mp_digit   *tmp;
 213 
 214     /*
 215       If the allocated buffer in 'to' already has enough space to hold
 216       all the used digits of 'from', we'll re-use it to avoid hitting
 217       the memory allocater more than necessary; otherwise, we'd have
 218       to grow anyway, so we just allocate a hunk and make the copy as
 219       usual
 220      */
 221     if(ALLOC(to) >= USED(from)) {
 222       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
 223       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
 224 
 225     } else {
 226       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 227         return MP_MEM;
 228 
 229       s_mp_copy(DIGITS(from), tmp, USED(from));
 230 
 231       if(DIGITS(to) != NULL) {
 232 #if MP_CRYPTO
 233         s_mp_setz(DIGITS(to), ALLOC(to));
 234 #endif
 235         s_mp_free(DIGITS(to), ALLOC(to));
 236       }
 237 
 238       DIGITS(to) = tmp;
 239       ALLOC(to) = ALLOC(from);
 240     }
 241 
 242     /* Copy the precision and sign from the original */
 243     USED(to) = USED(from);
 244     SIGN(to) = SIGN(from);
 245   } /* end copy */
 246 
 247   return MP_OKAY;
 248 
 249 } /* end mp_copy() */
 250 
 251 /* }}} */
 252 
 253 /* {{{ mp_exch(mp1, mp2) */
 254 
 255 /*
 256   mp_exch(mp1, mp2)
 257 
 258   Exchange mp1 and mp2 without allocating any intermediate memory
 259   (well, unless you count the stack space needed for this call and the
 260   locals it creates...).  This cannot fail.
 261  */
 262 
 263 void mp_exch(mp_int *mp1, mp_int *mp2)
 264 {
 265 #if MP_ARGCHK == 2
 266   assert(mp1 != NULL && mp2 != NULL);
 267 #else
 268   if(mp1 == NULL || mp2 == NULL)
 269     return;
 270 #endif
 271 
 272   s_mp_exch(mp1, mp2);
 273 
 274 } /* end mp_exch() */
 275 
 276 /* }}} */
 277 
 278 /* {{{ mp_clear(mp) */
 279 
 280 /*
 281   mp_clear(mp)
 282 
 283   Release the storage used by an mp_int, and void its fields so that
 284   if someone calls mp_clear() again for the same int later, we won't
 285   get tollchocked.
 286  */
 287 
 288 void   mp_clear(mp_int *mp)
 289 {
 290   if(mp == NULL)
 291     return;
 292 
 293   if(DIGITS(mp) != NULL) {
 294 #if MP_CRYPTO
 295     s_mp_setz(DIGITS(mp), ALLOC(mp));
 296 #endif
 297     s_mp_free(DIGITS(mp), ALLOC(mp));
 298     DIGITS(mp) = NULL;
 299   }
 300 
 301   USED(mp) = 0;
 302   ALLOC(mp) = 0;
 303 
 304 } /* end mp_clear() */
 305 
 306 /* }}} */
 307 
 308 /* {{{ mp_zero(mp) */
 309 
 310 /*
 311   mp_zero(mp)
 312 
 313   Set mp to zero.  Does not change the allocated size of the structure,
 314   and therefore cannot fail (except on a bad argument, which we ignore)
 315  */
 316 void   mp_zero(mp_int *mp)
 317 {
 318   if(mp == NULL)
 319     return;
 320 
 321   s_mp_setz(DIGITS(mp), ALLOC(mp));
 322   USED(mp) = 1;
 323   SIGN(mp) = ZPOS;
 324 
 325 } /* end mp_zero() */
 326 
 327 /* }}} */
 328 
 329 /* {{{ mp_set(mp, d) */
 330 
 331 void   mp_set(mp_int *mp, mp_digit d)
 332 {
 333   if(mp == NULL)
 334     return;
 335 
 336   mp_zero(mp);
 337   DIGIT(mp, 0) = d;
 338 
 339 } /* end mp_set() */
 340 
 341 /* }}} */
 342 
 343 /* {{{ mp_set_int(mp, z) */
 344 
 345 mp_err mp_set_int(mp_int *mp, long z)
 346 {
 347   int            ix;
 348   unsigned long  v = labs(z);
 349   mp_err         res;
 350 
 351   ARGCHK(mp != NULL, MP_BADARG);
 352 
 353   mp_zero(mp);
 354   if(z == 0)
 355     return MP_OKAY;  /* shortcut for zero */
 356 
 357   if (sizeof v <= sizeof(mp_digit)) {
 358     DIGIT(mp,0) = v;
 359   } else {
 360     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 361       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 362         return res;
 363 
 364       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
 365       if (res != MP_OKAY)
 366         return res;
 367     }
 368   }
 369   if(z < 0)
 370     SIGN(mp) = NEG;
 371 
 372   return MP_OKAY;
 373 
 374 } /* end mp_set_int() */
 375 
 376 /* }}} */
 377 
 378 /* {{{ mp_set_ulong(mp, z) */
 379 
 380 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
 381 {
 382   int            ix;
 383   mp_err         res;
 384 
 385   ARGCHK(mp != NULL, MP_BADARG);
 386 
 387   mp_zero(mp);
 388   if(z == 0)
 389     return MP_OKAY;  /* shortcut for zero */
 390 
 391   if (sizeof z <= sizeof(mp_digit)) {
 392     DIGIT(mp,0) = z;
 393   } else {
 394     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 395       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 396         return res;
 397 
 398       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
 399       if (res != MP_OKAY)
 400         return res;
 401     }
 402   }
 403   return MP_OKAY;
 404 } /* end mp_set_ulong() */
 405 
 406 /* }}} */
 407 
 408 /*------------------------------------------------------------------------*/
 409 /* {{{ Digit arithmetic */
 410 
 411 /* {{{ mp_add_d(a, d, b) */
 412 
 413 /*
 414   mp_add_d(a, d, b)
 415 
 416   Compute the sum b = a + d, for a single digit d.  Respects the sign of
 417   its primary addend (single digits are unsigned anyway).
 418  */
 419 
 420 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
 421 {
 422   mp_int   tmp;
 423   mp_err   res;
 424 
 425   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 426 
 427   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 428     return res;
 429 
 430   if(SIGN(&tmp) == ZPOS) {
 431     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 432       goto CLEANUP;
 433   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 434     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 435       goto CLEANUP;
 436   } else {
 437     mp_neg(&tmp, &tmp);
 438 
 439     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 440   }
 441 
 442   if(s_mp_cmp_d(&tmp, 0) == 0)
 443     SIGN(&tmp) = ZPOS;
 444 
 445   s_mp_exch(&tmp, b);
 446 
 447 CLEANUP:
 448   mp_clear(&tmp);
 449   return res;
 450 
 451 } /* end mp_add_d() */
 452 
 453 /* }}} */
 454 
 455 /* {{{ mp_sub_d(a, d, b) */
 456 
 457 /*
 458   mp_sub_d(a, d, b)
 459 
 460   Compute the difference b = a - d, for a single digit d.  Respects the
 461   sign of its subtrahend (single digits are unsigned anyway).
 462  */
 463 
 464 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
 465 {
 466   mp_int   tmp;
 467   mp_err   res;
 468 
 469   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 470 
 471   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 472     return res;
 473 
 474   if(SIGN(&tmp) == NEG) {
 475     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 476       goto CLEANUP;
 477   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 478     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 479       goto CLEANUP;
 480   } else {
 481     mp_neg(&tmp, &tmp);
 482 
 483     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 484     SIGN(&tmp) = NEG;
 485   }
 486 
 487   if(s_mp_cmp_d(&tmp, 0) == 0)
 488     SIGN(&tmp) = ZPOS;
 489 
 490   s_mp_exch(&tmp, b);
 491 
 492 CLEANUP:
 493   mp_clear(&tmp);
 494   return res;
 495 
 496 } /* end mp_sub_d() */
 497 
 498 /* }}} */
 499 
 500 /* {{{ mp_mul_d(a, d, b) */
 501 
 502 /*
 503   mp_mul_d(a, d, b)
 504 
 505   Compute the product b = a * d, for a single digit d.  Respects the sign
 506   of its multiplicand (single digits are unsigned anyway)
 507  */
 508 
 509 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
 510 {
 511   mp_err  res;
 512 
 513   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 514 
 515   if(d == 0) {
 516     mp_zero(b);
 517     return MP_OKAY;
 518   }
 519 
 520   if((res = mp_copy(a, b)) != MP_OKAY)
 521     return res;
 522 
 523   res = s_mp_mul_d(b, d);
 524 
 525   return res;
 526 
 527 } /* end mp_mul_d() */
 528 
 529 /* }}} */
 530 
 531 /* {{{ mp_mul_2(a, c) */
 532 
 533 mp_err mp_mul_2(const mp_int *a, mp_int *c)
 534 {
 535   mp_err  res;
 536 
 537   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 538 
 539   if((res = mp_copy(a, c)) != MP_OKAY)
 540     return res;
 541 
 542   return s_mp_mul_2(c);
 543 
 544 } /* end mp_mul_2() */
 545 
 546 /* }}} */
 547 
 548 /* {{{ mp_div_d(a, d, q, r) */
 549 
 550 /*
 551   mp_div_d(a, d, q, r)
 552 
 553   Compute the quotient q = a / d and remainder r = a mod d, for a
 554   single digit d.  Respects the sign of its divisor (single digits are
 555   unsigned anyway).
 556  */
 557 
 558 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
 559 {
 560   mp_err   res;
 561   mp_int   qp;
 562   mp_digit rem;
 563   int      pow;
 564 
 565   ARGCHK(a != NULL, MP_BADARG);
 566 
 567   if(d == 0)
 568     return MP_RANGE;
 569 
 570   /* Shortcut for powers of two ... */
 571   if((pow = s_mp_ispow2d(d)) >= 0) {
 572     mp_digit  mask;
 573 
 574     mask = ((mp_digit)1 << pow) - 1;
 575     rem = DIGIT(a, 0) & mask;
 576 
 577     if(q) {
 578       mp_copy(a, q);
 579       s_mp_div_2d(q, pow);
 580     }
 581 
 582     if(r)
 583       *r = rem;
 584 
 585     return MP_OKAY;
 586   }
 587 
 588   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
 589     return res;
 590 
 591   res = s_mp_div_d(&qp, d, &rem);
 592 
 593   if(s_mp_cmp_d(&qp, 0) == 0)
 594     SIGN(q) = ZPOS;
 595 
 596   if(r)
 597     *r = rem;
 598 
 599   if(q)
 600     s_mp_exch(&qp, q);
 601 
 602   mp_clear(&qp);
 603   return res;
 604 
 605 } /* end mp_div_d() */
 606 
 607 /* }}} */
 608 
 609 /* {{{ mp_div_2(a, c) */
 610 
 611 /*
 612   mp_div_2(a, c)
 613 
 614   Compute c = a / 2, disregarding the remainder.
 615  */
 616 
 617 mp_err mp_div_2(const mp_int *a, mp_int *c)
 618 {
 619   mp_err  res;
 620 
 621   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 622 
 623   if((res = mp_copy(a, c)) != MP_OKAY)
 624     return res;
 625 
 626   s_mp_div_2(c);
 627 
 628   return MP_OKAY;
 629 
 630 } /* end mp_div_2() */
 631 
 632 /* }}} */
 633 
 634 /* {{{ mp_expt_d(a, d, b) */
 635 
 636 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
 637 {
 638   mp_int   s, x;
 639   mp_err   res;
 640 
 641   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 642 
 643   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
 644     return res;
 645   if((res = mp_init_copy(&x, a)) != MP_OKAY)
 646     goto X;
 647 
 648   DIGIT(&s, 0) = 1;
 649 
 650   while(d != 0) {
 651     if(d & 1) {
 652       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
 653         goto CLEANUP;
 654     }
 655 
 656     d /= 2;
 657 
 658     if((res = s_mp_sqr(&x)) != MP_OKAY)
 659       goto CLEANUP;
 660   }
 661 
 662   s_mp_exch(&s, c);
 663 
 664 CLEANUP:
 665   mp_clear(&x);
 666 X:
 667   mp_clear(&s);
 668 
 669   return res;
 670 
 671 } /* end mp_expt_d() */
 672 
 673 /* }}} */
 674 
 675 /* }}} */
 676 
 677 /*------------------------------------------------------------------------*/
 678 /* {{{ Full arithmetic */
 679 
 680 /* {{{ mp_abs(a, b) */
 681 
 682 /*
 683   mp_abs(a, b)
 684 
 685   Compute b = |a|.  'a' and 'b' may be identical.
 686  */
 687 
 688 mp_err mp_abs(const mp_int *a, mp_int *b)
 689 {
 690   mp_err   res;
 691 
 692   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 693 
 694   if((res = mp_copy(a, b)) != MP_OKAY)
 695     return res;
 696 
 697   SIGN(b) = ZPOS;
 698 
 699   return MP_OKAY;
 700 
 701 } /* end mp_abs() */
 702 
 703 /* }}} */
 704 
 705 /* {{{ mp_neg(a, b) */
 706 
 707 /*
 708   mp_neg(a, b)
 709 
 710   Compute b = -a.  'a' and 'b' may be identical.
 711  */
 712 
 713 mp_err mp_neg(const mp_int *a, mp_int *b)
 714 {
 715   mp_err   res;
 716 
 717   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 718 
 719   if((res = mp_copy(a, b)) != MP_OKAY)
 720     return res;
 721 
 722   if(s_mp_cmp_d(b, 0) == MP_EQ)
 723     SIGN(b) = ZPOS;
 724   else
 725     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
 726 
 727   return MP_OKAY;
 728 
 729 } /* end mp_neg() */
 730 
 731 /* }}} */
 732 
 733 /* {{{ mp_add(a, b, c) */
 734 
 735 /*
 736   mp_add(a, b, c)
 737 
 738   Compute c = a + b.  All parameters may be identical.
 739  */
 740 
 741 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
 742 {
 743   mp_err  res;
 744 
 745   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 746 
 747   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
 748     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 749   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
 750     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 751   } else {                          /* different sign: |a|  < |b|   */
 752     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 753   }
 754 
 755   if (s_mp_cmp_d(c, 0) == MP_EQ)
 756     SIGN(c) = ZPOS;
 757 
 758 CLEANUP:
 759   return res;
 760 
 761 } /* end mp_add() */
 762 
 763 /* }}} */
 764 
 765 /* {{{ mp_sub(a, b, c) */
 766 
 767 /*
 768   mp_sub(a, b, c)
 769 
 770   Compute c = a - b.  All parameters may be identical.
 771  */
 772 
 773 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
 774 {
 775   mp_err  res;
 776   int     magDiff;
 777 
 778   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 779 
 780   if (a == b) {
 781     mp_zero(c);
 782     return MP_OKAY;
 783   }
 784 
 785   if (MP_SIGN(a) != MP_SIGN(b)) {
 786     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 787   } else if (!(magDiff = s_mp_cmp(a, b))) {
 788     mp_zero(c);
 789     res = MP_OKAY;
 790   } else if (magDiff > 0) {
 791     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 792   } else {
 793     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 794     MP_SIGN(c) = !MP_SIGN(a);
 795   }
 796 
 797   if (s_mp_cmp_d(c, 0) == MP_EQ)
 798     MP_SIGN(c) = MP_ZPOS;
 799 
 800 CLEANUP:
 801   return res;
 802 
 803 } /* end mp_sub() */
 804 
 805 /* }}} */
 806 
 807 /* {{{ mp_mul(a, b, c) */
 808 
 809 /*
 810   mp_mul(a, b, c)
 811 
 812   Compute c = a * b.  All parameters may be identical.
 813  */
 814 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
 815 {
 816   mp_digit *pb;
 817   mp_int   tmp;
 818   mp_err   res;
 819   mp_size  ib;
 820   mp_size  useda, usedb;
 821 
 822   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 823 
 824   if (a == c) {
 825     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 826       return res;
 827     if (a == b)
 828       b = &tmp;
 829     a = &tmp;
 830   } else if (b == c) {
 831     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
 832       return res;
 833     b = &tmp;
 834   } else {
 835     MP_DIGITS(&tmp) = 0;
 836   }
 837 
 838   if (MP_USED(a) < MP_USED(b)) {
 839     const mp_int *xch = b;      /* switch a and b, to do fewer outer loops */
 840     b = a;
 841     a = xch;
 842   }
 843 
 844   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
 845   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
 846     goto CLEANUP;
 847 
 848 #ifdef NSS_USE_COMBA
 849   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 850       if (MP_USED(a) == 4) {
 851           s_mp_mul_comba_4(a, b, c);
 852           goto CLEANUP;
 853       }
 854       if (MP_USED(a) == 8) {
 855           s_mp_mul_comba_8(a, b, c);
 856           goto CLEANUP;
 857       }
 858       if (MP_USED(a) == 16) {
 859           s_mp_mul_comba_16(a, b, c);
 860           goto CLEANUP;
 861       }
 862       if (MP_USED(a) == 32) {
 863           s_mp_mul_comba_32(a, b, c);
 864           goto CLEANUP;
 865       }
 866   }
 867 #endif
 868 
 869   pb = MP_DIGITS(b);
 870   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
 871 
 872   /* Outer loop:  Digits of b */
 873   useda = MP_USED(a);
 874   usedb = MP_USED(b);
 875   for (ib = 1; ib < usedb; ib++) {
 876     mp_digit b_i    = *pb++;
 877 
 878     /* Inner product:  Digits of a */
 879     if (b_i)
 880       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
 881     else
 882       MP_DIGIT(c, ib + useda) = b_i;
 883   }
 884 
 885   s_mp_clamp(c);
 886 
 887   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
 888     SIGN(c) = ZPOS;
 889   else
 890     SIGN(c) = NEG;
 891 
 892 CLEANUP:
 893   mp_clear(&tmp);
 894   return res;
 895 } /* end mp_mul() */
 896 
 897 /* }}} */
 898 
 899 /* {{{ mp_sqr(a, sqr) */
 900 
 901 #if MP_SQUARE
 902 /*
 903   Computes the square of a.  This can be done more
 904   efficiently than a general multiplication, because many of the
 905   computation steps are redundant when squaring.  The inner product
 906   step is a bit more complicated, but we save a fair number of
 907   iterations of the multiplication loop.
 908  */
 909 
 910 /* sqr = a^2;   Caller provides both a and tmp; */
 911 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
 912 {
 913   mp_digit *pa;
 914   mp_digit d;
 915   mp_err   res;
 916   mp_size  ix;
 917   mp_int   tmp;
 918   int      count;
 919 
 920   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
 921 
 922   if (a == sqr) {
 923     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 924       return res;
 925     a = &tmp;
 926   } else {
 927     DIGITS(&tmp) = 0;
 928     res = MP_OKAY;
 929   }
 930 
 931   ix = 2 * MP_USED(a);
 932   if (ix > MP_ALLOC(sqr)) {
 933     MP_USED(sqr) = 1;
 934     MP_CHECKOK( s_mp_grow(sqr, ix) );
 935   }
 936   MP_USED(sqr) = ix;
 937   MP_DIGIT(sqr, 0) = 0;
 938 
 939 #ifdef NSS_USE_COMBA
 940   if (IS_POWER_OF_2(MP_USED(a))) {
 941       if (MP_USED(a) == 4) {
 942           s_mp_sqr_comba_4(a, sqr);
 943           goto CLEANUP;
 944       }
 945       if (MP_USED(a) == 8) {
 946           s_mp_sqr_comba_8(a, sqr);
 947           goto CLEANUP;
 948       }
 949       if (MP_USED(a) == 16) {
 950           s_mp_sqr_comba_16(a, sqr);
 951           goto CLEANUP;
 952       }
 953       if (MP_USED(a) == 32) {
 954           s_mp_sqr_comba_32(a, sqr);
 955           goto CLEANUP;
 956       }
 957   }
 958 #endif
 959 
 960   pa = MP_DIGITS(a);
 961   count = MP_USED(a) - 1;
 962   if (count > 0) {
 963     d = *pa++;
 964     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
 965     for (ix = 3; --count > 0; ix += 2) {
 966       d = *pa++;
 967       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
 968     } /* for(ix ...) */
 969     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
 970 
 971     /* now sqr *= 2 */
 972     s_mp_mul_2(sqr);
 973   } else {
 974     MP_DIGIT(sqr, 1) = 0;
 975   }
 976 
 977   /* now add the squares of the digits of a to sqr. */
 978   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
 979 
 980   SIGN(sqr) = ZPOS;
 981   s_mp_clamp(sqr);
 982 
 983 CLEANUP:
 984   mp_clear(&tmp);
 985   return res;
 986 
 987 } /* end mp_sqr() */
 988 #endif
 989 
 990 /* }}} */
 991 
 992 /* {{{ mp_div(a, b, q, r) */
 993 
 994 /*
 995   mp_div(a, b, q, r)
 996 
 997   Compute q = a / b and r = a mod b.  Input parameters may be re-used
 998   as output parameters.  If q or r is NULL, that portion of the
 999   computation will be discarded (although it will still be computed)
1000  */
1001 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1002 {
1003   mp_err   res;
1004   mp_int   *pQ, *pR;
1005   mp_int   qtmp, rtmp, btmp;
1006   int      cmp;
1007   mp_sign  signA;
1008   mp_sign  signB;
1009 
1010   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1011 
1012   signA = MP_SIGN(a);
1013   signB = MP_SIGN(b);
1014 
1015   if(mp_cmp_z(b) == MP_EQ)
1016     return MP_RANGE;
1017 
1018   DIGITS(&qtmp) = 0;
1019   DIGITS(&rtmp) = 0;
1020   DIGITS(&btmp) = 0;
1021 
1022   /* Set up some temporaries... */
1023   if (!r || r == a || r == b) {
1024     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1025     pR = &rtmp;
1026   } else {
1027     MP_CHECKOK( mp_copy(a, r) );
1028     pR = r;
1029   }
1030 
1031   if (!q || q == a || q == b) {
1032     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1033     pQ = &qtmp;
1034   } else {
1035     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1036     pQ = q;
1037     mp_zero(pQ);
1038   }
1039 
1040   /*
1041     If |a| <= |b|, we can compute the solution without division;
1042     otherwise, we actually do the work required.
1043    */
1044   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1045     if (cmp) {
1046       /* r was set to a above. */
1047       mp_zero(pQ);
1048     } else {
1049       mp_set(pQ, 1);
1050       mp_zero(pR);
1051     }
1052   } else {
1053     MP_CHECKOK( mp_init_copy(&btmp, b) );
1054     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1055   }
1056 
1057   /* Compute the signs for the output  */
1058   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1059   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1060   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1061 
1062   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1063     SIGN(pQ) = ZPOS;
1064   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1065     SIGN(pR) = ZPOS;
1066 
1067   /* Copy output, if it is needed      */
1068   if(q && q != pQ)
1069     s_mp_exch(pQ, q);
1070 
1071   if(r && r != pR)
1072     s_mp_exch(pR, r);
1073 
1074 CLEANUP:
1075   mp_clear(&btmp);
1076   mp_clear(&rtmp);
1077   mp_clear(&qtmp);
1078 
1079   return res;
1080 
1081 } /* end mp_div() */
1082 
1083 /* }}} */
1084 
1085 /* {{{ mp_div_2d(a, d, q, r) */
1086 
1087 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1088 {
1089   mp_err  res;
1090 
1091   ARGCHK(a != NULL, MP_BADARG);
1092 
1093   if(q) {
1094     if((res = mp_copy(a, q)) != MP_OKAY)
1095       return res;
1096   }
1097   if(r) {
1098     if((res = mp_copy(a, r)) != MP_OKAY)
1099       return res;
1100   }
1101   if(q) {
1102     s_mp_div_2d(q, d);
1103   }
1104   if(r) {
1105     s_mp_mod_2d(r, d);
1106   }
1107 
1108   return MP_OKAY;
1109 
1110 } /* end mp_div_2d() */
1111 
1112 /* }}} */
1113 
1114 /* {{{ mp_expt(a, b, c) */
1115 
1116 /*
1117   mp_expt(a, b, c)
1118 
1119   Compute c = a ** b, that is, raise a to the b power.  Uses a
1120   standard iterative square-and-multiply technique.
1121  */
1122 
1123 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1124 {
1125   mp_int   s, x;
1126   mp_err   res;
1127   mp_digit d;
1128   unsigned int      dig, bit;
1129 
1130   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1131 
1132   if(mp_cmp_z(b) < 0)
1133     return MP_RANGE;
1134 
1135   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1136     return res;
1137 
1138   mp_set(&s, 1);
1139 
1140   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1141     goto X;
1142 
1143   /* Loop over low-order digits in ascending order */
1144   for(dig = 0; dig < (USED(b) - 1); dig++) {
1145     d = DIGIT(b, dig);
1146 
1147     /* Loop over bits of each non-maximal digit */
1148     for(bit = 0; bit < DIGIT_BIT; bit++) {
1149       if(d & 1) {
1150         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1151           goto CLEANUP;
1152       }
1153 
1154       d >>= 1;
1155 
1156       if((res = s_mp_sqr(&x)) != MP_OKAY)
1157         goto CLEANUP;
1158     }
1159   }
1160 
1161   /* Consider now the last digit... */
1162   d = DIGIT(b, dig);
1163 
1164   while(d) {
1165     if(d & 1) {
1166       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1167         goto CLEANUP;
1168     }
1169 
1170     d >>= 1;
1171 
1172     if((res = s_mp_sqr(&x)) != MP_OKAY)
1173       goto CLEANUP;
1174   }
1175 
1176   if(mp_iseven(b))
1177     SIGN(&s) = SIGN(a);
1178 
1179   res = mp_copy(&s, c);
1180 
1181 CLEANUP:
1182   mp_clear(&x);
1183 X:
1184   mp_clear(&s);
1185 
1186   return res;
1187 
1188 } /* end mp_expt() */
1189 
1190 /* }}} */
1191 
1192 /* {{{ mp_2expt(a, k) */
1193 
1194 /* Compute a = 2^k */
1195 
1196 mp_err mp_2expt(mp_int *a, mp_digit k)
1197 {
1198   ARGCHK(a != NULL, MP_BADARG);
1199 
1200   return s_mp_2expt(a, k);
1201 
1202 } /* end mp_2expt() */
1203 
1204 /* }}} */
1205 
1206 /* {{{ mp_mod(a, m, c) */
1207 
1208 /*
1209   mp_mod(a, m, c)
1210 
1211   Compute c = a (mod m).  Result will always be 0 <= c < m.
1212  */
1213 
1214 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1215 {
1216   mp_err  res;
1217   int     mag;
1218 
1219   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1220 
1221   if(SIGN(m) == NEG)
1222     return MP_RANGE;
1223 
1224   /*
1225      If |a| > m, we need to divide to get the remainder and take the
1226      absolute value.
1227 
1228      If |a| < m, we don't need to do any division, just copy and adjust
1229      the sign (if a is negative).
1230 
1231      If |a| == m, we can simply set the result to zero.
1232 
1233      This order is intended to minimize the average path length of the
1234      comparison chain on common workloads -- the most frequent cases are
1235      that |a| != m, so we do those first.
1236    */
1237   if((mag = s_mp_cmp(a, m)) > 0) {
1238     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1239       return res;
1240 
1241     if(SIGN(c) == NEG) {
1242       if((res = mp_add(c, m, c)) != MP_OKAY)
1243         return res;
1244     }
1245 
1246   } else if(mag < 0) {
1247     if((res = mp_copy(a, c)) != MP_OKAY)
1248       return res;
1249 
1250     if(mp_cmp_z(a) < 0) {
1251       if((res = mp_add(c, m, c)) != MP_OKAY)
1252         return res;
1253 
1254     }
1255 
1256   } else {
1257     mp_zero(c);
1258 
1259   }
1260 
1261   return MP_OKAY;
1262 
1263 } /* end mp_mod() */
1264 
1265 /* }}} */
1266 
1267 /* {{{ mp_mod_d(a, d, c) */
1268 
1269 /*
1270   mp_mod_d(a, d, c)
1271 
1272   Compute c = a (mod d).  Result will always be 0 <= c < d
1273  */
1274 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1275 {
1276   mp_err   res;
1277   mp_digit rem;
1278 
1279   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1280 
1281   if(s_mp_cmp_d(a, d) > 0) {
1282     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1283       return res;
1284 
1285   } else {
1286     if(SIGN(a) == NEG)
1287       rem = d - DIGIT(a, 0);
1288     else
1289       rem = DIGIT(a, 0);
1290   }
1291 
1292   if(c)
1293     *c = rem;
1294 
1295   return MP_OKAY;
1296 
1297 } /* end mp_mod_d() */
1298 
1299 /* }}} */
1300 
1301 /* {{{ mp_sqrt(a, b) */
1302 
1303 /*
1304   mp_sqrt(a, b)
1305 
1306   Compute the integer square root of a, and store the result in b.
1307   Uses an integer-arithmetic version of Newton's iterative linear
1308   approximation technique to determine this value; the result has the
1309   following two properties:
1310 
1311      b^2 <= a
1312      (b+1)^2 >= a
1313 
1314   It is a range error to pass a negative value.
1315  */
1316 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1317 {
1318   mp_int   x, t;
1319   mp_err   res;
1320   mp_size  used;
1321 
1322   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1323 
1324   /* Cannot take square root of a negative value */
1325   if(SIGN(a) == NEG)
1326     return MP_RANGE;
1327 
1328   /* Special cases for zero and one, trivial     */
1329   if(mp_cmp_d(a, 1) <= 0)
1330     return mp_copy(a, b);
1331 
1332   /* Initialize the temporaries we'll use below  */
1333   if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1334     return res;
1335 
1336   /* Compute an initial guess for the iteration as a itself */
1337   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1338     goto X;
1339 
1340   used = MP_USED(&x);
1341   if (used > 1) {
1342     s_mp_rshd(&x, used / 2);
1343   }
1344 
1345   for(;;) {
1346     /* t = (x * x) - a */
1347     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1348     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1349        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1350       goto CLEANUP;
1351 
1352     /* t = t / 2x       */
1353     s_mp_mul_2(&x);
1354     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1355       goto CLEANUP;
1356     s_mp_div_2(&x);
1357 
1358     /* Terminate the loop, if the quotient is zero */
1359     if(mp_cmp_z(&t) == MP_EQ)
1360       break;
1361 
1362     /* x = x - t       */
1363     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1364       goto CLEANUP;
1365 
1366   }
1367 
1368   /* Copy result to output parameter */
1369   mp_sub_d(&x, 1, &x);
1370   s_mp_exch(&x, b);
1371 
1372  CLEANUP:
1373   mp_clear(&x);
1374  X:
1375   mp_clear(&t);
1376 
1377   return res;
1378 
1379 } /* end mp_sqrt() */
1380 
1381 /* }}} */
1382 
1383 /* }}} */
1384 
1385 /*------------------------------------------------------------------------*/
1386 /* {{{ Modular arithmetic */
1387 
1388 #if MP_MODARITH
1389 /* {{{ mp_addmod(a, b, m, c) */
1390 
1391 /*
1392   mp_addmod(a, b, m, c)
1393 
1394   Compute c = (a + b) mod m
1395  */
1396 
1397 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1398 {
1399   mp_err  res;
1400 
1401   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1402 
1403   if((res = mp_add(a, b, c)) != MP_OKAY)
1404     return res;
1405   if((res = mp_mod(c, m, c)) != MP_OKAY)
1406     return res;
1407 
1408   return MP_OKAY;
1409 
1410 }
1411 
1412 /* }}} */
1413 
1414 /* {{{ mp_submod(a, b, m, c) */
1415 
1416 /*
1417   mp_submod(a, b, m, c)
1418 
1419   Compute c = (a - b) mod m
1420  */
1421 
1422 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1423 {
1424   mp_err  res;
1425 
1426   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1427 
1428   if((res = mp_sub(a, b, c)) != MP_OKAY)
1429     return res;
1430   if((res = mp_mod(c, m, c)) != MP_OKAY)
1431     return res;
1432 
1433   return MP_OKAY;
1434 
1435 }
1436 
1437 /* }}} */
1438 
1439 /* {{{ mp_mulmod(a, b, m, c) */
1440 
1441 /*
1442   mp_mulmod(a, b, m, c)
1443 
1444   Compute c = (a * b) mod m
1445  */
1446 
1447 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1448 {
1449   mp_err  res;
1450 
1451   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1452 
1453   if((res = mp_mul(a, b, c)) != MP_OKAY)
1454     return res;
1455   if((res = mp_mod(c, m, c)) != MP_OKAY)
1456     return res;
1457 
1458   return MP_OKAY;
1459 
1460 }
1461 
1462 /* }}} */
1463 
1464 /* {{{ mp_sqrmod(a, m, c) */
1465 
1466 #if MP_SQUARE
1467 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1468 {
1469   mp_err  res;
1470 
1471   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1472 
1473   if((res = mp_sqr(a, c)) != MP_OKAY)
1474     return res;
1475   if((res = mp_mod(c, m, c)) != MP_OKAY)
1476     return res;
1477 
1478   return MP_OKAY;
1479 
1480 } /* end mp_sqrmod() */
1481 #endif
1482 
1483 /* }}} */
1484 
1485 /* {{{ s_mp_exptmod(a, b, m, c) */
1486 
1487 /*
1488   s_mp_exptmod(a, b, m, c)
1489 
1490   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1491   method with modular reductions at each step. (This is basically the
1492   same code as mp_expt(), except for the addition of the reductions)
1493 
1494   The modular reductions are done using Barrett's algorithm (see
1495   s_mp_reduce() below for details)
1496  */
1497 
1498 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1499 {
1500   mp_int   s, x, mu;
1501   mp_err   res;
1502   mp_digit d;
1503   unsigned int      dig, bit;
1504 
1505   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1506 
1507   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1508     return MP_RANGE;
1509 
1510   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1511     return res;
1512   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1513      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1514     goto X;
1515   if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1516     goto MU;
1517 
1518   mp_set(&s, 1);
1519 
1520   /* mu = b^2k / m */
1521   s_mp_add_d(&mu, 1);
1522   s_mp_lshd(&mu, 2 * USED(m));
1523   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1524     goto CLEANUP;
1525 
1526   /* Loop over digits of b in ascending order, except highest order */
1527   for(dig = 0; dig < (USED(b) - 1); dig++) {
1528     d = DIGIT(b, dig);
1529 
1530     /* Loop over the bits of the lower-order digits */
1531     for(bit = 0; bit < DIGIT_BIT; bit++) {
1532       if(d & 1) {
1533         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1534           goto CLEANUP;
1535         if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1536           goto CLEANUP;
1537       }
1538 
1539       d >>= 1;
1540 
1541       if((res = s_mp_sqr(&x)) != MP_OKAY)
1542         goto CLEANUP;
1543       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1544         goto CLEANUP;
1545     }
1546   }
1547 
1548   /* Now do the last digit... */
1549   d = DIGIT(b, dig);
1550 
1551   while(d) {
1552     if(d & 1) {
1553       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1554         goto CLEANUP;
1555       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1556         goto CLEANUP;
1557     }
1558 
1559     d >>= 1;
1560 
1561     if((res = s_mp_sqr(&x)) != MP_OKAY)
1562       goto CLEANUP;
1563     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1564       goto CLEANUP;
1565   }
1566 
1567   s_mp_exch(&s, c);
1568 
1569  CLEANUP:
1570   mp_clear(&mu);
1571  MU:
1572   mp_clear(&x);
1573  X:
1574   mp_clear(&s);
1575 
1576   return res;
1577 
1578 } /* end s_mp_exptmod() */
1579 
1580 /* }}} */
1581 
1582 /* {{{ mp_exptmod_d(a, d, m, c) */
1583 
1584 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1585 {
1586   mp_int   s, x;
1587   mp_err   res;
1588 
1589   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1590 
1591   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1592     return res;
1593   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1594     goto X;
1595 
1596   mp_set(&s, 1);
1597 
1598   while(d != 0) {
1599     if(d & 1) {
1600       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1601          (res = mp_mod(&s, m, &s)) != MP_OKAY)
1602         goto CLEANUP;
1603     }
1604 
1605     d /= 2;
1606 
1607     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1608        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1609       goto CLEANUP;
1610   }
1611 
1612   s_mp_exch(&s, c);
1613 
1614 CLEANUP:
1615   mp_clear(&x);
1616 X:
1617   mp_clear(&s);
1618 
1619   return res;
1620 
1621 } /* end mp_exptmod_d() */
1622 
1623 /* }}} */
1624 #endif /* if MP_MODARITH */
1625 
1626 /* }}} */
1627 
1628 /*------------------------------------------------------------------------*/
1629 /* {{{ Comparison functions */
1630 
1631 /* {{{ mp_cmp_z(a) */
1632 
1633 /*
1634   mp_cmp_z(a)
1635 
1636   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1637  */
1638 
1639 int    mp_cmp_z(const mp_int *a)
1640 {
1641   if(SIGN(a) == NEG)
1642     return MP_LT;
1643   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1644     return MP_EQ;
1645   else
1646     return MP_GT;
1647 
1648 } /* end mp_cmp_z() */
1649 
1650 /* }}} */
1651 
1652 /* {{{ mp_cmp_d(a, d) */
1653 
1654 /*
1655   mp_cmp_d(a, d)
1656 
1657   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1658  */
1659 
1660 int    mp_cmp_d(const mp_int *a, mp_digit d)
1661 {
1662   ARGCHK(a != NULL, MP_EQ);
1663 
1664   if(SIGN(a) == NEG)
1665     return MP_LT;
1666 
1667   return s_mp_cmp_d(a, d);
1668 
1669 } /* end mp_cmp_d() */
1670 
1671 /* }}} */
1672 
1673 /* {{{ mp_cmp(a, b) */
1674 
1675 int    mp_cmp(const mp_int *a, const mp_int *b)
1676 {
1677   ARGCHK(a != NULL && b != NULL, MP_EQ);
1678 
1679   if(SIGN(a) == SIGN(b)) {
1680     int  mag;
1681 
1682     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1683       return MP_EQ;
1684 
1685     if(SIGN(a) == ZPOS)
1686       return mag;
1687     else
1688       return -mag;
1689 
1690   } else if(SIGN(a) == ZPOS) {
1691     return MP_GT;
1692   } else {
1693     return MP_LT;
1694   }
1695 
1696 } /* end mp_cmp() */
1697 
1698 /* }}} */
1699 
1700 /* {{{ mp_cmp_mag(a, b) */
1701 
1702 /*
1703   mp_cmp_mag(a, b)
1704 
1705   Compares |a| <=> |b|, and returns an appropriate comparison result
1706  */
1707 
1708 int    mp_cmp_mag(mp_int *a, mp_int *b)
1709 {
1710   ARGCHK(a != NULL && b != NULL, MP_EQ);
1711 
1712   return s_mp_cmp(a, b);
1713 
1714 } /* end mp_cmp_mag() */
1715 
1716 /* }}} */
1717 
1718 /* {{{ mp_cmp_int(a, z, kmflag) */
1719 
1720 /*
1721   This just converts z to an mp_int, and uses the existing comparison
1722   routines.  This is sort of inefficient, but it's not clear to me how
1723   frequently this wil get used anyway.  For small positive constants,
1724   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1725  */
1726 int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1727 {
1728   mp_int  tmp;
1729   int     out;
1730 
1731   ARGCHK(a != NULL, MP_EQ);
1732 
1733   mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1734   out = mp_cmp(a, &tmp);
1735   mp_clear(&tmp);
1736 
1737   return out;
1738 
1739 } /* end mp_cmp_int() */
1740 
1741 /* }}} */
1742 
1743 /* {{{ mp_isodd(a) */
1744 
1745 /*
1746   mp_isodd(a)
1747 
1748   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1749  */
1750 int    mp_isodd(const mp_int *a)
1751 {
1752   ARGCHK(a != NULL, 0);
1753 
1754   return (int)(DIGIT(a, 0) & 1);
1755 
1756 } /* end mp_isodd() */
1757 
1758 /* }}} */
1759 
1760 /* {{{ mp_iseven(a) */
1761 
1762 int    mp_iseven(const mp_int *a)
1763 {
1764   return !mp_isodd(a);
1765 
1766 } /* end mp_iseven() */
1767 
1768 /* }}} */
1769 
1770 /* }}} */
1771 
1772 /*------------------------------------------------------------------------*/
1773 /* {{{ Number theoretic functions */
1774 
1775 #if MP_NUMTH
1776 /* {{{ mp_gcd(a, b, c) */
1777 
1778 /*
1779   Like the old mp_gcd() function, except computes the GCD using the
1780   binary algorithm due to Josef Stein in 1961 (via Knuth).
1781  */
1782 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1783 {
1784   mp_err   res;
1785   mp_int   u, v, t;
1786   mp_size  k = 0;
1787 
1788   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1789 
1790   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1791       return MP_RANGE;
1792   if(mp_cmp_z(a) == MP_EQ) {
1793     return mp_copy(b, c);
1794   } else if(mp_cmp_z(b) == MP_EQ) {
1795     return mp_copy(a, c);
1796   }
1797 
1798   if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1799     return res;
1800   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1801     goto U;
1802   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1803     goto V;
1804 
1805   SIGN(&u) = ZPOS;
1806   SIGN(&v) = ZPOS;
1807 
1808   /* Divide out common factors of 2 until at least 1 of a, b is even */
1809   while(mp_iseven(&u) && mp_iseven(&v)) {
1810     s_mp_div_2(&u);
1811     s_mp_div_2(&v);
1812     ++k;
1813   }
1814 
1815   /* Initialize t */
1816   if(mp_isodd(&u)) {
1817     if((res = mp_copy(&v, &t)) != MP_OKAY)
1818       goto CLEANUP;
1819 
1820     /* t = -v */
1821     if(SIGN(&v) == ZPOS)
1822       SIGN(&t) = NEG;
1823     else
1824       SIGN(&t) = ZPOS;
1825 
1826   } else {
1827     if((res = mp_copy(&u, &t)) != MP_OKAY)
1828       goto CLEANUP;
1829 
1830   }
1831 
1832   for(;;) {
1833     while(mp_iseven(&t)) {
1834       s_mp_div_2(&t);
1835     }
1836 
1837     if(mp_cmp_z(&t) == MP_GT) {
1838       if((res = mp_copy(&t, &u)) != MP_OKAY)
1839         goto CLEANUP;
1840 
1841     } else {
1842       if((res = mp_copy(&t, &v)) != MP_OKAY)
1843         goto CLEANUP;
1844 
1845       /* v = -t */
1846       if(SIGN(&t) == ZPOS)
1847         SIGN(&v) = NEG;
1848       else
1849         SIGN(&v) = ZPOS;
1850     }
1851 
1852     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1853       goto CLEANUP;
1854 
1855     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1856       break;
1857   }
1858 
1859   s_mp_2expt(&v, k);       /* v = 2^k   */
1860   res = mp_mul(&u, &v, c); /* c = u * v */
1861 
1862  CLEANUP:
1863   mp_clear(&v);
1864  V:
1865   mp_clear(&u);
1866  U:
1867   mp_clear(&t);
1868 
1869   return res;
1870 
1871 } /* end mp_gcd() */
1872 
1873 /* }}} */
1874 
1875 /* {{{ mp_lcm(a, b, c) */
1876 
1877 /* We compute the least common multiple using the rule:
1878 
1879    ab = [a, b](a, b)
1880 
1881    ... by computing the product, and dividing out the gcd.
1882  */
1883 
1884 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1885 {
1886   mp_int  gcd, prod;
1887   mp_err  res;
1888 
1889   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1890 
1891   /* Set up temporaries */
1892   if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1893     return res;
1894   if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1895     goto GCD;
1896 
1897   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1898     goto CLEANUP;
1899   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1900     goto CLEANUP;
1901 
1902   res = mp_div(&prod, &gcd, c, NULL);
1903 
1904  CLEANUP:
1905   mp_clear(&prod);
1906  GCD:
1907   mp_clear(&gcd);
1908 
1909   return res;
1910 
1911 } /* end mp_lcm() */
1912 
1913 /* }}} */
1914 
1915 /* {{{ mp_xgcd(a, b, g, x, y) */
1916 
1917 /*
1918   mp_xgcd(a, b, g, x, y)
1919 
1920   Compute g = (a, b) and values x and y satisfying Bezout's identity
1921   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1922   based on the Stein algorithm used for mp_gcd()
1923   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1924  */
1925 
1926 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1927 {
1928   mp_int   gx, xc, yc, u, v, A, B, C, D;
1929   mp_int  *clean[9];
1930   mp_err   res;
1931   int      last = -1;
1932 
1933   if(mp_cmp_z(b) == 0)
1934     return MP_RANGE;
1935 
1936   /* Initialize all these variables we need */
1937   MP_CHECKOK( mp_init(&u, FLAG(a)) );
1938   clean[++last] = &u;
1939   MP_CHECKOK( mp_init(&v, FLAG(a)) );
1940   clean[++last] = &v;
1941   MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1942   clean[++last] = &gx;
1943   MP_CHECKOK( mp_init(&A, FLAG(a)) );
1944   clean[++last] = &A;
1945   MP_CHECKOK( mp_init(&B, FLAG(a)) );
1946   clean[++last] = &B;
1947   MP_CHECKOK( mp_init(&C, FLAG(a)) );
1948   clean[++last] = &C;
1949   MP_CHECKOK( mp_init(&D, FLAG(a)) );
1950   clean[++last] = &D;
1951   MP_CHECKOK( mp_init_copy(&xc, a) );
1952   clean[++last] = &xc;
1953   mp_abs(&xc, &xc);
1954   MP_CHECKOK( mp_init_copy(&yc, b) );
1955   clean[++last] = &yc;
1956   mp_abs(&yc, &yc);
1957 
1958   mp_set(&gx, 1);
1959 
1960   /* Divide by two until at least one of them is odd */
1961   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1962     mp_size nx = mp_trailing_zeros(&xc);
1963     mp_size ny = mp_trailing_zeros(&yc);
1964     mp_size n  = MP_MIN(nx, ny);
1965     s_mp_div_2d(&xc,n);
1966     s_mp_div_2d(&yc,n);
1967     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1968   }
1969 
1970   mp_copy(&xc, &u);
1971   mp_copy(&yc, &v);
1972   mp_set(&A, 1); mp_set(&D, 1);
1973 
1974   /* Loop through binary GCD algorithm */
1975   do {
1976     while(mp_iseven(&u)) {
1977       s_mp_div_2(&u);
1978 
1979       if(mp_iseven(&A) && mp_iseven(&B)) {
1980         s_mp_div_2(&A); s_mp_div_2(&B);
1981       } else {
1982         MP_CHECKOK( mp_add(&A, &yc, &A) );
1983         s_mp_div_2(&A);
1984         MP_CHECKOK( mp_sub(&B, &xc, &B) );
1985         s_mp_div_2(&B);
1986       }
1987     }
1988 
1989     while(mp_iseven(&v)) {
1990       s_mp_div_2(&v);
1991 
1992       if(mp_iseven(&C) && mp_iseven(&D)) {
1993         s_mp_div_2(&C); s_mp_div_2(&D);
1994       } else {
1995         MP_CHECKOK( mp_add(&C, &yc, &C) );
1996         s_mp_div_2(&C);
1997         MP_CHECKOK( mp_sub(&D, &xc, &D) );
1998         s_mp_div_2(&D);
1999       }
2000     }
2001 
2002     if(mp_cmp(&u, &v) >= 0) {
2003       MP_CHECKOK( mp_sub(&u, &v, &u) );
2004       MP_CHECKOK( mp_sub(&A, &C, &A) );
2005       MP_CHECKOK( mp_sub(&B, &D, &B) );
2006     } else {
2007       MP_CHECKOK( mp_sub(&v, &u, &v) );
2008       MP_CHECKOK( mp_sub(&C, &A, &C) );
2009       MP_CHECKOK( mp_sub(&D, &B, &D) );
2010     }
2011   } while (mp_cmp_z(&u) != 0);
2012 
2013   /* copy results to output */
2014   if(x)
2015     MP_CHECKOK( mp_copy(&C, x) );
2016 
2017   if(y)
2018     MP_CHECKOK( mp_copy(&D, y) );
2019 
2020   if(g)
2021     MP_CHECKOK( mp_mul(&gx, &v, g) );
2022 
2023  CLEANUP:
2024   while(last >= 0)
2025     mp_clear(clean[last--]);
2026 
2027   return res;
2028 
2029 } /* end mp_xgcd() */
2030 
2031 /* }}} */
2032 
2033 mp_size mp_trailing_zeros(const mp_int *mp)
2034 {
2035   mp_digit d;
2036   mp_size  n = 0;
2037   unsigned int      ix;
2038 
2039   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2040     return n;
2041 
2042   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2043     n += MP_DIGIT_BIT;
2044   if (!d)
2045     return 0;   /* shouldn't happen, but ... */
2046 #if !defined(MP_USE_UINT_DIGIT)
2047   if (!(d & 0xffffffffU)) {
2048     d >>= 32;
2049     n  += 32;
2050   }
2051 #endif
2052   if (!(d & 0xffffU)) {
2053     d >>= 16;
2054     n  += 16;
2055   }
2056   if (!(d & 0xffU)) {
2057     d >>= 8;
2058     n  += 8;
2059   }
2060   if (!(d & 0xfU)) {
2061     d >>= 4;
2062     n  += 4;
2063   }
2064   if (!(d & 0x3U)) {
2065     d >>= 2;
2066     n  += 2;
2067   }
2068   if (!(d & 0x1U)) {
2069     d >>= 1;
2070     n  += 1;
2071   }
2072 #if MP_ARGCHK == 2
2073   assert(0 != (d & 1));
2074 #endif
2075   return n;
2076 }
2077 
2078 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2079 ** Returns k (positive) or error (negative).
2080 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2081 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2082 */
2083 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2084 {
2085   mp_err res;
2086   mp_err k    = 0;
2087   mp_int d, f, g;
2088 
2089   ARGCHK(a && p && c, MP_BADARG);
2090 
2091   MP_DIGITS(&d) = 0;
2092   MP_DIGITS(&f) = 0;
2093   MP_DIGITS(&g) = 0;
2094   MP_CHECKOK( mp_init(&d, FLAG(a)) );
2095   MP_CHECKOK( mp_init_copy(&f, a) );    /* f = a */
2096   MP_CHECKOK( mp_init_copy(&g, p) );    /* g = p */
2097 
2098   mp_set(c, 1);
2099   mp_zero(&d);
2100 
2101   if (mp_cmp_z(&f) == 0) {
2102     res = MP_UNDEF;
2103   } else
2104   for (;;) {
2105     int diff_sign;
2106     while (mp_iseven(&f)) {
2107       mp_size n = mp_trailing_zeros(&f);
2108       if (!n) {
2109         res = MP_UNDEF;
2110         goto CLEANUP;
2111       }
2112       s_mp_div_2d(&f, n);
2113       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2114       k += n;
2115     }
2116     if (mp_cmp_d(&f, 1) == MP_EQ) {     /* f == 1 */
2117       res = k;
2118       break;
2119     }
2120     diff_sign = mp_cmp(&f, &g);
2121     if (diff_sign < 0) {                /* f < g */
2122       s_mp_exch(&f, &g);
2123       s_mp_exch(c, &d);
2124     } else if (diff_sign == 0) {                /* f == g */
2125       res = MP_UNDEF;           /* a and p are not relatively prime */
2126       break;
2127     }
2128     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2129       MP_CHECKOK( mp_sub(&f, &g, &f) ); /* f = f - g */
2130       MP_CHECKOK( mp_sub(c,  &d,  c) ); /* c = c - d */
2131     } else {
2132       MP_CHECKOK( mp_add(&f, &g, &f) ); /* f = f + g */
2133       MP_CHECKOK( mp_add(c,  &d,  c) ); /* c = c + d */
2134     }
2135   }
2136   if (res >= 0) {
2137     while (MP_SIGN(c) != MP_ZPOS) {
2138       MP_CHECKOK( mp_add(c, p, c) );
2139     }
2140     res = k;
2141   }
2142 
2143 CLEANUP:
2144   mp_clear(&d);
2145   mp_clear(&f);
2146   mp_clear(&g);
2147   return res;
2148 }
2149 
2150 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2151 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2152 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2153 */
2154 mp_digit  s_mp_invmod_radix(mp_digit P)
2155 {
2156   mp_digit T = P;
2157   T *= 2 - (P * T);
2158   T *= 2 - (P * T);
2159   T *= 2 - (P * T);
2160   T *= 2 - (P * T);
2161 #if !defined(MP_USE_UINT_DIGIT)
2162   T *= 2 - (P * T);
2163   T *= 2 - (P * T);
2164 #endif
2165   return T;
2166 }
2167 
2168 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2169 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2170 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2171 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2172 */
2173 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2174 {
2175   int      k_orig = k;
2176   mp_digit r;
2177   mp_size  ix;
2178   mp_err   res;
2179 
2180   if (mp_cmp_z(c) < 0) {                /* c < 0 */
2181     MP_CHECKOK( mp_add(c, p, x) );      /* x = c + p */
2182   } else {
2183     MP_CHECKOK( mp_copy(c, x) );        /* x = c */
2184   }
2185 
2186   /* make sure x is large enough */
2187   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2188   ix = MP_MAX(ix, MP_USED(x));
2189   MP_CHECKOK( s_mp_pad(x, ix) );
2190 
2191   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2192 
2193   for (ix = 0; k > 0; ix++) {
2194     int      j = MP_MIN(k, MP_DIGIT_BIT);
2195     mp_digit v = r * MP_DIGIT(x, ix);
2196     if (j < MP_DIGIT_BIT) {
2197       v &= ((mp_digit)1 << j) - 1;      /* v = v mod (2 ** j) */
2198     }
2199     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2200     k -= j;
2201   }
2202   s_mp_clamp(x);
2203   s_mp_div_2d(x, k_orig);
2204   res = MP_OKAY;
2205 
2206 CLEANUP:
2207   return res;
2208 }
2209 
2210 /* compute mod inverse using Schroeppel's method, only if m is odd */
2211 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2212 {
2213   int k;
2214   mp_err  res;
2215   mp_int  x;
2216 
2217   ARGCHK(a && m && c, MP_BADARG);
2218 
2219   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2220     return MP_RANGE;
2221   if (mp_iseven(m))
2222     return MP_UNDEF;
2223 
2224   MP_DIGITS(&x) = 0;
2225 
2226   if (a == c) {
2227     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2228       return res;
2229     if (a == m)
2230       m = &x;
2231     a = &x;
2232   } else if (m == c) {
2233     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2234       return res;
2235     m = &x;
2236   } else {
2237     MP_DIGITS(&x) = 0;
2238   }
2239 
2240   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2241   k = res;
2242   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2243 CLEANUP:
2244   mp_clear(&x);
2245   return res;
2246 }
2247 
2248 /* Known good algorithm for computing modular inverse.  But slow. */
2249 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2250 {
2251   mp_int  g, x;
2252   mp_err  res;
2253 
2254   ARGCHK(a && m && c, MP_BADARG);
2255 
2256   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2257     return MP_RANGE;
2258 
2259   MP_DIGITS(&g) = 0;
2260   MP_DIGITS(&x) = 0;
2261   MP_CHECKOK( mp_init(&x, FLAG(a)) );
2262   MP_CHECKOK( mp_init(&g, FLAG(a)) );
2263 
2264   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2265 
2266   if (mp_cmp_d(&g, 1) != MP_EQ) {
2267     res = MP_UNDEF;
2268     goto CLEANUP;
2269   }
2270 
2271   res = mp_mod(&x, m, c);
2272   SIGN(c) = SIGN(a);
2273 
2274 CLEANUP:
2275   mp_clear(&x);
2276   mp_clear(&g);
2277 
2278   return res;
2279 }
2280 
2281 /* modular inverse where modulus is 2**k. */
2282 /* c = a**-1 mod 2**k */
2283 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2284 {
2285   mp_err res;
2286   mp_size ix = k + 4;
2287   mp_int t0, t1, val, tmp, two2k;
2288 
2289   static const mp_digit d2 = 2;
2290   static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2291 
2292   if (mp_iseven(a))
2293     return MP_UNDEF;
2294   if (k <= MP_DIGIT_BIT) {
2295     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2296     if (k < MP_DIGIT_BIT)
2297       i &= ((mp_digit)1 << k) - (mp_digit)1;
2298     mp_set(c, i);
2299     return MP_OKAY;
2300   }
2301   MP_DIGITS(&t0) = 0;
2302   MP_DIGITS(&t1) = 0;
2303   MP_DIGITS(&val) = 0;
2304   MP_DIGITS(&tmp) = 0;
2305   MP_DIGITS(&two2k) = 0;
2306   MP_CHECKOK( mp_init_copy(&val, a) );
2307   s_mp_mod_2d(&val, k);
2308   MP_CHECKOK( mp_init_copy(&t0, &val) );
2309   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2310   MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2311   MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2312   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2313   do {
2314     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2315     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2316     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2317     s_mp_mod_2d(&t1, k);
2318     while (MP_SIGN(&t1) != MP_ZPOS) {
2319       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2320     }
2321     if (mp_cmp(&t1, &t0) == MP_EQ)
2322       break;
2323     MP_CHECKOK( mp_copy(&t1, &t0) );
2324   } while (--ix > 0);
2325   if (!ix) {
2326     res = MP_UNDEF;
2327   } else {
2328     mp_exch(c, &t1);
2329   }
2330 
2331 CLEANUP:
2332   mp_clear(&t0);
2333   mp_clear(&t1);
2334   mp_clear(&val);
2335   mp_clear(&tmp);
2336   mp_clear(&two2k);
2337   return res;
2338 }
2339 
2340 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2341 {
2342   mp_err res;
2343   mp_size k;
2344   mp_int oddFactor, evenFactor; /* factors of the modulus */
2345   mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2346   mp_int C2, tmp1, tmp2;
2347 
2348   /*static const mp_digit d1 = 1; */
2349   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2350 
2351   if ((res = s_mp_ispow2(m)) >= 0) {
2352     k = res;
2353     return s_mp_invmod_2d(a, k, c);
2354   }
2355   MP_DIGITS(&oddFactor) = 0;
2356   MP_DIGITS(&evenFactor) = 0;
2357   MP_DIGITS(&oddPart) = 0;
2358   MP_DIGITS(&evenPart) = 0;
2359   MP_DIGITS(&C2)     = 0;
2360   MP_DIGITS(&tmp1)   = 0;
2361   MP_DIGITS(&tmp2)   = 0;
2362 
2363   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2364   MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2365   MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2366   MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2367   MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2368   MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2369   MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2370 
2371   k = mp_trailing_zeros(m);
2372   s_mp_div_2d(&oddFactor, k);
2373   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2374 
2375   /* compute a**-1 mod oddFactor. */
2376   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2377   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2378   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2379 
2380   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2381   /* let m1 = oddFactor,  v1 = oddPart,
2382    * let m2 = evenFactor, v2 = evenPart.
2383    */
2384 
2385   /* Compute C2 = m1**-1 mod m2. */
2386   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2387 
2388   /* compute u = (v2 - v1)*C2 mod m2 */
2389   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2390   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2391   s_mp_mod_2d(&tmp2, k);
2392   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2393     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2394   }
2395 
2396   /* compute answer = v1 + u*m1 */
2397   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2398   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2399   /* not sure this is necessary, but it's low cost if not. */
2400   MP_CHECKOK( mp_mod(c,         m,          c) );
2401 
2402 CLEANUP:
2403   mp_clear(&oddFactor);
2404   mp_clear(&evenFactor);
2405   mp_clear(&oddPart);
2406   mp_clear(&evenPart);
2407   mp_clear(&C2);
2408   mp_clear(&tmp1);
2409   mp_clear(&tmp2);
2410   return res;
2411 }
2412 
2413 
2414 /* {{{ mp_invmod(a, m, c) */
2415 
2416 /*
2417   mp_invmod(a, m, c)
2418 
2419   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2420   This is equivalent to the question of whether (a, m) = 1.  If not,
2421   MP_UNDEF is returned, and there is no inverse.
2422  */
2423 
2424 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2425 {
2426 
2427   ARGCHK(a && m && c, MP_BADARG);
2428 
2429   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2430     return MP_RANGE;
2431 
2432   if (mp_isodd(m)) {
2433     return s_mp_invmod_odd_m(a, m, c);
2434   }
2435   if (mp_iseven(a))
2436     return MP_UNDEF;    /* not invertable */
2437 
2438   return s_mp_invmod_even_m(a, m, c);
2439 
2440 } /* end mp_invmod() */
2441 
2442 /* }}} */
2443 #endif /* if MP_NUMTH */
2444 
2445 /* }}} */
2446 
2447 /*------------------------------------------------------------------------*/
2448 /* {{{ mp_print(mp, ofp) */
2449 
2450 #if MP_IOFUNC
2451 /*
2452   mp_print(mp, ofp)
2453 
2454   Print a textual representation of the given mp_int on the output
2455   stream 'ofp'.  Output is generated using the internal radix.
2456  */
2457 
2458 void   mp_print(mp_int *mp, FILE *ofp)
2459 {
2460   int   ix;
2461 
2462   if(mp == NULL || ofp == NULL)
2463     return;
2464 
2465   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2466 
2467   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2468     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2469   }
2470 
2471 } /* end mp_print() */
2472 
2473 #endif /* if MP_IOFUNC */
2474 
2475 /* }}} */
2476 
2477 /*------------------------------------------------------------------------*/
2478 /* {{{ More I/O Functions */
2479 
2480 /* {{{ mp_read_raw(mp, str, len) */
2481 
2482 /*
2483    mp_read_raw(mp, str, len)
2484 
2485    Read in a raw value (base 256) into the given mp_int
2486  */
2487 
2488 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2489 {
2490   int            ix;
2491   mp_err         res;
2492   unsigned char *ustr = (unsigned char *)str;
2493 
2494   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2495 
2496   mp_zero(mp);
2497 
2498   /* Get sign from first byte */
2499   if(ustr[0])
2500     SIGN(mp) = NEG;
2501   else
2502     SIGN(mp) = ZPOS;
2503 
2504   /* Read the rest of the digits */
2505   for(ix = 1; ix < len; ix++) {
2506     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2507       return res;
2508     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2509       return res;
2510   }
2511 
2512   return MP_OKAY;
2513 
2514 } /* end mp_read_raw() */
2515 
2516 /* }}} */
2517 
2518 /* {{{ mp_raw_size(mp) */
2519 
2520 int    mp_raw_size(mp_int *mp)
2521 {
2522   ARGCHK(mp != NULL, 0);
2523 
2524   return (USED(mp) * sizeof(mp_digit)) + 1;
2525 
2526 } /* end mp_raw_size() */
2527 
2528 /* }}} */
2529 
2530 /* {{{ mp_toraw(mp, str) */
2531 
2532 mp_err mp_toraw(mp_int *mp, char *str)
2533 {
2534   int  ix, jx, pos = 1;
2535 
2536   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2537 
2538   str[0] = (char)SIGN(mp);
2539 
2540   /* Iterate over each digit... */
2541   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2542     mp_digit  d = DIGIT(mp, ix);
2543 
2544     /* Unpack digit bytes, high order first */
2545     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2546       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2547     }
2548   }
2549 
2550   return MP_OKAY;
2551 
2552 } /* end mp_toraw() */
2553 
2554 /* }}} */
2555 
2556 /* {{{ mp_read_radix(mp, str, radix) */
2557 
2558 /*
2559   mp_read_radix(mp, str, radix)
2560 
2561   Read an integer from the given string, and set mp to the resulting
2562   value.  The input is presumed to be in base 10.  Leading non-digit
2563   characters are ignored, and the function reads until a non-digit
2564   character or the end of the string.
2565  */
2566 
2567 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2568 {
2569   int     ix = 0, val = 0;
2570   mp_err  res;
2571   mp_sign sig = ZPOS;
2572 
2573   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2574          MP_BADARG);
2575 
2576   mp_zero(mp);
2577 
2578   /* Skip leading non-digit characters until a digit or '-' or '+' */
2579   while(str[ix] &&
2580         (s_mp_tovalue(str[ix], radix) < 0) &&
2581         str[ix] != '-' &&
2582         str[ix] != '+') {
2583     ++ix;
2584   }
2585 
2586   if(str[ix] == '-') {
2587     sig = NEG;
2588     ++ix;
2589   } else if(str[ix] == '+') {
2590     sig = ZPOS; /* this is the default anyway... */
2591     ++ix;
2592   }
2593 
2594   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2595     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2596       return res;
2597     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2598       return res;
2599     ++ix;
2600   }
2601 
2602   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2603     SIGN(mp) = ZPOS;
2604   else
2605     SIGN(mp) = sig;
2606 
2607   return MP_OKAY;
2608 
2609 } /* end mp_read_radix() */
2610 
2611 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2612 {
2613   int     radix = default_radix;
2614   int     cx;
2615   mp_sign sig   = ZPOS;
2616   mp_err  res;
2617 
2618   /* Skip leading non-digit characters until a digit or '-' or '+' */
2619   while ((cx = *str) != 0 &&
2620         (s_mp_tovalue(cx, radix) < 0) &&
2621         cx != '-' &&
2622         cx != '+') {
2623     ++str;
2624   }
2625 
2626   if (cx == '-') {
2627     sig = NEG;
2628     ++str;
2629   } else if (cx == '+') {
2630     sig = ZPOS; /* this is the default anyway... */
2631     ++str;
2632   }
2633 
2634   if (str[0] == '0') {
2635     if ((str[1] | 0x20) == 'x') {
2636       radix = 16;
2637       str += 2;
2638     } else {
2639       radix = 8;
2640       str++;
2641     }
2642   }
2643   res = mp_read_radix(a, str, radix);
2644   if (res == MP_OKAY) {
2645     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2646   }
2647   return res;
2648 }
2649 
2650 /* }}} */
2651 
2652 /* {{{ mp_radix_size(mp, radix) */
2653 
2654 int    mp_radix_size(mp_int *mp, int radix)
2655 {
2656   int  bits;
2657 
2658   if(!mp || radix < 2 || radix > MAX_RADIX)
2659     return 0;
2660 
2661   bits = USED(mp) * DIGIT_BIT - 1;
2662 
2663   return s_mp_outlen(bits, radix);
2664 
2665 } /* end mp_radix_size() */
2666 
2667 /* }}} */
2668 
2669 /* {{{ mp_toradix(mp, str, radix) */
2670 
2671 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2672 {
2673   int  ix, pos = 0;
2674 
2675   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2676   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2677 
2678   if(mp_cmp_z(mp) == MP_EQ) {
2679     str[0] = '0';
2680     str[1] = '\0';
2681   } else {
2682     mp_err   res;
2683     mp_int   tmp;
2684     mp_sign  sgn;
2685     mp_digit rem, rdx = (mp_digit)radix;
2686     char     ch;
2687 
2688     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2689       return res;
2690 
2691     /* Save sign for later, and take absolute value */
2692     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2693 
2694     /* Generate output digits in reverse order      */
2695     while(mp_cmp_z(&tmp) != 0) {
2696       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2697         mp_clear(&tmp);
2698         return res;
2699       }
2700 
2701       /* Generate digits, use capital letters */
2702       ch = s_mp_todigit(rem, radix, 0);
2703 
2704       str[pos++] = ch;
2705     }
2706 
2707     /* Add - sign if original value was negative */
2708     if(sgn == NEG)
2709       str[pos++] = '-';
2710 
2711     /* Add trailing NUL to end the string        */
2712     str[pos--] = '\0';
2713 
2714     /* Reverse the digits and sign indicator     */
2715     ix = 0;
2716     while(ix < pos) {
2717       char tmp = str[ix];
2718 
2719       str[ix] = str[pos];
2720       str[pos] = tmp;
2721       ++ix;
2722       --pos;
2723     }
2724 
2725     mp_clear(&tmp);
2726   }
2727 
2728   return MP_OKAY;
2729 
2730 } /* end mp_toradix() */
2731 
2732 /* }}} */
2733 
2734 /* {{{ mp_tovalue(ch, r) */
2735 
2736 int    mp_tovalue(char ch, int r)
2737 {
2738   return s_mp_tovalue(ch, r);
2739 
2740 } /* end mp_tovalue() */
2741 
2742 /* }}} */
2743 
2744 /* }}} */
2745 
2746 /* {{{ mp_strerror(ec) */
2747 
2748 /*
2749   mp_strerror(ec)
2750 
2751   Return a string describing the meaning of error code 'ec'.  The
2752   string returned is allocated in static memory, so the caller should
2753   not attempt to modify or free the memory associated with this
2754   string.
2755  */
2756 const char  *mp_strerror(mp_err ec)
2757 {
2758   int   aec = (ec < 0) ? -ec : ec;
2759 
2760   /* Code values are negative, so the senses of these comparisons
2761      are accurate */
2762   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2763     return mp_err_string[0];  /* unknown error code */
2764   } else {
2765     return mp_err_string[aec + 1];
2766   }
2767 
2768 } /* end mp_strerror() */
2769 
2770 /* }}} */
2771 
2772 /*========================================================================*/
2773 /*------------------------------------------------------------------------*/
2774 /* Static function definitions (internal use only)                        */
2775 
2776 /* {{{ Memory management */
2777 
2778 /* {{{ s_mp_grow(mp, min) */
2779 
2780 /* Make sure there are at least 'min' digits allocated to mp              */
2781 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2782 {
2783   if(min > ALLOC(mp)) {
2784     mp_digit   *tmp;
2785 
2786     /* Set min to next nearest default precision block size */
2787     min = MP_ROUNDUP(min, s_mp_defprec);
2788 
2789     if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2790       return MP_MEM;
2791 
2792     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2793 
2794 #if MP_CRYPTO
2795     s_mp_setz(DIGITS(mp), ALLOC(mp));
2796 #endif
2797     s_mp_free(DIGITS(mp), ALLOC(mp));
2798     DIGITS(mp) = tmp;
2799     ALLOC(mp) = min;
2800   }
2801 
2802   return MP_OKAY;
2803 
2804 } /* end s_mp_grow() */
2805 
2806 /* }}} */
2807 
2808 /* {{{ s_mp_pad(mp, min) */
2809 
2810 /* Make sure the used size of mp is at least 'min', growing if needed     */
2811 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2812 {
2813   if(min > USED(mp)) {
2814     mp_err  res;
2815 
2816     /* Make sure there is room to increase precision  */
2817     if (min > ALLOC(mp)) {
2818       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2819         return res;
2820     } else {
2821       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2822     }
2823 
2824     /* Increase precision; should already be 0-filled */
2825     USED(mp) = min;
2826   }
2827 
2828   return MP_OKAY;
2829 
2830 } /* end s_mp_pad() */
2831 
2832 /* }}} */
2833 
2834 /* {{{ s_mp_setz(dp, count) */
2835 
2836 #if MP_MACRO == 0
2837 /* Set 'count' digits pointed to by dp to be zeroes                       */
2838 void s_mp_setz(mp_digit *dp, mp_size count)
2839 {
2840 #if MP_MEMSET == 0
2841   int  ix;
2842 
2843   for(ix = 0; ix < count; ix++)
2844     dp[ix] = 0;
2845 #else
2846   memset(dp, 0, count * sizeof(mp_digit));
2847 #endif
2848 
2849 } /* end s_mp_setz() */
2850 #endif
2851 
2852 /* }}} */
2853 
2854 /* {{{ s_mp_copy(sp, dp, count) */
2855 
2856 #if MP_MACRO == 0
2857 /* Copy 'count' digits from sp to dp                                      */
2858 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2859 {
2860 #if MP_MEMCPY == 0
2861   int  ix;
2862 
2863   for(ix = 0; ix < count; ix++)
2864     dp[ix] = sp[ix];
2865 #else
2866   memcpy(dp, sp, count * sizeof(mp_digit));
2867 #endif
2868 
2869 } /* end s_mp_copy() */
2870 #endif
2871 
2872 /* }}} */
2873 
2874 /* {{{ s_mp_alloc(nb, ni, kmflag) */
2875 
2876 #if MP_MACRO == 0
2877 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2878 void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2879 {
2880   ++mp_allocs;
2881 #ifdef _KERNEL
2882   mp_int *mp;
2883   mp = kmem_zalloc(nb * ni, kmflag);
2884   if (mp != NULL)
2885     FLAG(mp) = kmflag;
2886   return (mp);
2887 #else
2888   return calloc(nb, ni);
2889 #endif
2890 
2891 } /* end s_mp_alloc() */
2892 #endif
2893 
2894 /* }}} */
2895 
2896 /* {{{ s_mp_free(ptr) */
2897 
2898 #if MP_MACRO == 0
2899 /* Free the memory pointed to by ptr                                      */
2900 void     s_mp_free(void *ptr, mp_size alloc)
2901 {
2902   if(ptr) {
2903     ++mp_frees;
2904 #ifdef _KERNEL
2905     kmem_free(ptr, alloc * sizeof (mp_digit));
2906 #else
2907     free(ptr);
2908 #endif
2909   }
2910 } /* end s_mp_free() */
2911 #endif
2912 
2913 /* }}} */
2914 
2915 /* {{{ s_mp_clamp(mp) */
2916 
2917 #if MP_MACRO == 0
2918 /* Remove leading zeroes from the given value                             */
2919 void     s_mp_clamp(mp_int *mp)
2920 {
2921   mp_size used = MP_USED(mp);
2922   while (used > 1 && DIGIT(mp, used - 1) == 0)
2923     --used;
2924   MP_USED(mp) = used;
2925 } /* end s_mp_clamp() */
2926 #endif
2927 
2928 /* }}} */
2929 
2930 /* {{{ s_mp_exch(a, b) */
2931 
2932 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2933 void     s_mp_exch(mp_int *a, mp_int *b)
2934 {
2935   mp_int   tmp;
2936 
2937   tmp = *a;
2938   *a = *b;
2939   *b = tmp;
2940 
2941 } /* end s_mp_exch() */
2942 
2943 /* }}} */
2944 
2945 /* }}} */
2946 
2947 /* {{{ Arithmetic helpers */
2948 
2949 /* {{{ s_mp_lshd(mp, p) */
2950 
2951 /*
2952    Shift mp leftward by p digits, growing if needed, and zero-filling
2953    the in-shifted digits at the right end.  This is a convenient
2954    alternative to multiplication by powers of the radix
2955    The value of USED(mp) must already have been set to the value for
2956    the shifted result.
2957  */
2958 
2959 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2960 {
2961   mp_err  res;
2962   mp_size pos;
2963   int     ix;
2964 
2965   if(p == 0)
2966     return MP_OKAY;
2967 
2968   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2969     return MP_OKAY;
2970 
2971   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2972     return res;
2973 
2974   pos = USED(mp) - 1;
2975 
2976   /* Shift all the significant figures over as needed */
2977   for(ix = pos - p; ix >= 0; ix--)
2978     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2979 
2980   /* Fill the bottom digits with zeroes */
2981   for(ix = 0; ix < p; ix++)
2982     DIGIT(mp, ix) = 0;
2983 
2984   return MP_OKAY;
2985 
2986 } /* end s_mp_lshd() */
2987 
2988 /* }}} */
2989 
2990 /* {{{ s_mp_mul_2d(mp, d) */
2991 
2992 /*
2993   Multiply the integer by 2^d, where d is a number of bits.  This
2994   amounts to a bitwise shift of the value.
2995  */
2996 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
2997 {
2998   mp_err   res;
2999   mp_digit dshift, bshift;
3000   mp_digit mask;
3001 
3002   ARGCHK(mp != NULL,  MP_BADARG);
3003 
3004   dshift = d / MP_DIGIT_BIT;
3005   bshift = d % MP_DIGIT_BIT;
3006   /* bits to be shifted out of the top word */
3007   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3008   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3009 
3010   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3011     return res;
3012 
3013   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3014     return res;
3015 
3016   if (bshift) {
3017     mp_digit *pa = MP_DIGITS(mp);
3018     mp_digit *alim = pa + MP_USED(mp);
3019     mp_digit  prev = 0;
3020 
3021     for (pa += dshift; pa < alim; ) {
3022       mp_digit x = *pa;
3023       *pa++ = (x << bshift) | prev;
3024       prev = x >> (DIGIT_BIT - bshift);
3025     }
3026   }
3027 
3028   s_mp_clamp(mp);
3029   return MP_OKAY;
3030 } /* end s_mp_mul_2d() */
3031 
3032 /* {{{ s_mp_rshd(mp, p) */
3033 
3034 /*
3035    Shift mp rightward by p digits.  Maintains the invariant that
3036    digits above the precision are all zero.  Digits shifted off the
3037    end are lost.  Cannot fail.
3038  */
3039 
3040 void     s_mp_rshd(mp_int *mp, mp_size p)
3041 {
3042   mp_size  ix;
3043   mp_digit *src, *dst;
3044 
3045   if(p == 0)
3046     return;
3047 
3048   /* Shortcut when all digits are to be shifted off */
3049   if(p >= USED(mp)) {
3050     s_mp_setz(DIGITS(mp), ALLOC(mp));
3051     USED(mp) = 1;
3052     SIGN(mp) = ZPOS;
3053     return;
3054   }
3055 
3056   /* Shift all the significant figures over as needed */
3057   dst = MP_DIGITS(mp);
3058   src = dst + p;
3059   for (ix = USED(mp) - p; ix > 0; ix--)
3060     *dst++ = *src++;
3061 
3062   MP_USED(mp) -= p;
3063   /* Fill the top digits with zeroes */
3064   while (p-- > 0)
3065     *dst++ = 0;
3066 
3067 #if 0
3068   /* Strip off any leading zeroes    */
3069   s_mp_clamp(mp);
3070 #endif
3071 
3072 } /* end s_mp_rshd() */
3073 
3074 /* }}} */
3075 
3076 /* {{{ s_mp_div_2(mp) */
3077 
3078 /* Divide by two -- take advantage of radix properties to do it fast      */
3079 void     s_mp_div_2(mp_int *mp)
3080 {
3081   s_mp_div_2d(mp, 1);
3082 
3083 } /* end s_mp_div_2() */
3084 
3085 /* }}} */
3086 
3087 /* {{{ s_mp_mul_2(mp) */
3088 
3089 mp_err s_mp_mul_2(mp_int *mp)
3090 {
3091   mp_digit *pd;
3092   unsigned int      ix, used;
3093   mp_digit kin = 0;
3094 
3095   /* Shift digits leftward by 1 bit */
3096   used = MP_USED(mp);
3097   pd = MP_DIGITS(mp);
3098   for (ix = 0; ix < used; ix++) {
3099     mp_digit d = *pd;
3100     *pd++ = (d << 1) | kin;
3101     kin = (d >> (DIGIT_BIT - 1));
3102   }
3103 
3104   /* Deal with rollover from last digit */
3105   if (kin) {
3106     if (ix >= ALLOC(mp)) {
3107       mp_err res;
3108       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3109         return res;
3110     }
3111 
3112     DIGIT(mp, ix) = kin;
3113     USED(mp) += 1;
3114   }
3115 
3116   return MP_OKAY;
3117 
3118 } /* end s_mp_mul_2() */
3119 
3120 /* }}} */
3121 
3122 /* {{{ s_mp_mod_2d(mp, d) */
3123 
3124 /*
3125   Remainder the integer by 2^d, where d is a number of bits.  This
3126   amounts to a bitwise AND of the value, and does not require the full
3127   division code
3128  */
3129 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3130 {
3131   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3132   mp_size  ix;
3133   mp_digit dmask;
3134 
3135   if(ndig >= USED(mp))
3136     return;
3137 
3138   /* Flush all the bits above 2^d in its digit */
3139   dmask = ((mp_digit)1 << nbit) - 1;
3140   DIGIT(mp, ndig) &= dmask;
3141 
3142   /* Flush all digits above the one with 2^d in it */
3143   for(ix = ndig + 1; ix < USED(mp); ix++)
3144     DIGIT(mp, ix) = 0;
3145 
3146   s_mp_clamp(mp);
3147 
3148 } /* end s_mp_mod_2d() */
3149 
3150 /* }}} */
3151 
3152 /* {{{ s_mp_div_2d(mp, d) */
3153 
3154 /*
3155   Divide the integer by 2^d, where d is a number of bits.  This
3156   amounts to a bitwise shift of the value, and does not require the
3157   full division code (used in Barrett reduction, see below)
3158  */
3159 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3160 {
3161   int       ix;
3162   mp_digit  save, next, mask;
3163 
3164   s_mp_rshd(mp, d / DIGIT_BIT);
3165   d %= DIGIT_BIT;
3166   if (d) {
3167     mask = ((mp_digit)1 << d) - 1;
3168     save = 0;
3169     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3170       next = DIGIT(mp, ix) & mask;
3171       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3172       save = next;
3173     }
3174   }
3175   s_mp_clamp(mp);
3176 
3177 } /* end s_mp_div_2d() */
3178 
3179 /* }}} */
3180 
3181 /* {{{ s_mp_norm(a, b, *d) */
3182 
3183 /*
3184   s_mp_norm(a, b, *d)
3185 
3186   Normalize a and b for division, where b is the divisor.  In order
3187   that we might make good guesses for quotient digits, we want the
3188   leading digit of b to be at least half the radix, which we
3189   accomplish by multiplying a and b by a power of 2.  The exponent
3190   (shift count) is placed in *pd, so that the remainder can be shifted
3191   back at the end of the division process.
3192  */
3193 
3194 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3195 {
3196   mp_digit  d;
3197   mp_digit  mask;
3198   mp_digit  b_msd;
3199   mp_err    res    = MP_OKAY;
3200 
3201   d = 0;
3202   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);        /* mask is msb of digit */
3203   b_msd = DIGIT(b, USED(b) - 1);
3204   while (!(b_msd & mask)) {
3205     b_msd <<= 1;
3206     ++d;
3207   }
3208 
3209   if (d) {
3210     MP_CHECKOK( s_mp_mul_2d(a, d) );
3211     MP_CHECKOK( s_mp_mul_2d(b, d) );
3212   }
3213 
3214   *pd = d;
3215 CLEANUP:
3216   return res;
3217 
3218 } /* end s_mp_norm() */
3219 
3220 /* }}} */
3221 
3222 /* }}} */
3223 
3224 /* {{{ Primitive digit arithmetic */
3225 
3226 /* {{{ s_mp_add_d(mp, d) */
3227 
3228 /* Add d to |mp| in place                                                 */
3229 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3230 {
3231 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3232   mp_word   w, k = 0;
3233   mp_size   ix = 1;
3234 
3235   w = (mp_word)DIGIT(mp, 0) + d;
3236   DIGIT(mp, 0) = ACCUM(w);
3237   k = CARRYOUT(w);
3238 
3239   while(ix < USED(mp) && k) {
3240     w = (mp_word)DIGIT(mp, ix) + k;
3241     DIGIT(mp, ix) = ACCUM(w);
3242     k = CARRYOUT(w);
3243     ++ix;
3244   }
3245 
3246   if(k != 0) {
3247     mp_err  res;
3248 
3249     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3250       return res;
3251 
3252     DIGIT(mp, ix) = (mp_digit)k;
3253   }
3254 
3255   return MP_OKAY;
3256 #else
3257   mp_digit * pmp = MP_DIGITS(mp);
3258   mp_digit sum, mp_i, carry = 0;
3259   mp_err   res = MP_OKAY;
3260   int used = (int)MP_USED(mp);
3261 
3262   mp_i = *pmp;
3263   *pmp++ = sum = d + mp_i;
3264   carry = (sum < d);
3265   while (carry && --used > 0) {
3266     mp_i = *pmp;
3267     *pmp++ = sum = carry + mp_i;
3268     carry = !sum;
3269   }
3270   if (carry && !used) {
3271     /* mp is growing */
3272     used = MP_USED(mp);
3273     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3274     MP_DIGIT(mp, used) = carry;
3275   }
3276 CLEANUP:
3277   return res;
3278 #endif
3279 } /* end s_mp_add_d() */
3280 
3281 /* }}} */
3282 
3283 /* {{{ s_mp_sub_d(mp, d) */
3284 
3285 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3286 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3287 {
3288 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3289   mp_word   w, b = 0;
3290   mp_size   ix = 1;
3291 
3292   /* Compute initial subtraction    */
3293   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3294   b = CARRYOUT(w) ? 0 : 1;
3295   DIGIT(mp, 0) = ACCUM(w);
3296 
3297   /* Propagate borrows leftward     */
3298   while(b && ix < USED(mp)) {
3299     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3300     b = CARRYOUT(w) ? 0 : 1;
3301     DIGIT(mp, ix) = ACCUM(w);
3302     ++ix;
3303   }
3304 
3305   /* Remove leading zeroes          */
3306   s_mp_clamp(mp);
3307 
3308   /* If we have a borrow out, it's a violation of the input invariant */
3309   if(b)
3310     return MP_RANGE;
3311   else
3312     return MP_OKAY;
3313 #else
3314   mp_digit *pmp = MP_DIGITS(mp);
3315   mp_digit mp_i, diff, borrow;
3316   mp_size  used = MP_USED(mp);
3317 
3318   mp_i = *pmp;
3319   *pmp++ = diff = mp_i - d;
3320   borrow = (diff > mp_i);
3321   while (borrow && --used) {
3322     mp_i = *pmp;
3323     *pmp++ = diff = mp_i - borrow;
3324     borrow = (diff > mp_i);
3325   }
3326   s_mp_clamp(mp);
3327   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3328 #endif
3329 } /* end s_mp_sub_d() */
3330 
3331 /* }}} */
3332 
3333 /* {{{ s_mp_mul_d(a, d) */
3334 
3335 /* Compute a = a * d, single digit multiplication                         */
3336 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3337 {
3338   mp_err  res;
3339   mp_size used;
3340   int     pow;
3341 
3342   if (!d) {
3343     mp_zero(a);
3344     return MP_OKAY;
3345   }
3346   if (d == 1)
3347     return MP_OKAY;
3348   if (0 <= (pow = s_mp_ispow2d(d))) {
3349     return s_mp_mul_2d(a, (mp_digit)pow);
3350   }
3351 
3352   used = MP_USED(a);
3353   MP_CHECKOK( s_mp_pad(a, used + 1) );
3354 
3355   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3356 
3357   s_mp_clamp(a);
3358 
3359 CLEANUP:
3360   return res;
3361 
3362 } /* end s_mp_mul_d() */
3363 
3364 /* }}} */
3365 
3366 /* {{{ s_mp_div_d(mp, d, r) */
3367 
3368 /*
3369   s_mp_div_d(mp, d, r)
3370 
3371   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3372   single digit d.  If r is null, the remainder will be discarded.
3373  */
3374 
3375 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3376 {
3377 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3378   mp_word   w = 0, q;
3379 #else
3380   mp_digit  w = 0, q;
3381 #endif
3382   int       ix;
3383   mp_err    res;
3384   mp_int    quot;
3385   mp_int    rem;
3386 
3387   if(d == 0)
3388     return MP_RANGE;
3389   if (d == 1) {
3390     if (r)
3391       *r = 0;
3392     return MP_OKAY;
3393   }
3394   /* could check for power of 2 here, but mp_div_d does that. */
3395   if (MP_USED(mp) == 1) {
3396     mp_digit n   = MP_DIGIT(mp,0);
3397     mp_digit rem;
3398 
3399     q   = n / d;
3400     rem = n % d;
3401     MP_DIGIT(mp,0) = q;
3402     if (r)
3403       *r = rem;
3404     return MP_OKAY;
3405   }
3406 
3407   MP_DIGITS(&rem)  = 0;
3408   MP_DIGITS(&quot) = 0;
3409   /* Make room for the quotient */
3410   MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3411 
3412 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3413   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3414     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3415 
3416     if(w >= d) {
3417       q = w / d;
3418       w = w % d;
3419     } else {
3420       q = 0;
3421     }
3422 
3423     s_mp_lshd(&quot, 1);
3424     DIGIT(&quot, 0) = (mp_digit)q;
3425   }
3426 #else
3427   {
3428     mp_digit p;
3429 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3430     mp_digit norm;
3431 #endif
3432 
3433     MP_CHECKOK( mp_init_copy(&rem, mp) );
3434 
3435 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3436     MP_DIGIT(&quot, 0) = d;
3437     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3438     if (norm)
3439       d <<= norm;
3440     MP_DIGIT(&quot, 0) = 0;
3441 #endif
3442 
3443     p = 0;
3444     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3445       w = DIGIT(&rem, ix);
3446 
3447       if (p) {
3448         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3449       } else if (w >= d) {
3450         q = w / d;
3451         w = w % d;
3452       } else {
3453         q = 0;
3454       }
3455 
3456       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3457       DIGIT(&quot, 0) = q;
3458       p = w;
3459     }
3460 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3461     if (norm)
3462       w >>= norm;
3463 #endif
3464   }
3465 #endif
3466 
3467   /* Deliver the remainder, if desired */
3468   if(r)
3469     *r = (mp_digit)w;
3470 
3471   s_mp_clamp(&quot);
3472   mp_exch(&quot, mp);
3473 CLEANUP:
3474   mp_clear(&quot);
3475   mp_clear(&rem);
3476 
3477   return res;
3478 } /* end s_mp_div_d() */
3479 
3480 /* }}} */
3481 
3482 
3483 /* }}} */
3484 
3485 /* {{{ Primitive full arithmetic */
3486 
3487 /* {{{ s_mp_add(a, b) */
3488 
3489 /* Compute a = |a| + |b|                                                  */
3490 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3491 {
3492 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3493   mp_word   w = 0;
3494 #else
3495   mp_digit  d, sum, carry = 0;
3496 #endif
3497   mp_digit *pa, *pb;
3498   mp_size   ix;
3499   mp_size   used;
3500   mp_err    res;
3501 
3502   /* Make sure a has enough precision for the output value */
3503   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3504     return res;
3505 
3506   /*
3507     Add up all digits up to the precision of b.  If b had initially
3508     the same precision as a, or greater, we took care of it by the
3509     padding step above, so there is no problem.  If b had initially
3510     less precision, we'll have to make sure the carry out is duly
3511     propagated upward among the higher-order digits of the sum.
3512    */
3513   pa = MP_DIGITS(a);
3514   pb = MP_DIGITS(b);
3515   used = MP_USED(b);
3516   for(ix = 0; ix < used; ix++) {
3517 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3518     w = w + *pa + *pb++;
3519     *pa++ = ACCUM(w);
3520     w = CARRYOUT(w);
3521 #else
3522     d = *pa;
3523     sum = d + *pb++;
3524     d = (sum < d);                      /* detect overflow */
3525     *pa++ = sum += carry;
3526     carry = d + (sum < carry);          /* detect overflow */
3527 #endif
3528   }
3529 
3530   /* If we run out of 'b' digits before we're actually done, make
3531      sure the carries get propagated upward...
3532    */
3533   used = MP_USED(a);
3534 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3535   while (w && ix < used) {
3536     w = w + *pa;
3537     *pa++ = ACCUM(w);
3538     w = CARRYOUT(w);
3539     ++ix;
3540   }
3541 #else
3542   while (carry && ix < used) {
3543     sum = carry + *pa;
3544     *pa++ = sum;
3545     carry = !sum;
3546     ++ix;
3547   }
3548 #endif
3549 
3550   /* If there's an overall carry out, increase precision and include
3551      it.  We could have done this initially, but why touch the memory
3552      allocator unless we're sure we have to?
3553    */
3554 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3555   if (w) {
3556     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3557       return res;
3558 
3559     DIGIT(a, ix) = (mp_digit)w;
3560   }
3561 #else
3562   if (carry) {
3563     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3564       return res;
3565 
3566     DIGIT(a, used) = carry;
3567   }
3568 #endif
3569 
3570   return MP_OKAY;
3571 } /* end s_mp_add() */
3572 
3573 /* }}} */
3574 
3575 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3576 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3577 {
3578   mp_digit *pa, *pb, *pc;
3579 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3580   mp_word   w = 0;
3581 #else
3582   mp_digit  sum, carry = 0, d;
3583 #endif
3584   mp_size   ix;
3585   mp_size   used;
3586   mp_err    res;
3587 
3588   MP_SIGN(c) = MP_SIGN(a);
3589   if (MP_USED(a) < MP_USED(b)) {
3590     const mp_int *xch = a;
3591     a = b;
3592     b = xch;
3593   }
3594 
3595   /* Make sure a has enough precision for the output value */
3596   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3597     return res;
3598 
3599   /*
3600     Add up all digits up to the precision of b.  If b had initially
3601     the same precision as a, or greater, we took care of it by the
3602     exchange step above, so there is no problem.  If b had initially
3603     less precision, we'll have to make sure the carry out is duly
3604     propagated upward among the higher-order digits of the sum.
3605    */
3606   pa = MP_DIGITS(a);
3607   pb = MP_DIGITS(b);
3608   pc = MP_DIGITS(c);
3609   used = MP_USED(b);
3610   for (ix = 0; ix < used; ix++) {
3611 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3612     w = w + *pa++ + *pb++;
3613     *pc++ = ACCUM(w);
3614     w = CARRYOUT(w);
3615 #else
3616     d = *pa++;
3617     sum = d + *pb++;
3618     d = (sum < d);                      /* detect overflow */
3619     *pc++ = sum += carry;
3620     carry = d + (sum < carry);          /* detect overflow */
3621 #endif
3622   }
3623 
3624   /* If we run out of 'b' digits before we're actually done, make
3625      sure the carries get propagated upward...
3626    */
3627   for (used = MP_USED(a); ix < used; ++ix) {
3628 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3629     w = w + *pa++;
3630     *pc++ = ACCUM(w);
3631     w = CARRYOUT(w);
3632 #else
3633     *pc++ = sum = carry + *pa++;
3634     carry = (sum < carry);
3635 #endif
3636   }
3637 
3638   /* If there's an overall carry out, increase precision and include
3639      it.  We could have done this initially, but why touch the memory
3640      allocator unless we're sure we have to?
3641    */
3642 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3643   if (w) {
3644     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3645       return res;
3646 
3647     DIGIT(c, used) = (mp_digit)w;
3648     ++used;
3649   }
3650 #else
3651   if (carry) {
3652     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3653       return res;
3654 
3655     DIGIT(c, used) = carry;
3656     ++used;
3657   }
3658 #endif
3659   MP_USED(c) = used;
3660   return MP_OKAY;
3661 }
3662 /* {{{ s_mp_add_offset(a, b, offset) */
3663 
3664 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3665 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3666 {
3667 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3668   mp_word   w, k = 0;
3669 #else
3670   mp_digit  d, sum, carry = 0;
3671 #endif
3672   mp_size   ib;
3673   mp_size   ia;
3674   mp_size   lim;
3675   mp_err    res;
3676 
3677   /* Make sure a has enough precision for the output value */
3678   lim = MP_USED(b) + offset;
3679   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3680     return res;
3681 
3682   /*
3683     Add up all digits up to the precision of b.  If b had initially
3684     the same precision as a, or greater, we took care of it by the
3685     padding step above, so there is no problem.  If b had initially
3686     less precision, we'll have to make sure the carry out is duly
3687     propagated upward among the higher-order digits of the sum.
3688    */
3689   lim = USED(b);
3690   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3691 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3692     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3693     DIGIT(a, ia) = ACCUM(w);
3694     k = CARRYOUT(w);
3695 #else
3696     d = MP_DIGIT(a, ia);
3697     sum = d + MP_DIGIT(b, ib);
3698     d = (sum < d);
3699     MP_DIGIT(a,ia) = sum += carry;
3700     carry = d + (sum < carry);
3701 #endif
3702   }
3703 
3704   /* If we run out of 'b' digits before we're actually done, make
3705      sure the carries get propagated upward...
3706    */
3707 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3708   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3709     w = (mp_word)DIGIT(a, ia) + k;
3710     DIGIT(a, ia) = ACCUM(w);
3711     k = CARRYOUT(w);
3712   }
3713 #else
3714   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3715     d = MP_DIGIT(a, ia);
3716     MP_DIGIT(a,ia) = sum = d + carry;
3717     carry = (sum < d);
3718   }
3719 #endif
3720 
3721   /* If there's an overall carry out, increase precision and include
3722      it.  We could have done this initially, but why touch the memory
3723      allocator unless we're sure we have to?
3724    */
3725 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3726   if(k) {
3727     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3728       return res;
3729 
3730     DIGIT(a, ia) = (mp_digit)k;
3731   }
3732 #else
3733   if (carry) {
3734     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3735       return res;
3736 
3737     DIGIT(a, lim) = carry;
3738   }
3739 #endif
3740   s_mp_clamp(a);
3741 
3742   return MP_OKAY;
3743 
3744 } /* end s_mp_add_offset() */
3745 
3746 /* }}} */
3747 
3748 /* {{{ s_mp_sub(a, b) */
3749 
3750 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3751 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3752 {
3753   mp_digit *pa, *pb, *limit;
3754 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3755   mp_sword  w = 0;
3756 #else
3757   mp_digit  d, diff, borrow = 0;
3758 #endif
3759 
3760   /*
3761     Subtract and propagate borrow.  Up to the precision of b, this
3762     accounts for the digits of b; after that, we just make sure the
3763     carries get to the right place.  This saves having to pad b out to
3764     the precision of a just to make the loops work right...
3765    */
3766   pa = MP_DIGITS(a);
3767   pb = MP_DIGITS(b);
3768   limit = pb + MP_USED(b);
3769   while (pb < limit) {
3770 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3771     w = w + *pa - *pb++;
3772     *pa++ = ACCUM(w);
3773     w >>= MP_DIGIT_BIT;
3774 #else
3775     d = *pa;
3776     diff = d - *pb++;
3777     d = (diff > d);                             /* detect borrow */
3778     if (borrow && --diff == MP_DIGIT_MAX)
3779       ++d;
3780     *pa++ = diff;
3781     borrow = d;
3782 #endif
3783   }
3784   limit = MP_DIGITS(a) + MP_USED(a);
3785 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3786   while (w && pa < limit) {
3787     w = w + *pa;
3788     *pa++ = ACCUM(w);
3789     w >>= MP_DIGIT_BIT;
3790   }
3791 #else
3792   while (borrow && pa < limit) {
3793     d = *pa;
3794     *pa++ = diff = d - borrow;
3795     borrow = (diff > d);
3796   }
3797 #endif
3798 
3799   /* Clobber any leading zeroes we created    */
3800   s_mp_clamp(a);
3801 
3802   /*
3803      If there was a borrow out, then |b| > |a| in violation
3804      of our input invariant.  We've already done the work,
3805      but we'll at least complain about it...
3806    */
3807 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3808   return w ? MP_RANGE : MP_OKAY;
3809 #else
3810   return borrow ? MP_RANGE : MP_OKAY;
3811 #endif
3812 } /* end s_mp_sub() */
3813 
3814 /* }}} */
3815 
3816 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3817 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3818 {
3819   mp_digit *pa, *pb, *pc;
3820 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3821   mp_sword  w = 0;
3822 #else
3823   mp_digit  d, diff, borrow = 0;
3824 #endif
3825   int       ix, limit;
3826   mp_err    res;
3827 
3828   MP_SIGN(c) = MP_SIGN(a);
3829 
3830   /* Make sure a has enough precision for the output value */
3831   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3832     return res;
3833 
3834   /*
3835     Subtract and propagate borrow.  Up to the precision of b, this
3836     accounts for the digits of b; after that, we just make sure the
3837     carries get to the right place.  This saves having to pad b out to
3838     the precision of a just to make the loops work right...
3839    */
3840   pa = MP_DIGITS(a);
3841   pb = MP_DIGITS(b);
3842   pc = MP_DIGITS(c);
3843   limit = MP_USED(b);
3844   for (ix = 0; ix < limit; ++ix) {
3845 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3846     w = w + *pa++ - *pb++;
3847     *pc++ = ACCUM(w);
3848     w >>= MP_DIGIT_BIT;
3849 #else
3850     d = *pa++;
3851     diff = d - *pb++;
3852     d = (diff > d);
3853     if (borrow && --diff == MP_DIGIT_MAX)
3854       ++d;
3855     *pc++ = diff;
3856     borrow = d;
3857 #endif
3858   }
3859   for (limit = MP_USED(a); ix < limit; ++ix) {
3860 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3861     w = w + *pa++;
3862     *pc++ = ACCUM(w);
3863     w >>= MP_DIGIT_BIT;
3864 #else
3865     d = *pa++;
3866     *pc++ = diff = d - borrow;
3867     borrow = (diff > d);
3868 #endif
3869   }
3870 
3871   /* Clobber any leading zeroes we created    */
3872   MP_USED(c) = ix;
3873   s_mp_clamp(c);
3874 
3875   /*
3876      If there was a borrow out, then |b| > |a| in violation
3877      of our input invariant.  We've already done the work,
3878      but we'll at least complain about it...
3879    */
3880 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3881   return w ? MP_RANGE : MP_OKAY;
3882 #else
3883   return borrow ? MP_RANGE : MP_OKAY;
3884 #endif
3885 }
3886 /* {{{ s_mp_mul(a, b) */
3887 
3888 /* Compute a = |a| * |b|                                                  */
3889 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3890 {
3891   return mp_mul(a, b, a);
3892 } /* end s_mp_mul() */
3893 
3894 /* }}} */
3895 
3896 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3897 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3898 #define MP_MUL_DxD(a, b, Phi, Plo) \
3899   { unsigned long long product = (unsigned long long)a * b; \
3900     Plo = (mp_digit)product; \
3901     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3902 #elif defined(OSF1)
3903 #define MP_MUL_DxD(a, b, Phi, Plo) \
3904   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3905     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3906 #else
3907 #define MP_MUL_DxD(a, b, Phi, Plo) \
3908   { mp_digit a0b1, a1b0; \
3909     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3910     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3911     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3912     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3913     a1b0 += a0b1; \
3914     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3915     if (a1b0 < a0b1)  \
3916       Phi += MP_HALF_RADIX; \
3917     a1b0 <<= MP_HALF_DIGIT_BIT; \
3918     Plo += a1b0; \
3919     if (Plo < a1b0) \
3920       ++Phi; \
3921   }
3922 #endif
3923 
3924 #if !defined(MP_ASSEMBLY_MULTIPLY)
3925 /* c = a * b */
3926 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3927 {
3928 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3929   mp_digit   d = 0;
3930 
3931   /* Inner product:  Digits of a */
3932   while (a_len--) {
3933     mp_word w = ((mp_word)b * *a++) + d;
3934     *c++ = ACCUM(w);
3935     d = CARRYOUT(w);
3936   }
3937   *c = d;
3938 #else
3939   mp_digit carry = 0;
3940   while (a_len--) {
3941     mp_digit a_i = *a++;
3942     mp_digit a0b0, a1b1;
3943 
3944     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3945 
3946     a0b0 += carry;
3947     if (a0b0 < carry)
3948       ++a1b1;
3949     *c++ = a0b0;
3950     carry = a1b1;
3951   }
3952   *c = carry;
3953 #endif
3954 }
3955 
3956 /* c += a * b */
3957 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3958                               mp_digit *c)
3959 {
3960 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3961   mp_digit   d = 0;
3962 
3963   /* Inner product:  Digits of a */
3964   while (a_len--) {
3965     mp_word w = ((mp_word)b * *a++) + *c + d;
3966     *c++ = ACCUM(w);
3967     d = CARRYOUT(w);
3968   }
3969   *c = d;
3970 #else
3971   mp_digit carry = 0;
3972   while (a_len--) {
3973     mp_digit a_i = *a++;
3974     mp_digit a0b0, a1b1;
3975 
3976     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3977 
3978     a0b0 += carry;
3979     if (a0b0 < carry)
3980       ++a1b1;
3981     a0b0 += a_i = *c;
3982     if (a0b0 < a_i)
3983       ++a1b1;
3984     *c++ = a0b0;
3985     carry = a1b1;
3986   }
3987   *c = carry;
3988 #endif
3989 }
3990 
3991 /* Presently, this is only used by the Montgomery arithmetic code. */
3992 /* c += a * b */
3993 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3994 {
3995 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3996   mp_digit   d = 0;
3997 
3998   /* Inner product:  Digits of a */
3999   while (a_len--) {
4000     mp_word w = ((mp_word)b * *a++) + *c + d;
4001     *c++ = ACCUM(w);
4002     d = CARRYOUT(w);
4003   }
4004 
4005   while (d) {
4006     mp_word w = (mp_word)*c + d;
4007     *c++ = ACCUM(w);
4008     d = CARRYOUT(w);
4009   }
4010 #else
4011   mp_digit carry = 0;
4012   while (a_len--) {
4013     mp_digit a_i = *a++;
4014     mp_digit a0b0, a1b1;
4015 
4016     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4017 
4018     a0b0 += carry;
4019     if (a0b0 < carry)
4020       ++a1b1;
4021 
4022     a0b0 += a_i = *c;
4023     if (a0b0 < a_i)
4024       ++a1b1;
4025 
4026     *c++ = a0b0;
4027     carry = a1b1;
4028   }
4029   while (carry) {
4030     mp_digit c_i = *c;
4031     carry += c_i;
4032     *c++ = carry;
4033     carry = carry < c_i;
4034   }
4035 #endif
4036 }
4037 #endif
4038 
4039 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4040 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4041 #define MP_SQR_D(a, Phi, Plo) \
4042   { unsigned long long square = (unsigned long long)a * a; \
4043     Plo = (mp_digit)square; \
4044     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4045 #elif defined(OSF1)
4046 #define MP_SQR_D(a, Phi, Plo) \
4047   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4048     Phi = asm ("umulh %a0, %a0, %v0", a); }
4049 #else
4050 #define MP_SQR_D(a, Phi, Plo) \
4051   { mp_digit Pmid; \
4052     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4053     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4054     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4055     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4056     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4057     Plo += Pmid;  \
4058     if (Plo < Pmid)  \
4059       ++Phi;  \
4060   }
4061 #endif
4062 
4063 #if !defined(MP_ASSEMBLY_SQUARE)
4064 /* Add the squares of the digits of a to the digits of b. */
4065 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4066 {
4067 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4068   mp_word  w;
4069   mp_digit d;
4070   mp_size  ix;
4071 
4072   w  = 0;
4073 #define ADD_SQUARE(n) \
4074     d = pa[n]; \
4075     w += (d * (mp_word)d) + ps[2*n]; \
4076     ps[2*n] = ACCUM(w); \
4077     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4078     ps[2*n+1] = ACCUM(w); \
4079     w = (w >> DIGIT_BIT)
4080 
4081   for (ix = a_len; ix >= 4; ix -= 4) {
4082     ADD_SQUARE(0);
4083     ADD_SQUARE(1);
4084     ADD_SQUARE(2);
4085     ADD_SQUARE(3);
4086     pa += 4;
4087     ps += 8;
4088   }
4089   if (ix) {
4090     ps += 2*ix;
4091     pa += ix;
4092     switch (ix) {
4093     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4094     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4095     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4096     case 0: break;
4097     }
4098   }
4099   while (w) {
4100     w += *ps;
4101     *ps++ = ACCUM(w);
4102     w = (w >> DIGIT_BIT);
4103   }
4104 #else
4105   mp_digit carry = 0;
4106   while (a_len--) {
4107     mp_digit a_i = *pa++;
4108     mp_digit a0a0, a1a1;
4109 
4110     MP_SQR_D(a_i, a1a1, a0a0);
4111 
4112     /* here a1a1 and a0a0 constitute a_i ** 2 */
4113     a0a0 += carry;
4114     if (a0a0 < carry)
4115       ++a1a1;
4116 
4117     /* now add to ps */
4118     a0a0 += a_i = *ps;
4119     if (a0a0 < a_i)
4120       ++a1a1;
4121     *ps++ = a0a0;
4122     a1a1 += a_i = *ps;
4123     carry = (a1a1 < a_i);
4124     *ps++ = a1a1;
4125   }
4126   while (carry) {
4127     mp_digit s_i = *ps;
4128     carry += s_i;
4129     *ps++ = carry;
4130     carry = carry < s_i;
4131   }
4132 #endif
4133 }
4134 #endif
4135 
4136 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4137 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4138 /*
4139 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4140 ** so its high bit is 1.   This code is from NSPR.
4141 */
4142 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4143                        mp_digit *qp, mp_digit *rp)
4144 {
4145     mp_digit d1, d0, q1, q0;
4146     mp_digit r1, r0, m;
4147 
4148     d1 = divisor >> MP_HALF_DIGIT_BIT;
4149     d0 = divisor & MP_HALF_DIGIT_MAX;
4150     r1 = Nhi % d1;
4151     q1 = Nhi / d1;
4152     m = q1 * d0;
4153     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4154     if (r1 < m) {
4155         q1--, r1 += divisor;
4156         if (r1 >= divisor && r1 < m) {
4157             q1--, r1 += divisor;
4158         }
4159     }
4160     r1 -= m;
4161     r0 = r1 % d1;
4162     q0 = r1 / d1;
4163     m = q0 * d0;
4164     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4165     if (r0 < m) {
4166         q0--, r0 += divisor;
4167         if (r0 >= divisor && r0 < m) {
4168             q0--, r0 += divisor;
4169         }
4170     }
4171     if (qp)
4172         *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4173     if (rp)
4174         *rp = r0 - m;
4175     return MP_OKAY;
4176 }
4177 #endif
4178 
4179 #if MP_SQUARE
4180 /* {{{ s_mp_sqr(a) */
4181 
4182 mp_err   s_mp_sqr(mp_int *a)
4183 {
4184   mp_err   res;
4185   mp_int   tmp;
4186 
4187   if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4188     return res;
4189   res = mp_sqr(a, &tmp);
4190   if (res == MP_OKAY) {
4191     s_mp_exch(&tmp, a);
4192   }
4193   mp_clear(&tmp);
4194   return res;
4195 }
4196 
4197 /* }}} */
4198 #endif
4199 
4200 /* {{{ s_mp_div(a, b) */
4201 
4202 /*
4203   s_mp_div(a, b)
4204 
4205   Compute a = a / b and b = a mod b.  Assumes b > a.
4206  */
4207 
4208 mp_err   s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4209                   mp_int *div,  /* i: divisor                */
4210                   mp_int *quot) /* i: 0;        o: quotient  */
4211 {
4212   mp_int   part, t;
4213 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4214   mp_word  q_msd;
4215 #else
4216   mp_digit q_msd;
4217 #endif
4218   mp_err   res;
4219   mp_digit d;
4220   mp_digit div_msd;
4221   int      ix;
4222 
4223   if(mp_cmp_z(div) == 0)
4224     return MP_RANGE;
4225 
4226   /* Shortcut if divisor is power of two */
4227   if((ix = s_mp_ispow2(div)) >= 0) {
4228     MP_CHECKOK( mp_copy(rem, quot) );
4229     s_mp_div_2d(quot, (mp_digit)ix);
4230     s_mp_mod_2d(rem,  (mp_digit)ix);
4231 
4232     return MP_OKAY;
4233   }
4234 
4235   DIGITS(&t) = 0;
4236   MP_SIGN(rem) = ZPOS;
4237   MP_SIGN(div) = ZPOS;
4238 
4239   /* A working temporary for division     */
4240   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4241 
4242   /* Normalize to optimize guessing       */
4243   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4244 
4245   part = *rem;
4246 
4247   /* Perform the division itself...woo!   */
4248   MP_USED(quot) = MP_ALLOC(quot);
4249 
4250   /* Find a partial substring of rem which is at least div */
4251   /* If we didn't find one, we're finished dividing    */
4252   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4253     int i;
4254     int unusedRem;
4255 
4256     unusedRem = MP_USED(rem) - MP_USED(div);
4257     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4258     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4259     MP_USED(&part)   = MP_USED(div);
4260     if (s_mp_cmp(&part, div) < 0) {
4261       -- unusedRem;
4262 #if MP_ARGCHK == 2
4263       assert(unusedRem >= 0);
4264 #endif
4265       -- MP_DIGITS(&part);
4266       ++ MP_USED(&part);
4267       ++ MP_ALLOC(&part);
4268     }
4269 
4270     /* Compute a guess for the next quotient digit       */
4271     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4272     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4273     if (q_msd >= div_msd) {
4274       q_msd = 1;
4275     } else if (MP_USED(&part) > 1) {
4276 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4277       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4278       q_msd /= div_msd;
4279       if (q_msd == RADIX)
4280         --q_msd;
4281 #else
4282       mp_digit r;
4283       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4284                                   div_msd, &q_msd, &r) );
4285 #endif
4286     } else {
4287       q_msd = 0;
4288     }
4289 #if MP_ARGCHK == 2
4290     assert(q_msd > 0); /* This case should never occur any more. */
4291 #endif
4292     if (q_msd <= 0)
4293       break;
4294 
4295     /* See what that multiplies out to                   */
4296     mp_copy(div, &t);
4297     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4298 
4299     /*
4300        If it's too big, back it off.  We should not have to do this
4301        more than once, or, in rare cases, twice.  Knuth describes a
4302        method by which this could be reduced to a maximum of once, but
4303        I didn't implement that here.
4304      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4305      */
4306     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4307       --q_msd;
4308       s_mp_sub(&t, div);        /* t -= div */
4309     }
4310     if (i < 0) {
4311       res = MP_RANGE;
4312       goto CLEANUP;
4313     }
4314 
4315     /* At this point, q_msd should be the right next digit   */
4316     MP_CHECKOK( s_mp_sub(&part, &t) );  /* part -= t */
4317     s_mp_clamp(rem);
4318 
4319     /*
4320       Include the digit in the quotient.  We allocated enough memory
4321       for any quotient we could ever possibly get, so we should not
4322       have to check for failures here
4323      */
4324     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4325   }
4326 
4327   /* Denormalize remainder                */
4328   if (d) {
4329     s_mp_div_2d(rem, d);
4330   }
4331 
4332   s_mp_clamp(quot);
4333 
4334 CLEANUP:
4335   mp_clear(&t);
4336 
4337   return res;
4338 
4339 } /* end s_mp_div() */
4340 
4341 
4342 /* }}} */
4343 
4344 /* {{{ s_mp_2expt(a, k) */
4345 
4346 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4347 {
4348   mp_err    res;
4349   mp_size   dig, bit;
4350 
4351   dig = k / DIGIT_BIT;
4352   bit = k % DIGIT_BIT;
4353 
4354   mp_zero(a);
4355   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4356     return res;
4357 
4358   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4359 
4360   return MP_OKAY;
4361 
4362 } /* end s_mp_2expt() */
4363 
4364 /* }}} */
4365 
4366 /* {{{ s_mp_reduce(x, m, mu) */
4367 
4368 /*
4369   Compute Barrett reduction, x (mod m), given a precomputed value for
4370   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4371   faster than straight division, when many reductions by the same
4372   value of m are required (such as in modular exponentiation).  This
4373   can nearly halve the time required to do modular exponentiation,
4374   as compared to using the full integer divide to reduce.
4375 
4376   This algorithm was derived from the _Handbook of Applied
4377   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4378   pp. 603-604.
4379  */
4380 
4381 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4382 {
4383   mp_int   q;
4384   mp_err   res;
4385 
4386   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4387     return res;
4388 
4389   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4390   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4391   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4392 
4393   /* x = x mod b^(k+1), quick (no division) */
4394   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4395 
4396   /* q = q * m mod b^(k+1), quick (no division) */
4397   s_mp_mul(&q, m);
4398   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4399 
4400   /* x = x - q */
4401   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4402     goto CLEANUP;
4403 
4404   /* If x < 0, add b^(k+1) to it */
4405   if(mp_cmp_z(x) < 0) {
4406     mp_set(&q, 1);
4407     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4408       goto CLEANUP;
4409     if((res = mp_add(x, &q, x)) != MP_OKAY)
4410       goto CLEANUP;
4411   }
4412 
4413   /* Back off if it's too big */
4414   while(mp_cmp(x, m) >= 0) {
4415     if((res = s_mp_sub(x, m)) != MP_OKAY)
4416       break;
4417   }
4418 
4419  CLEANUP:
4420   mp_clear(&q);
4421 
4422   return res;
4423 
4424 } /* end s_mp_reduce() */
4425 
4426 /* }}} */
4427 
4428 /* }}} */
4429 
4430 /* {{{ Primitive comparisons */
4431 
4432 /* {{{ s_mp_cmp(a, b) */
4433 
4434 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4435 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4436 {
4437   mp_size used_a = MP_USED(a);
4438   {
4439     mp_size used_b = MP_USED(b);
4440 
4441     if (used_a > used_b)
4442       goto IS_GT;
4443     if (used_a < used_b)
4444       goto IS_LT;
4445   }
4446   {
4447     mp_digit *pa, *pb;
4448     mp_digit da = 0, db = 0;
4449 
4450 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4451 
4452     pa = MP_DIGITS(a) + used_a;
4453     pb = MP_DIGITS(b) + used_a;
4454     while (used_a >= 4) {
4455       pa     -= 4;
4456       pb     -= 4;
4457       used_a -= 4;
4458       CMP_AB(3);
4459       CMP_AB(2);
4460       CMP_AB(1);
4461       CMP_AB(0);
4462     }
4463     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4464       /* do nothing */;
4465 done:
4466     if (da > db)
4467       goto IS_GT;
4468     if (da < db)
4469       goto IS_LT;
4470   }
4471   return MP_EQ;
4472 IS_LT:
4473   return MP_LT;
4474 IS_GT:
4475   return MP_GT;
4476 } /* end s_mp_cmp() */
4477 
4478 /* }}} */
4479 
4480 /* {{{ s_mp_cmp_d(a, d) */
4481 
4482 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4483 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4484 {
4485   if(USED(a) > 1)
4486     return MP_GT;
4487 
4488   if(DIGIT(a, 0) < d)
4489     return MP_LT;
4490   else if(DIGIT(a, 0) > d)
4491     return MP_GT;
4492   else
4493     return MP_EQ;
4494 
4495 } /* end s_mp_cmp_d() */
4496 
4497 /* }}} */
4498 
4499 /* {{{ s_mp_ispow2(v) */
4500 
4501 /*
4502   Returns -1 if the value is not a power of two; otherwise, it returns
4503   k such that v = 2^k, i.e. lg(v).
4504  */
4505 int      s_mp_ispow2(const mp_int *v)
4506 {
4507   mp_digit d;
4508   int      extra = 0, ix;
4509 
4510   ix = MP_USED(v) - 1;
4511   d = MP_DIGIT(v, ix); /* most significant digit of v */
4512 
4513   extra = s_mp_ispow2d(d);
4514   if (extra < 0 || ix == 0)
4515     return extra;
4516 
4517   while (--ix >= 0) {
4518     if (DIGIT(v, ix) != 0)
4519       return -1; /* not a power of two */
4520     extra += MP_DIGIT_BIT;
4521   }
4522 
4523   return extra;
4524 
4525 } /* end s_mp_ispow2() */
4526 
4527 /* }}} */
4528 
4529 /* {{{ s_mp_ispow2d(d) */
4530 
4531 int      s_mp_ispow2d(mp_digit d)
4532 {
4533   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4534     int pow = 0;
4535 #if defined (MP_USE_UINT_DIGIT)
4536     if (d & 0xffff0000U)
4537       pow += 16;
4538     if (d & 0xff00ff00U)
4539       pow += 8;
4540     if (d & 0xf0f0f0f0U)
4541       pow += 4;
4542     if (d & 0xccccccccU)
4543       pow += 2;
4544     if (d & 0xaaaaaaaaU)
4545       pow += 1;
4546 #elif defined(MP_USE_LONG_LONG_DIGIT)
4547     if (d & 0xffffffff00000000ULL)
4548       pow += 32;
4549     if (d & 0xffff0000ffff0000ULL)
4550       pow += 16;
4551     if (d & 0xff00ff00ff00ff00ULL)
4552       pow += 8;
4553     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4554       pow += 4;
4555     if (d & 0xccccccccccccccccULL)
4556       pow += 2;
4557     if (d & 0xaaaaaaaaaaaaaaaaULL)
4558       pow += 1;
4559 #elif defined(MP_USE_LONG_DIGIT)
4560     if (d & 0xffffffff00000000UL)
4561       pow += 32;
4562     if (d & 0xffff0000ffff0000UL)
4563       pow += 16;
4564     if (d & 0xff00ff00ff00ff00UL)
4565       pow += 8;
4566     if (d & 0xf0f0f0f0f0f0f0f0UL)
4567       pow += 4;
4568     if (d & 0xccccccccccccccccUL)
4569       pow += 2;
4570     if (d & 0xaaaaaaaaaaaaaaaaUL)
4571       pow += 1;
4572 #else
4573 #error "unknown type for mp_digit"
4574 #endif
4575     return pow;
4576   }
4577   return -1;
4578 
4579 } /* end s_mp_ispow2d() */
4580 
4581 /* }}} */
4582 
4583 /* }}} */
4584 
4585 /* {{{ Primitive I/O helpers */
4586 
4587 /* {{{ s_mp_tovalue(ch, r) */
4588 
4589 /*
4590   Convert the given character to its digit value, in the given radix.
4591   If the given character is not understood in the given radix, -1 is
4592   returned.  Otherwise the digit's numeric value is returned.
4593 
4594   The results will be odd if you use a radix < 2 or > 62, you are
4595   expected to know what you're up to.
4596  */
4597 int      s_mp_tovalue(char ch, int r)
4598 {
4599   int    val, xch;
4600 
4601   if(r > 36)
4602     xch = ch;
4603   else
4604     xch = toupper(ch);
4605 
4606   if(isdigit(xch))
4607     val = xch - '0';
4608   else if(isupper(xch))
4609     val = xch - 'A' + 10;
4610   else if(islower(xch))
4611     val = xch - 'a' + 36;
4612   else if(xch == '+')
4613     val = 62;
4614   else if(xch == '/')
4615     val = 63;
4616   else
4617     return -1;
4618 
4619   if(val < 0 || val >= r)
4620     return -1;
4621 
4622   return val;
4623 
4624 } /* end s_mp_tovalue() */
4625 
4626 /* }}} */
4627 
4628 /* {{{ s_mp_todigit(val, r, low) */
4629 
4630 /*
4631   Convert val to a radix-r digit, if possible.  If val is out of range
4632   for r, returns zero.  Otherwise, returns an ASCII character denoting
4633   the value in the given radix.
4634 
4635   The results may be odd if you use a radix < 2 or > 64, you are
4636   expected to know what you're doing.
4637  */
4638 
4639 char     s_mp_todigit(mp_digit val, int r, int low)
4640 {
4641   char   ch;
4642 
4643   if(val >= (unsigned int)r)
4644     return 0;
4645 
4646   ch = s_dmap_1[val];
4647 
4648   if(r <= 36 && low)
4649     ch = tolower(ch);
4650 
4651   return ch;
4652 
4653 } /* end s_mp_todigit() */
4654 
4655 /* }}} */
4656 
4657 /* {{{ s_mp_outlen(bits, radix) */
4658 
4659 /*
4660    Return an estimate for how long a string is needed to hold a radix
4661    r representation of a number with 'bits' significant bits, plus an
4662    extra for a zero terminator (assuming C style strings here)
4663  */
4664 int      s_mp_outlen(int bits, int r)
4665 {
4666   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4667 
4668 } /* end s_mp_outlen() */
4669 
4670 /* }}} */
4671 
4672 /* }}} */
4673 
4674 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4675 /* mp_read_unsigned_octets(mp, str, len)
4676    Read in a raw value (base 256) into the given mp_int
4677    No sign bit, number is positive.  Leading zeros ignored.
4678  */
4679 
4680 mp_err
4681 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4682 {
4683   int            count;
4684   mp_err         res;
4685   mp_digit       d;
4686 
4687   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4688 
4689   mp_zero(mp);
4690 
4691   count = len % sizeof(mp_digit);
4692   if (count) {
4693     for (d = 0; count-- > 0; --len) {
4694       d = (d << 8) | *str++;
4695     }
4696     MP_DIGIT(mp, 0) = d;
4697   }
4698 
4699   /* Read the rest of the digits */
4700   for(; len > 0; len -= sizeof(mp_digit)) {
4701     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4702       d = (d << 8) | *str++;
4703     }
4704     if (MP_EQ == mp_cmp_z(mp)) {
4705       if (!d)
4706         continue;
4707     } else {
4708       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4709         return res;
4710     }
4711     MP_DIGIT(mp, 0) = d;
4712   }
4713   return MP_OKAY;
4714 } /* end mp_read_unsigned_octets() */
4715 /* }}} */
4716 
4717 /* {{{ mp_unsigned_octet_size(mp) */
4718 int
4719 mp_unsigned_octet_size(const mp_int *mp)
4720 {
4721   int  bytes;
4722   int  ix;
4723   mp_digit  d = 0;
4724 
4725   ARGCHK(mp != NULL, MP_BADARG);
4726   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4727 
4728   bytes = (USED(mp) * sizeof(mp_digit));
4729 
4730   /* subtract leading zeros. */
4731   /* Iterate over each digit... */
4732   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4733     d = DIGIT(mp, ix);
4734     if (d)
4735         break;
4736     bytes -= sizeof(d);
4737   }
4738   if (!bytes)
4739     return 1;
4740 
4741   /* Have MSD, check digit bytes, high order first */
4742   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4743     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4744     if (x)
4745         break;
4746     --bytes;
4747   }
4748   return bytes;
4749 } /* end mp_unsigned_octet_size() */
4750 /* }}} */
4751 
4752 /* {{{ mp_to_unsigned_octets(mp, str) */
4753 /* output a buffer of big endian octets no longer than specified. */
4754 mp_err
4755 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4756 {
4757   int  ix, pos = 0;
4758   unsigned int  bytes;
4759 
4760   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4761 
4762   bytes = mp_unsigned_octet_size(mp);
4763   ARGCHK(bytes <= maxlen, MP_BADARG);
4764 
4765   /* Iterate over each digit... */
4766   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4767     mp_digit  d = DIGIT(mp, ix);
4768     int       jx;
4769 
4770     /* Unpack digit bytes, high order first */
4771     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4772       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4773       if (!pos && !x)   /* suppress leading zeros */
4774         continue;
4775       str[pos++] = x;
4776     }
4777   }
4778   if (!pos)
4779     str[pos++] = 0;
4780   return pos;
4781 } /* end mp_to_unsigned_octets() */
4782 /* }}} */
4783 
4784 /* {{{ mp_to_signed_octets(mp, str) */
4785 /* output a buffer of big endian octets no longer than specified. */
4786 mp_err
4787 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4788 {
4789   int  ix, pos = 0;
4790   unsigned int  bytes;
4791 
4792   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4793 
4794   bytes = mp_unsigned_octet_size(mp);
4795   ARGCHK(bytes <= maxlen, MP_BADARG);
4796 
4797   /* Iterate over each digit... */
4798   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4799     mp_digit  d = DIGIT(mp, ix);
4800     int       jx;
4801 
4802     /* Unpack digit bytes, high order first */
4803     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4804       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4805       if (!pos) {
4806         if (!x)         /* suppress leading zeros */
4807           continue;
4808         if (x & 0x80) { /* add one leading zero to make output positive.  */
4809           ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4810           if (bytes + 1 > maxlen)
4811             return MP_BADARG;
4812           str[pos++] = 0;
4813         }
4814       }
4815       str[pos++] = x;
4816     }
4817   }
4818   if (!pos)
4819     str[pos++] = 0;
4820   return pos;
4821 } /* end mp_to_signed_octets() */
4822 /* }}} */
4823 
4824 /* {{{ mp_to_fixlen_octets(mp, str) */
4825 /* output a buffer of big endian octets exactly as long as requested. */
4826 mp_err
4827 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4828 {
4829   int  ix, pos = 0;
4830   unsigned int  bytes;
4831 
4832   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4833 
4834   bytes = mp_unsigned_octet_size(mp);
4835   ARGCHK(bytes <= length, MP_BADARG);
4836 
4837   /* place any needed leading zeros */
4838   for (;length > bytes; --length) {
4839         *str++ = 0;
4840   }
4841 
4842   /* Iterate over each digit... */
4843   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4844     mp_digit  d = DIGIT(mp, ix);
4845     int       jx;
4846 
4847     /* Unpack digit bytes, high order first */
4848     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4849       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4850       if (!pos && !x)   /* suppress leading zeros */
4851         continue;
4852       str[pos++] = x;
4853     }
4854   }
4855   if (!pos)
4856     str[pos++] = 0;
4857   return MP_OKAY;
4858 } /* end mp_to_fixlen_octets() */
4859 /* }}} */
4860 
4861 
4862 /*------------------------------------------------------------------------*/
4863 /* HERE THERE BE DRAGONS                                                  */