New src/jdk.crypto.ec/share/native/libsunec/impl/mpi.c

   1 /*
   2  * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * Use is subject to license terms.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public License
  16  * along with this library; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* *********************************************************************
  25  *
  26  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
  27  *
  28  * The Initial Developer of the Original Code is
  29  * Michael J. Fromberger.
  30  * Portions created by the Initial Developer are Copyright (C) 1998
  31  * the Initial Developer. All Rights Reserved.
  32  *
  33  * Contributor(s):
  34  *   Netscape Communications Corporation
  35  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
  36  *
  37  * Last Modified Date from the Original Code: Nov 2019
  38  *********************************************************************** */
  39 
  40 /*  Arbitrary precision integer arithmetic library */
  41 
  42 #include "mpi-priv.h"
  43 #if defined(OSF1)
  44 #include <c_asm.h>
  45 #endif
  46 
  47 #if MP_LOGTAB
  48 /*
  49   A table of the logs of 2 for various bases (the 0 and 1 entries of
  50   this table are meaningless and should not be referenced).
  51 
  52   This table is used to compute output lengths for the mp_toradix()
  53   function.  Since a number n in radix r takes up about log_r(n)
  54   digits, we estimate the output size by taking the least integer
  55   greater than log_r(n), where:
  56 
  57   log_r(n) = log_2(n) * log_r(2)
  58 
  59   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
  60   which are the output bases supported.
  61  */
  62 #include "logtab.h"
  63 #endif
  64 
  65 /* {{{ Constant strings */
  66 
  67 /* Constant strings returned by mp_strerror() */
  68 static const char *mp_err_string[] = {
  69   "unknown result code",     /* say what?            */
  70   "boolean true",            /* MP_OKAY, MP_YES      */
  71   "boolean false",           /* MP_NO                */
  72   "out of memory",           /* MP_MEM               */
  73   "argument out of range",   /* MP_RANGE             */
  74   "invalid input parameter", /* MP_BADARG            */
  75   "result is undefined"      /* MP_UNDEF             */
  76 };
  77 
  78 /* Value to digit maps for radix conversion   */
  79 
  80 /* s_dmap_1 - standard digits and letters */
  81 static const char *s_dmap_1 =
  82   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
  83 
  84 /* }}} */
  85 
  86 unsigned long mp_allocs;
  87 unsigned long mp_frees;
  88 unsigned long mp_copies;
  89 
  90 /* {{{ Default precision manipulation */
  91 
  92 /* Default precision for newly created mp_int's      */
  93 static mp_size s_mp_defprec = MP_DEFPREC;
  94 
  95 mp_size mp_get_prec(void)
  96 {
  97   return s_mp_defprec;
  98 
  99 } /* end mp_get_prec() */
 100 
 101 void         mp_set_prec(mp_size prec)
 102 {
 103   if(prec == 0)
 104     s_mp_defprec = MP_DEFPREC;
 105   else
 106     s_mp_defprec = prec;
 107 
 108 } /* end mp_set_prec() */
 109 
 110 /* }}} */
 111 
 112 /*------------------------------------------------------------------------*/
 113 /* {{{ mp_init(mp, kmflag) */
 114 
 115 /*
 116   mp_init(mp, kmflag)
 117 
 118   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
 119   MP_MEM if memory could not be allocated for the structure.
 120  */
 121 
 122 mp_err mp_init(mp_int *mp, int kmflag)
 123 {
 124   return mp_init_size(mp, s_mp_defprec, kmflag);
 125 
 126 } /* end mp_init() */
 127 
 128 /* }}} */
 129 
 130 /* {{{ mp_init_size(mp, prec, kmflag) */
 131 
 132 /*
 133   mp_init_size(mp, prec, kmflag)
 134 
 135   Initialize a new zero-valued mp_int with at least the given
 136   precision; returns MP_OKAY if successful, or MP_MEM if memory could
 137   not be allocated for the structure.
 138  */
 139 
 140 mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
 141 {
 142   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
 143 
 144   prec = MP_ROUNDUP(prec, s_mp_defprec);
 145   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
 146     return MP_MEM;
 147 
 148   SIGN(mp) = ZPOS;
 149   USED(mp) = 1;
 150   ALLOC(mp) = prec;
 151 
 152   return MP_OKAY;
 153 
 154 } /* end mp_init_size() */
 155 
 156 /* }}} */
 157 
 158 /* {{{ mp_init_copy(mp, from) */
 159 
 160 /*
 161   mp_init_copy(mp, from)
 162 
 163   Initialize mp as an exact copy of from.  Returns MP_OKAY if
 164   successful, MP_MEM if memory could not be allocated for the new
 165   structure.
 166  */
 167 
 168 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
 169 {
 170   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
 171 
 172   if(mp == from)
 173     return MP_OKAY;
 174 
 175   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 176     return MP_MEM;
 177 
 178   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
 179   USED(mp) = USED(from);
 180   ALLOC(mp) = ALLOC(from);
 181   SIGN(mp) = SIGN(from);
 182 
 183 #ifndef _WIN32
 184   FLAG(mp) = FLAG(from);
 185 #endif /* _WIN32 */
 186 
 187   return MP_OKAY;
 188 
 189 } /* end mp_init_copy() */
 190 
 191 /* }}} */
 192 
 193 /* {{{ mp_copy(from, to) */
 194 
 195 /*
 196   mp_copy(from, to)
 197 
 198   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
 199   'to' has already been initialized (if not, use mp_init_copy()
 200   instead). If 'from' and 'to' are identical, nothing happens.
 201  */
 202 
 203 mp_err mp_copy(const mp_int *from, mp_int *to)
 204 {
 205   ARGCHK(from != NULL && to != NULL, MP_BADARG);
 206 
 207   if(from == to)
 208     return MP_OKAY;
 209 
 210   ++mp_copies;
 211   { /* copy */
 212     mp_digit   *tmp;
 213 
 214     /*
 215       If the allocated buffer in 'to' already has enough space to hold
 216       all the used digits of 'from', we'll re-use it to avoid hitting
 217       the memory allocater more than necessary; otherwise, we'd have
 218       to grow anyway, so we just allocate a hunk and make the copy as
 219       usual
 220      */
 221     if(ALLOC(to) >= USED(from)) {
 222       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
 223       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
 224 
 225     } else {
 226       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 227         return MP_MEM;
 228 
 229       s_mp_copy(DIGITS(from), tmp, USED(from));
 230 
 231       if(DIGITS(to) != NULL) {
 232 #if MP_CRYPTO
 233         s_mp_setz(DIGITS(to), ALLOC(to));
 234 #endif
 235         s_mp_free(DIGITS(to), ALLOC(to));
 236       }
 237 
 238       DIGITS(to) = tmp;
 239       ALLOC(to) = ALLOC(from);
 240     }
 241 
 242     /* Copy the precision and sign from the original */
 243     USED(to) = USED(from);
 244     SIGN(to) = SIGN(from);
 245   } /* end copy */
 246 
 247   return MP_OKAY;
 248 
 249 } /* end mp_copy() */
 250 
 251 /* }}} */
 252 
 253 /* {{{ mp_exch(mp1, mp2) */
 254 
 255 /*
 256   mp_exch(mp1, mp2)
 257 
 258   Exchange mp1 and mp2 without allocating any intermediate memory
 259   (well, unless you count the stack space needed for this call and the
 260   locals it creates...).  This cannot fail.
 261  */
 262 
 263 void mp_exch(mp_int *mp1, mp_int *mp2)
 264 {
 265 #if MP_ARGCHK == 2
 266   assert(mp1 != NULL && mp2 != NULL);
 267 #else
 268   if(mp1 == NULL || mp2 == NULL)
 269     return;
 270 #endif
 271 
 272   s_mp_exch(mp1, mp2);
 273 
 274 } /* end mp_exch() */
 275 
 276 /* }}} */
 277 
 278 /* {{{ mp_clear(mp) */
 279 
 280 /*
 281   mp_clear(mp)
 282 
 283   Release the storage used by an mp_int, and void its fields so that
 284   if someone calls mp_clear() again for the same int later, we won't
 285   get tollchocked.
 286  */
 287 
 288 void   mp_clear(mp_int *mp)
 289 {
 290   if(mp == NULL)
 291     return;
 292 
 293   if(DIGITS(mp) != NULL) {
 294 #if MP_CRYPTO
 295     s_mp_setz(DIGITS(mp), ALLOC(mp));
 296 #endif
 297     s_mp_free(DIGITS(mp), ALLOC(mp));
 298     DIGITS(mp) = NULL;
 299   }
 300 
 301   USED(mp) = 0;
 302   ALLOC(mp) = 0;
 303 
 304 } /* end mp_clear() */
 305 
 306 /* }}} */
 307 
 308 /* {{{ mp_zero(mp) */
 309 
 310 /*
 311   mp_zero(mp)
 312 
 313   Set mp to zero.  Does not change the allocated size of the structure,
 314   and therefore cannot fail (except on a bad argument, which we ignore)
 315  */
 316 void   mp_zero(mp_int *mp)
 317 {
 318   if(mp == NULL)
 319     return;
 320 
 321   s_mp_setz(DIGITS(mp), ALLOC(mp));
 322   USED(mp) = 1;
 323   SIGN(mp) = ZPOS;
 324 
 325 } /* end mp_zero() */
 326 
 327 /* }}} */
 328 
 329 /* {{{ mp_set(mp, d) */
 330 
 331 void   mp_set(mp_int *mp, mp_digit d)
 332 {
 333   if(mp == NULL)
 334     return;
 335 
 336   mp_zero(mp);
 337   DIGIT(mp, 0) = d;
 338 
 339 } /* end mp_set() */
 340 
 341 /* }}} */
 342 
 343 /* {{{ mp_set_int(mp, z) */
 344 
 345 mp_err mp_set_int(mp_int *mp, long z)
 346 {
 347   int            ix;
 348   unsigned long  v = labs(z);
 349   mp_err         res;
 350 
 351   ARGCHK(mp != NULL, MP_BADARG);
 352 
 353   mp_zero(mp);
 354   if(z == 0)
 355     return MP_OKAY;  /* shortcut for zero */
 356 
 357   if (sizeof v <= sizeof(mp_digit)) {
 358     DIGIT(mp,0) = v;
 359   } else {
 360     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 361       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 362         return res;
 363 
 364       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
 365       if (res != MP_OKAY)
 366         return res;
 367     }
 368   }
 369   if(z < 0)
 370     SIGN(mp) = NEG;
 371 
 372   return MP_OKAY;
 373 
 374 } /* end mp_set_int() */
 375 
 376 /* }}} */
 377 
 378 /* {{{ mp_set_ulong(mp, z) */
 379 
 380 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
 381 {
 382   int            ix;
 383   mp_err         res;
 384 
 385   ARGCHK(mp != NULL, MP_BADARG);
 386 
 387   mp_zero(mp);
 388   if(z == 0)
 389     return MP_OKAY;  /* shortcut for zero */
 390 
 391   if (sizeof z <= sizeof(mp_digit)) {
 392     DIGIT(mp,0) = z;
 393   } else {
 394     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 395       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 396         return res;
 397 
 398       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
 399       if (res != MP_OKAY)
 400         return res;
 401     }
 402   }
 403   return MP_OKAY;
 404 } /* end mp_set_ulong() */
 405 
 406 /* }}} */
 407 
 408 /*------------------------------------------------------------------------*/
 409 /* {{{ Digit arithmetic */
 410 
 411 /* {{{ mp_add_d(a, d, b) */
 412 
 413 /*
 414   mp_add_d(a, d, b)
 415 
 416   Compute the sum b = a + d, for a single digit d.  Respects the sign of
 417   its primary addend (single digits are unsigned anyway).
 418  */
 419 
 420 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
 421 {
 422   mp_int   tmp;
 423   mp_err   res;
 424 
 425   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 426 
 427   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 428     return res;
 429 
 430   if(SIGN(&tmp) == ZPOS) {
 431     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 432       goto CLEANUP;
 433   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 434     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 435       goto CLEANUP;
 436   } else {
 437     mp_neg(&tmp, &tmp);
 438 
 439     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 440   }
 441 
 442   if(s_mp_cmp_d(&tmp, 0) == 0)
 443     SIGN(&tmp) = ZPOS;
 444 
 445   s_mp_exch(&tmp, b);
 446 
 447 CLEANUP:
 448   mp_clear(&tmp);
 449   return res;
 450 
 451 } /* end mp_add_d() */
 452 
 453 /* }}} */
 454 
 455 /* {{{ mp_sub_d(a, d, b) */
 456 
 457 /*
 458   mp_sub_d(a, d, b)
 459 
 460   Compute the difference b = a - d, for a single digit d.  Respects the
 461   sign of its subtrahend (single digits are unsigned anyway).
 462  */
 463 
 464 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
 465 {
 466   mp_int   tmp;
 467   mp_err   res;
 468 
 469   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 470 
 471   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 472     return res;
 473 
 474   if(SIGN(&tmp) == NEG) {
 475     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 476       goto CLEANUP;
 477   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 478     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 479       goto CLEANUP;
 480   } else {
 481     mp_neg(&tmp, &tmp);
 482 
 483     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 484     SIGN(&tmp) = NEG;
 485   }
 486 
 487   if(s_mp_cmp_d(&tmp, 0) == 0)
 488     SIGN(&tmp) = ZPOS;
 489 
 490   s_mp_exch(&tmp, b);
 491 
 492 CLEANUP:
 493   mp_clear(&tmp);
 494   return res;
 495 
 496 } /* end mp_sub_d() */
 497 
 498 /* }}} */
 499 
 500 /* {{{ mp_mul_d(a, d, b) */
 501 
 502 /*
 503   mp_mul_d(a, d, b)
 504 
 505   Compute the product b = a * d, for a single digit d.  Respects the sign
 506   of its multiplicand (single digits are unsigned anyway)
 507  */
 508 
 509 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
 510 {
 511   mp_err  res;
 512 
 513   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 514 
 515   if(d == 0) {
 516     mp_zero(b);
 517     return MP_OKAY;
 518   }
 519 
 520   if((res = mp_copy(a, b)) != MP_OKAY)
 521     return res;
 522 
 523   res = s_mp_mul_d(b, d);
 524 
 525   return res;
 526 
 527 } /* end mp_mul_d() */
 528 
 529 /* }}} */
 530 
 531 /* {{{ mp_mul_2(a, c) */
 532 
 533 mp_err mp_mul_2(const mp_int *a, mp_int *c)
 534 {
 535   mp_err  res;
 536 
 537   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 538 
 539   if((res = mp_copy(a, c)) != MP_OKAY)
 540     return res;
 541 
 542   return s_mp_mul_2(c);
 543 
 544 } /* end mp_mul_2() */
 545 
 546 /* }}} */
 547 
 548 /* {{{ mp_div_d(a, d, q, r) */
 549 
 550 /*
 551   mp_div_d(a, d, q, r)
 552 
 553   Compute the quotient q = a / d and remainder r = a mod d, for a
 554   single digit d.  Respects the sign of its divisor (single digits are
 555   unsigned anyway).
 556  */
 557 
 558 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
 559 {
 560   mp_err   res;
 561   mp_int   qp;
 562   mp_digit rem;
 563   int      pow;
 564 
 565   ARGCHK(a != NULL, MP_BADARG);
 566 
 567   if(d == 0)
 568     return MP_RANGE;
 569 
 570   /* Shortcut for powers of two ... */
 571   if((pow = s_mp_ispow2d(d)) >= 0) {
 572     mp_digit  mask;
 573 
 574     mask = ((mp_digit)1 << pow) - 1;
 575     rem = DIGIT(a, 0) & mask;
 576 
 577     if(q) {
 578       mp_copy(a, q);
 579       s_mp_div_2d(q, pow);
 580     }
 581 
 582     if(r)
 583       *r = rem;
 584 
 585     return MP_OKAY;
 586   }
 587 
 588   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
 589     return res;
 590 
 591   res = s_mp_div_d(&qp, d, &rem);
 592 
 593   if(s_mp_cmp_d(&qp, 0) == 0)
 594     SIGN(q) = ZPOS;
 595 
 596   if(r)
 597     *r = rem;
 598 
 599   if(q)
 600     s_mp_exch(&qp, q);
 601 
 602   mp_clear(&qp);
 603   return res;
 604 
 605 } /* end mp_div_d() */
 606 
 607 /* }}} */
 608 
 609 /* {{{ mp_div_2(a, c) */
 610 
 611 /*
 612   mp_div_2(a, c)
 613 
 614   Compute c = a / 2, disregarding the remainder.
 615  */
 616 
 617 mp_err mp_div_2(const mp_int *a, mp_int *c)
 618 {
 619   mp_err  res;
 620 
 621   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 622 
 623   if((res = mp_copy(a, c)) != MP_OKAY)
 624     return res;
 625 
 626   s_mp_div_2(c);
 627 
 628   return MP_OKAY;
 629 
 630 } /* end mp_div_2() */
 631 
 632 /* }}} */
 633 
 634 /* {{{ mp_expt_d(a, d, b) */
 635 
 636 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
 637 {
 638   mp_int   s, x;
 639   mp_err   res;
 640 
 641   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 642 
 643   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
 644     return res;
 645   if((res = mp_init_copy(&x, a)) != MP_OKAY)
 646     goto X;
 647 
 648   DIGIT(&s, 0) = 1;
 649 
 650   while(d != 0) {
 651     if(d & 1) {
 652       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
 653         goto CLEANUP;
 654     }
 655 
 656     d /= 2;
 657 
 658     if((res = s_mp_sqr(&x)) != MP_OKAY)
 659       goto CLEANUP;
 660   }
 661 
 662   s.flag = (mp_sign)0;
 663   s_mp_exch(&s, c);
 664 
 665 CLEANUP:
 666   mp_clear(&x);
 667 X:
 668   mp_clear(&s);
 669 
 670   return res;
 671 
 672 } /* end mp_expt_d() */
 673 
 674 /* }}} */
 675 
 676 /* }}} */
 677 
 678 /*------------------------------------------------------------------------*/
 679 /* {{{ Full arithmetic */
 680 
 681 /* {{{ mp_abs(a, b) */
 682 
 683 /*
 684   mp_abs(a, b)
 685 
 686   Compute b = |a|.  'a' and 'b' may be identical.
 687  */
 688 
 689 mp_err mp_abs(const mp_int *a, mp_int *b)
 690 {
 691   mp_err   res;
 692 
 693   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 694 
 695   if((res = mp_copy(a, b)) != MP_OKAY)
 696     return res;
 697 
 698   SIGN(b) = ZPOS;
 699 
 700   return MP_OKAY;
 701 
 702 } /* end mp_abs() */
 703 
 704 /* }}} */
 705 
 706 /* {{{ mp_neg(a, b) */
 707 
 708 /*
 709   mp_neg(a, b)
 710 
 711   Compute b = -a.  'a' and 'b' may be identical.
 712  */
 713 
 714 mp_err mp_neg(const mp_int *a, mp_int *b)
 715 {
 716   mp_err   res;
 717 
 718   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 719 
 720   if((res = mp_copy(a, b)) != MP_OKAY)
 721     return res;
 722 
 723   if(s_mp_cmp_d(b, 0) == MP_EQ)
 724     SIGN(b) = ZPOS;
 725   else
 726     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
 727 
 728   return MP_OKAY;
 729 
 730 } /* end mp_neg() */
 731 
 732 /* }}} */
 733 
 734 /* {{{ mp_add(a, b, c) */
 735 
 736 /*
 737   mp_add(a, b, c)
 738 
 739   Compute c = a + b.  All parameters may be identical.
 740  */
 741 
 742 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
 743 {
 744   mp_err  res;
 745 
 746   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 747 
 748   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
 749     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 750   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
 751     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 752   } else {                          /* different sign: |a|  < |b|   */
 753     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 754   }
 755 
 756   if (s_mp_cmp_d(c, 0) == MP_EQ)
 757     SIGN(c) = ZPOS;
 758 
 759 CLEANUP:
 760   return res;
 761 
 762 } /* end mp_add() */
 763 
 764 /* }}} */
 765 
 766 /* {{{ mp_sub(a, b, c) */
 767 
 768 /*
 769   mp_sub(a, b, c)
 770 
 771   Compute c = a - b.  All parameters may be identical.
 772  */
 773 
 774 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
 775 {
 776   mp_err  res;
 777   int     magDiff;
 778 
 779   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 780 
 781   if (a == b) {
 782     mp_zero(c);
 783     return MP_OKAY;
 784   }
 785 
 786   if (MP_SIGN(a) != MP_SIGN(b)) {
 787     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 788   } else if (!(magDiff = s_mp_cmp(a, b))) {
 789     mp_zero(c);
 790     res = MP_OKAY;
 791   } else if (magDiff > 0) {
 792     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 793   } else {
 794     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 795     MP_SIGN(c) = !MP_SIGN(a);
 796   }
 797 
 798   if (s_mp_cmp_d(c, 0) == MP_EQ)
 799     MP_SIGN(c) = MP_ZPOS;
 800 
 801 CLEANUP:
 802   return res;
 803 
 804 } /* end mp_sub() */
 805 
 806 /* }}} */
 807 
 808 /* {{{ mp_mul(a, b, c) */
 809 
 810 /*
 811   mp_mul(a, b, c)
 812 
 813   Compute c = a * b.  All parameters may be identical.
 814  */
 815 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
 816 {
 817   mp_digit *pb;
 818   mp_int   tmp;
 819   mp_err   res;
 820   mp_size  ib;
 821   mp_size  useda, usedb;
 822 
 823   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 824 
 825   if (a == c) {
 826     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 827       return res;
 828     if (a == b)
 829       b = &tmp;
 830     a = &tmp;
 831   } else if (b == c) {
 832     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
 833       return res;
 834     b = &tmp;
 835   } else {
 836     MP_DIGITS(&tmp) = 0;
 837   }
 838 
 839   if (MP_USED(a) < MP_USED(b)) {
 840     const mp_int *xch = b;      /* switch a and b, to do fewer outer loops */
 841     b = a;
 842     a = xch;
 843   }
 844 
 845   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
 846   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
 847     goto CLEANUP;
 848 
 849 #ifdef NSS_USE_COMBA
 850   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 851       if (MP_USED(a) == 4) {
 852           s_mp_mul_comba_4(a, b, c);
 853           goto CLEANUP;
 854       }
 855       if (MP_USED(a) == 8) {
 856           s_mp_mul_comba_8(a, b, c);
 857           goto CLEANUP;
 858       }
 859       if (MP_USED(a) == 16) {
 860           s_mp_mul_comba_16(a, b, c);
 861           goto CLEANUP;
 862       }
 863       if (MP_USED(a) == 32) {
 864           s_mp_mul_comba_32(a, b, c);
 865           goto CLEANUP;
 866       }
 867   }
 868 #endif
 869 
 870   pb = MP_DIGITS(b);
 871   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
 872 
 873   /* Outer loop:  Digits of b */
 874   useda = MP_USED(a);
 875   usedb = MP_USED(b);
 876   for (ib = 1; ib < usedb; ib++) {
 877     mp_digit b_i    = *pb++;
 878 
 879     /* Inner product:  Digits of a */
 880     if (b_i)
 881       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
 882     else
 883       MP_DIGIT(c, ib + useda) = b_i;
 884   }
 885 
 886   s_mp_clamp(c);
 887 
 888   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
 889     SIGN(c) = ZPOS;
 890   else
 891     SIGN(c) = NEG;
 892 
 893 CLEANUP:
 894   mp_clear(&tmp);
 895   return res;
 896 } /* end mp_mul() */
 897 
 898 /* }}} */
 899 
 900 /* {{{ mp_sqr(a, sqr) */
 901 
 902 #if MP_SQUARE
 903 /*
 904   Computes the square of a.  This can be done more
 905   efficiently than a general multiplication, because many of the
 906   computation steps are redundant when squaring.  The inner product
 907   step is a bit more complicated, but we save a fair number of
 908   iterations of the multiplication loop.
 909  */
 910 
 911 /* sqr = a^2;   Caller provides both a and tmp; */
 912 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
 913 {
 914   mp_digit *pa;
 915   mp_digit d;
 916   mp_err   res;
 917   mp_size  ix;
 918   mp_int   tmp;
 919   int      count;
 920 
 921   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
 922 
 923   if (a == sqr) {
 924     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 925       return res;
 926     a = &tmp;
 927   } else {
 928     DIGITS(&tmp) = 0;
 929     res = MP_OKAY;
 930   }
 931 
 932   ix = 2 * MP_USED(a);
 933   if (ix > MP_ALLOC(sqr)) {
 934     MP_USED(sqr) = 1;
 935     MP_CHECKOK( s_mp_grow(sqr, ix) );
 936   }
 937   MP_USED(sqr) = ix;
 938   MP_DIGIT(sqr, 0) = 0;
 939 
 940 #ifdef NSS_USE_COMBA
 941   if (IS_POWER_OF_2(MP_USED(a))) {
 942       if (MP_USED(a) == 4) {
 943           s_mp_sqr_comba_4(a, sqr);
 944           goto CLEANUP;
 945       }
 946       if (MP_USED(a) == 8) {
 947           s_mp_sqr_comba_8(a, sqr);
 948           goto CLEANUP;
 949       }
 950       if (MP_USED(a) == 16) {
 951           s_mp_sqr_comba_16(a, sqr);
 952           goto CLEANUP;
 953       }
 954       if (MP_USED(a) == 32) {
 955           s_mp_sqr_comba_32(a, sqr);
 956           goto CLEANUP;
 957       }
 958   }
 959 #endif
 960 
 961   pa = MP_DIGITS(a);
 962   count = MP_USED(a) - 1;
 963   if (count > 0) {
 964     d = *pa++;
 965     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
 966     for (ix = 3; --count > 0; ix += 2) {
 967       d = *pa++;
 968       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
 969     } /* for(ix ...) */
 970     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
 971 
 972     /* now sqr *= 2 */
 973     s_mp_mul_2(sqr);
 974   } else {
 975     MP_DIGIT(sqr, 1) = 0;
 976   }
 977 
 978   /* now add the squares of the digits of a to sqr. */
 979   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
 980 
 981   SIGN(sqr) = ZPOS;
 982   s_mp_clamp(sqr);
 983 
 984 CLEANUP:
 985   mp_clear(&tmp);
 986   return res;
 987 
 988 } /* end mp_sqr() */
 989 #endif
 990 
 991 /* }}} */
 992 
 993 /* {{{ mp_div(a, b, q, r) */
 994 
 995 /*
 996   mp_div(a, b, q, r)
 997 
 998   Compute q = a / b and r = a mod b.  Input parameters may be re-used
 999   as output parameters.  If q or r is NULL, that portion of the
1000   computation will be discarded (although it will still be computed)
1001  */
1002 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1003 {
1004   mp_err   res;
1005   mp_int   *pQ, *pR;
1006   mp_int   qtmp, rtmp, btmp;
1007   int      cmp;
1008   mp_sign  signA;
1009   mp_sign  signB;
1010 
1011   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1012 
1013   signA = MP_SIGN(a);
1014   signB = MP_SIGN(b);
1015 
1016   if(mp_cmp_z(b) == MP_EQ)
1017     return MP_RANGE;
1018 
1019   DIGITS(&qtmp) = 0;
1020   DIGITS(&rtmp) = 0;
1021   DIGITS(&btmp) = 0;
1022 
1023   /* Set up some temporaries... */
1024   if (!r || r == a || r == b) {
1025     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1026     pR = &rtmp;
1027   } else {
1028     MP_CHECKOK( mp_copy(a, r) );
1029     pR = r;
1030   }
1031 
1032   if (!q || q == a || q == b) {
1033     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1034     pQ = &qtmp;
1035   } else {
1036     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1037     pQ = q;
1038     mp_zero(pQ);
1039   }
1040 
1041   /*
1042     If |a| <= |b|, we can compute the solution without division;
1043     otherwise, we actually do the work required.
1044    */
1045   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1046     if (cmp) {
1047       /* r was set to a above. */
1048       mp_zero(pQ);
1049     } else {
1050       mp_set(pQ, 1);
1051       mp_zero(pR);
1052     }
1053   } else {
1054     MP_CHECKOK( mp_init_copy(&btmp, b) );
1055     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1056   }
1057 
1058   /* Compute the signs for the output  */
1059   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1060   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1061   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1062 
1063   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1064     SIGN(pQ) = ZPOS;
1065   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1066     SIGN(pR) = ZPOS;
1067 
1068   /* Copy output, if it is needed      */
1069   if(q && q != pQ)
1070     s_mp_exch(pQ, q);
1071 
1072   if(r && r != pR)
1073     s_mp_exch(pR, r);
1074 
1075 CLEANUP:
1076   mp_clear(&btmp);
1077   mp_clear(&rtmp);
1078   mp_clear(&qtmp);
1079 
1080   return res;
1081 
1082 } /* end mp_div() */
1083 
1084 /* }}} */
1085 
1086 /* {{{ mp_div_2d(a, d, q, r) */
1087 
1088 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1089 {
1090   mp_err  res;
1091 
1092   ARGCHK(a != NULL, MP_BADARG);
1093 
1094   if(q) {
1095     if((res = mp_copy(a, q)) != MP_OKAY)
1096       return res;
1097   }
1098   if(r) {
1099     if((res = mp_copy(a, r)) != MP_OKAY)
1100       return res;
1101   }
1102   if(q) {
1103     s_mp_div_2d(q, d);
1104   }
1105   if(r) {
1106     s_mp_mod_2d(r, d);
1107   }
1108 
1109   return MP_OKAY;
1110 
1111 } /* end mp_div_2d() */
1112 
1113 /* }}} */
1114 
1115 /* {{{ mp_expt(a, b, c) */
1116 
1117 /*
1118   mp_expt(a, b, c)
1119 
1120   Compute c = a ** b, that is, raise a to the b power.  Uses a
1121   standard iterative square-and-multiply technique.
1122  */
1123 
1124 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1125 {
1126   mp_int   s, x;
1127   mp_err   res;
1128   mp_digit d;
1129   unsigned int      dig, bit;
1130 
1131   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1132 
1133   if(mp_cmp_z(b) < 0)
1134     return MP_RANGE;
1135 
1136   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1137     return res;
1138 
1139   mp_set(&s, 1);
1140 
1141   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1142     goto X;
1143 
1144   /* Loop over low-order digits in ascending order */
1145   for(dig = 0; dig < (USED(b) - 1); dig++) {
1146     d = DIGIT(b, dig);
1147 
1148     /* Loop over bits of each non-maximal digit */
1149     for(bit = 0; bit < DIGIT_BIT; bit++) {
1150       if(d & 1) {
1151         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1152           goto CLEANUP;
1153       }
1154 
1155       d >>= 1;
1156 
1157       if((res = s_mp_sqr(&x)) != MP_OKAY)
1158         goto CLEANUP;
1159     }
1160   }
1161 
1162   /* Consider now the last digit... */
1163   d = DIGIT(b, dig);
1164 
1165   while(d) {
1166     if(d & 1) {
1167       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1168         goto CLEANUP;
1169     }
1170 
1171     d >>= 1;
1172 
1173     if((res = s_mp_sqr(&x)) != MP_OKAY)
1174       goto CLEANUP;
1175   }
1176 
1177   if(mp_iseven(b))
1178     SIGN(&s) = SIGN(a);
1179 
1180   res = mp_copy(&s, c);
1181 
1182 CLEANUP:
1183   mp_clear(&x);
1184 X:
1185   mp_clear(&s);
1186 
1187   return res;
1188 
1189 } /* end mp_expt() */
1190 
1191 /* }}} */
1192 
1193 /* {{{ mp_2expt(a, k) */
1194 
1195 /* Compute a = 2^k */
1196 
1197 mp_err mp_2expt(mp_int *a, mp_digit k)
1198 {
1199   ARGCHK(a != NULL, MP_BADARG);
1200 
1201   return s_mp_2expt(a, k);
1202 
1203 } /* end mp_2expt() */
1204 
1205 /* }}} */
1206 
1207 /* {{{ mp_mod(a, m, c) */
1208 
1209 /*
1210   mp_mod(a, m, c)
1211 
1212   Compute c = a (mod m).  Result will always be 0 <= c < m.
1213  */
1214 
1215 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1216 {
1217   mp_err  res;
1218   int     mag;
1219 
1220   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1221 
1222   if(SIGN(m) == NEG)
1223     return MP_RANGE;
1224 
1225   /*
1226      If |a| > m, we need to divide to get the remainder and take the
1227      absolute value.
1228 
1229      If |a| < m, we don't need to do any division, just copy and adjust
1230      the sign (if a is negative).
1231 
1232      If |a| == m, we can simply set the result to zero.
1233 
1234      This order is intended to minimize the average path length of the
1235      comparison chain on common workloads -- the most frequent cases are
1236      that |a| != m, so we do those first.
1237    */
1238   if((mag = s_mp_cmp(a, m)) > 0) {
1239     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1240       return res;
1241 
1242     if(SIGN(c) == NEG) {
1243       if((res = mp_add(c, m, c)) != MP_OKAY)
1244         return res;
1245     }
1246 
1247   } else if(mag < 0) {
1248     if((res = mp_copy(a, c)) != MP_OKAY)
1249       return res;
1250 
1251     if(mp_cmp_z(a) < 0) {
1252       if((res = mp_add(c, m, c)) != MP_OKAY)
1253         return res;
1254 
1255     }
1256 
1257   } else {
1258     mp_zero(c);
1259 
1260   }
1261 
1262   return MP_OKAY;
1263 
1264 } /* end mp_mod() */
1265 
1266 /* }}} */
1267 
1268 /* {{{ mp_mod_d(a, d, c) */
1269 
1270 /*
1271   mp_mod_d(a, d, c)
1272 
1273   Compute c = a (mod d).  Result will always be 0 <= c < d
1274  */
1275 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1276 {
1277   mp_err   res;
1278   mp_digit rem;
1279 
1280   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1281 
1282   if(s_mp_cmp_d(a, d) > 0) {
1283     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1284       return res;
1285 
1286   } else {
1287     if(SIGN(a) == NEG)
1288       rem = d - DIGIT(a, 0);
1289     else
1290       rem = DIGIT(a, 0);
1291   }
1292 
1293   if(c)
1294     *c = rem;
1295 
1296   return MP_OKAY;
1297 
1298 } /* end mp_mod_d() */
1299 
1300 /* }}} */
1301 
1302 /* {{{ mp_sqrt(a, b) */
1303 
1304 /*
1305   mp_sqrt(a, b)
1306 
1307   Compute the integer square root of a, and store the result in b.
1308   Uses an integer-arithmetic version of Newton's iterative linear
1309   approximation technique to determine this value; the result has the
1310   following two properties:
1311 
1312      b^2 <= a
1313      (b+1)^2 >= a
1314 
1315   It is a range error to pass a negative value.
1316  */
1317 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1318 {
1319   mp_int   x, t;
1320   mp_err   res;
1321   mp_size  used;
1322 
1323   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1324 
1325   /* Cannot take square root of a negative value */
1326   if(SIGN(a) == NEG)
1327     return MP_RANGE;
1328 
1329   /* Special cases for zero and one, trivial     */
1330   if(mp_cmp_d(a, 1) <= 0)
1331     return mp_copy(a, b);
1332 
1333   /* Initialize the temporaries we'll use below  */
1334   if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1335     return res;
1336 
1337   /* Compute an initial guess for the iteration as a itself */
1338   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1339     goto X;
1340 
1341   used = MP_USED(&x);
1342   if (used > 1) {
1343     s_mp_rshd(&x, used / 2);
1344   }
1345 
1346   for(;;) {
1347     /* t = (x * x) - a */
1348     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1349     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1350        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1351       goto CLEANUP;
1352 
1353     /* t = t / 2x       */
1354     s_mp_mul_2(&x);
1355     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1356       goto CLEANUP;
1357     s_mp_div_2(&x);
1358 
1359     /* Terminate the loop, if the quotient is zero */
1360     if(mp_cmp_z(&t) == MP_EQ)
1361       break;
1362 
1363     /* x = x - t       */
1364     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1365       goto CLEANUP;
1366 
1367   }
1368 
1369   /* Copy result to output parameter */
1370   mp_sub_d(&x, 1, &x);
1371   s_mp_exch(&x, b);
1372 
1373  CLEANUP:
1374   mp_clear(&x);
1375  X:
1376   mp_clear(&t);
1377 
1378   return res;
1379 
1380 } /* end mp_sqrt() */
1381 
1382 /* }}} */
1383 
1384 /* }}} */
1385 
1386 /*------------------------------------------------------------------------*/
1387 /* {{{ Modular arithmetic */
1388 
1389 #if MP_MODARITH
1390 /* {{{ mp_addmod(a, b, m, c) */
1391 
1392 /*
1393   mp_addmod(a, b, m, c)
1394 
1395   Compute c = (a + b) mod m
1396  */
1397 
1398 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1399 {
1400   mp_err  res;
1401 
1402   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1403 
1404   if((res = mp_add(a, b, c)) != MP_OKAY)
1405     return res;
1406   if((res = mp_mod(c, m, c)) != MP_OKAY)
1407     return res;
1408 
1409   return MP_OKAY;
1410 
1411 }
1412 
1413 /* }}} */
1414 
1415 /* {{{ mp_submod(a, b, m, c) */
1416 
1417 /*
1418   mp_submod(a, b, m, c)
1419 
1420   Compute c = (a - b) mod m
1421  */
1422 
1423 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1424 {
1425   mp_err  res;
1426 
1427   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1428 
1429   if((res = mp_sub(a, b, c)) != MP_OKAY)
1430     return res;
1431   if((res = mp_mod(c, m, c)) != MP_OKAY)
1432     return res;
1433 
1434   return MP_OKAY;
1435 
1436 }
1437 
1438 /* }}} */
1439 
1440 /* {{{ mp_mulmod(a, b, m, c) */
1441 
1442 /*
1443   mp_mulmod(a, b, m, c)
1444 
1445   Compute c = (a * b) mod m
1446  */
1447 
1448 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1449 {
1450   mp_err  res;
1451 
1452   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1453 
1454   if((res = mp_mul(a, b, c)) != MP_OKAY)
1455     return res;
1456   if((res = mp_mod(c, m, c)) != MP_OKAY)
1457     return res;
1458 
1459   return MP_OKAY;
1460 
1461 }
1462 
1463 /* }}} */
1464 
1465 /* {{{ mp_sqrmod(a, m, c) */
1466 
1467 #if MP_SQUARE
1468 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1469 {
1470   mp_err  res;
1471 
1472   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1473 
1474   if((res = mp_sqr(a, c)) != MP_OKAY)
1475     return res;
1476   if((res = mp_mod(c, m, c)) != MP_OKAY)
1477     return res;
1478 
1479   return MP_OKAY;
1480 
1481 } /* end mp_sqrmod() */
1482 #endif
1483 
1484 /* }}} */
1485 
1486 /* {{{ s_mp_exptmod(a, b, m, c) */
1487 
1488 /*
1489   s_mp_exptmod(a, b, m, c)
1490 
1491   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1492   method with modular reductions at each step. (This is basically the
1493   same code as mp_expt(), except for the addition of the reductions)
1494 
1495   The modular reductions are done using Barrett's algorithm (see
1496   s_mp_reduce() below for details)
1497  */
1498 
1499 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1500 {
1501   mp_int   s, x, mu;
1502   mp_err   res;
1503   mp_digit d;
1504   unsigned int      dig, bit;
1505 
1506   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1507 
1508   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1509     return MP_RANGE;
1510 
1511   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1512     return res;
1513   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1514      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1515     goto X;
1516   if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1517     goto MU;
1518 
1519   mp_set(&s, 1);
1520 
1521   /* mu = b^2k / m */
1522   s_mp_add_d(&mu, 1);
1523   s_mp_lshd(&mu, 2 * USED(m));
1524   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1525     goto CLEANUP;
1526 
1527   /* Loop over digits of b in ascending order, except highest order */
1528   for(dig = 0; dig < (USED(b) - 1); dig++) {
1529     d = DIGIT(b, dig);
1530 
1531     /* Loop over the bits of the lower-order digits */
1532     for(bit = 0; bit < DIGIT_BIT; bit++) {
1533       if(d & 1) {
1534         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1535           goto CLEANUP;
1536         if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1537           goto CLEANUP;
1538       }
1539 
1540       d >>= 1;
1541 
1542       if((res = s_mp_sqr(&x)) != MP_OKAY)
1543         goto CLEANUP;
1544       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1545         goto CLEANUP;
1546     }
1547   }
1548 
1549   /* Now do the last digit... */
1550   d = DIGIT(b, dig);
1551 
1552   while(d) {
1553     if(d & 1) {
1554       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1555         goto CLEANUP;
1556       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1557         goto CLEANUP;
1558     }
1559 
1560     d >>= 1;
1561 
1562     if((res = s_mp_sqr(&x)) != MP_OKAY)
1563       goto CLEANUP;
1564     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1565       goto CLEANUP;
1566   }
1567 
1568   s_mp_exch(&s, c);
1569 
1570  CLEANUP:
1571   mp_clear(&mu);
1572  MU:
1573   mp_clear(&x);
1574  X:
1575   mp_clear(&s);
1576 
1577   return res;
1578 
1579 } /* end s_mp_exptmod() */
1580 
1581 /* }}} */
1582 
1583 /* {{{ mp_exptmod_d(a, d, m, c) */
1584 
1585 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1586 {
1587   mp_int   s, x;
1588   mp_err   res;
1589 
1590   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1591 
1592   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1593     return res;
1594   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1595     goto X;
1596 
1597   mp_set(&s, 1);
1598 
1599   while(d != 0) {
1600     if(d & 1) {
1601       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1602          (res = mp_mod(&s, m, &s)) != MP_OKAY)
1603         goto CLEANUP;
1604     }
1605 
1606     d /= 2;
1607 
1608     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1609        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1610       goto CLEANUP;
1611   }
1612 
1613   s.flag = (mp_sign)0;
1614   s_mp_exch(&s, c);
1615 
1616 CLEANUP:
1617   mp_clear(&x);
1618 X:
1619   mp_clear(&s);
1620 
1621   return res;
1622 
1623 } /* end mp_exptmod_d() */
1624 
1625 /* }}} */
1626 #endif /* if MP_MODARITH */
1627 
1628 /* }}} */
1629 
1630 /*------------------------------------------------------------------------*/
1631 /* {{{ Comparison functions */
1632 
1633 /* {{{ mp_cmp_z(a) */
1634 
1635 /*
1636   mp_cmp_z(a)
1637 
1638   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1639  */
1640 
1641 int    mp_cmp_z(const mp_int *a)
1642 {
1643   if(SIGN(a) == NEG)
1644     return MP_LT;
1645   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1646     return MP_EQ;
1647   else
1648     return MP_GT;
1649 
1650 } /* end mp_cmp_z() */
1651 
1652 /* }}} */
1653 
1654 /* {{{ mp_cmp_d(a, d) */
1655 
1656 /*
1657   mp_cmp_d(a, d)
1658 
1659   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1660  */
1661 
1662 int    mp_cmp_d(const mp_int *a, mp_digit d)
1663 {
1664   ARGCHK(a != NULL, MP_EQ);
1665 
1666   if(SIGN(a) == NEG)
1667     return MP_LT;
1668 
1669   return s_mp_cmp_d(a, d);
1670 
1671 } /* end mp_cmp_d() */
1672 
1673 /* }}} */
1674 
1675 /* {{{ mp_cmp(a, b) */
1676 
1677 int    mp_cmp(const mp_int *a, const mp_int *b)
1678 {
1679   ARGCHK(a != NULL && b != NULL, MP_EQ);
1680 
1681   if(SIGN(a) == SIGN(b)) {
1682     int  mag;
1683 
1684     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1685       return MP_EQ;
1686 
1687     if(SIGN(a) == ZPOS)
1688       return mag;
1689     else
1690       return -mag;
1691 
1692   } else if(SIGN(a) == ZPOS) {
1693     return MP_GT;
1694   } else {
1695     return MP_LT;
1696   }
1697 
1698 } /* end mp_cmp() */
1699 
1700 /* }}} */
1701 
1702 /* {{{ mp_cmp_mag(a, b) */
1703 
1704 /*
1705   mp_cmp_mag(a, b)
1706 
1707   Compares |a| <=> |b|, and returns an appropriate comparison result
1708  */
1709 
1710 int    mp_cmp_mag(mp_int *a, mp_int *b)
1711 {
1712   ARGCHK(a != NULL && b != NULL, MP_EQ);
1713 
1714   return s_mp_cmp(a, b);
1715 
1716 } /* end mp_cmp_mag() */
1717 
1718 /* }}} */
1719 
1720 /* {{{ mp_cmp_int(a, z, kmflag) */
1721 
1722 /*
1723   This just converts z to an mp_int, and uses the existing comparison
1724   routines.  This is sort of inefficient, but it's not clear to me how
1725   frequently this wil get used anyway.  For small positive constants,
1726   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1727  */
1728 int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1729 {
1730   mp_int  tmp;
1731   int     out;
1732 
1733   ARGCHK(a != NULL, MP_EQ);
1734 
1735   mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1736   out = mp_cmp(a, &tmp);
1737   mp_clear(&tmp);
1738 
1739   return out;
1740 
1741 } /* end mp_cmp_int() */
1742 
1743 /* }}} */
1744 
1745 /* {{{ mp_isodd(a) */
1746 
1747 /*
1748   mp_isodd(a)
1749 
1750   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1751  */
1752 int    mp_isodd(const mp_int *a)
1753 {
1754   ARGCHK(a != NULL, 0);
1755 
1756   return (int)(DIGIT(a, 0) & 1);
1757 
1758 } /* end mp_isodd() */
1759 
1760 /* }}} */
1761 
1762 /* {{{ mp_iseven(a) */
1763 
1764 int    mp_iseven(const mp_int *a)
1765 {
1766   return !mp_isodd(a);
1767 
1768 } /* end mp_iseven() */
1769 
1770 /* }}} */
1771 
1772 /* }}} */
1773 
1774 /*------------------------------------------------------------------------*/
1775 /* {{{ Number theoretic functions */
1776 
1777 #if MP_NUMTH
1778 /* {{{ mp_gcd(a, b, c) */
1779 
1780 /*
1781   Like the old mp_gcd() function, except computes the GCD using the
1782   binary algorithm due to Josef Stein in 1961 (via Knuth).
1783  */
1784 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1785 {
1786   mp_err   res;
1787   mp_int   u, v, t;
1788   mp_size  k = 0;
1789 
1790   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1791 
1792   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1793       return MP_RANGE;
1794   if(mp_cmp_z(a) == MP_EQ) {
1795     return mp_copy(b, c);
1796   } else if(mp_cmp_z(b) == MP_EQ) {
1797     return mp_copy(a, c);
1798   }
1799 
1800   if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1801     return res;
1802   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1803     goto U;
1804   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1805     goto V;
1806 
1807   SIGN(&u) = ZPOS;
1808   SIGN(&v) = ZPOS;
1809 
1810   /* Divide out common factors of 2 until at least 1 of a, b is even */
1811   while(mp_iseven(&u) && mp_iseven(&v)) {
1812     s_mp_div_2(&u);
1813     s_mp_div_2(&v);
1814     ++k;
1815   }
1816 
1817   /* Initialize t */
1818   if(mp_isodd(&u)) {
1819     if((res = mp_copy(&v, &t)) != MP_OKAY)
1820       goto CLEANUP;
1821 
1822     /* t = -v */
1823     if(SIGN(&v) == ZPOS)
1824       SIGN(&t) = NEG;
1825     else
1826       SIGN(&t) = ZPOS;
1827 
1828   } else {
1829     if((res = mp_copy(&u, &t)) != MP_OKAY)
1830       goto CLEANUP;
1831 
1832   }
1833 
1834   for(;;) {
1835     while(mp_iseven(&t)) {
1836       s_mp_div_2(&t);
1837     }
1838 
1839     if(mp_cmp_z(&t) == MP_GT) {
1840       if((res = mp_copy(&t, &u)) != MP_OKAY)
1841         goto CLEANUP;
1842 
1843     } else {
1844       if((res = mp_copy(&t, &v)) != MP_OKAY)
1845         goto CLEANUP;
1846 
1847       /* v = -t */
1848       if(SIGN(&t) == ZPOS)
1849         SIGN(&v) = NEG;
1850       else
1851         SIGN(&v) = ZPOS;
1852     }
1853 
1854     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1855       goto CLEANUP;
1856 
1857     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1858       break;
1859   }
1860 
1861   s_mp_2expt(&v, k);       /* v = 2^k   */
1862   res = mp_mul(&u, &v, c); /* c = u * v */
1863 
1864  CLEANUP:
1865   mp_clear(&v);
1866  V:
1867   mp_clear(&u);
1868  U:
1869   mp_clear(&t);
1870 
1871   return res;
1872 
1873 } /* end mp_gcd() */
1874 
1875 /* }}} */
1876 
1877 /* {{{ mp_lcm(a, b, c) */
1878 
1879 /* We compute the least common multiple using the rule:
1880 
1881    ab = [a, b](a, b)
1882 
1883    ... by computing the product, and dividing out the gcd.
1884  */
1885 
1886 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1887 {
1888   mp_int  gcd, prod;
1889   mp_err  res;
1890 
1891   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1892 
1893   /* Set up temporaries */
1894   if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1895     return res;
1896   if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1897     goto GCD;
1898 
1899   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1900     goto CLEANUP;
1901   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1902     goto CLEANUP;
1903 
1904   res = mp_div(&prod, &gcd, c, NULL);
1905 
1906  CLEANUP:
1907   mp_clear(&prod);
1908  GCD:
1909   mp_clear(&gcd);
1910 
1911   return res;
1912 
1913 } /* end mp_lcm() */
1914 
1915 /* }}} */
1916 
1917 /* {{{ mp_xgcd(a, b, g, x, y) */
1918 
1919 /*
1920   mp_xgcd(a, b, g, x, y)
1921 
1922   Compute g = (a, b) and values x and y satisfying Bezout's identity
1923   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1924   based on the Stein algorithm used for mp_gcd()
1925   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1926  */
1927 
1928 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1929 {
1930   mp_int   gx, xc, yc, u, v, A, B, C, D;
1931   mp_int  *clean[9];
1932   mp_err   res;
1933   int      last = -1;
1934 
1935   if(mp_cmp_z(b) == 0)
1936     return MP_RANGE;
1937 
1938   /* Initialize all these variables we need */
1939   MP_CHECKOK( mp_init(&u, FLAG(a)) );
1940   clean[++last] = &u;
1941   MP_CHECKOK( mp_init(&v, FLAG(a)) );
1942   clean[++last] = &v;
1943   MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1944   clean[++last] = &gx;
1945   MP_CHECKOK( mp_init(&A, FLAG(a)) );
1946   clean[++last] = &A;
1947   MP_CHECKOK( mp_init(&B, FLAG(a)) );
1948   clean[++last] = &B;
1949   MP_CHECKOK( mp_init(&C, FLAG(a)) );
1950   clean[++last] = &C;
1951   MP_CHECKOK( mp_init(&D, FLAG(a)) );
1952   clean[++last] = &D;
1953   MP_CHECKOK( mp_init_copy(&xc, a) );
1954   clean[++last] = &xc;
1955   mp_abs(&xc, &xc);
1956   MP_CHECKOK( mp_init_copy(&yc, b) );
1957   clean[++last] = &yc;
1958   mp_abs(&yc, &yc);
1959 
1960   mp_set(&gx, 1);
1961 
1962   /* Divide by two until at least one of them is odd */
1963   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1964     mp_size nx = mp_trailing_zeros(&xc);
1965     mp_size ny = mp_trailing_zeros(&yc);
1966     mp_size n  = MP_MIN(nx, ny);
1967     s_mp_div_2d(&xc,n);
1968     s_mp_div_2d(&yc,n);
1969     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1970   }
1971 
1972   mp_copy(&xc, &u);
1973   mp_copy(&yc, &v);
1974   mp_set(&A, 1); mp_set(&D, 1);
1975 
1976   /* Loop through binary GCD algorithm */
1977   do {
1978     while(mp_iseven(&u)) {
1979       s_mp_div_2(&u);
1980 
1981       if(mp_iseven(&A) && mp_iseven(&B)) {
1982         s_mp_div_2(&A); s_mp_div_2(&B);
1983       } else {
1984         MP_CHECKOK( mp_add(&A, &yc, &A) );
1985         s_mp_div_2(&A);
1986         MP_CHECKOK( mp_sub(&B, &xc, &B) );
1987         s_mp_div_2(&B);
1988       }
1989     }
1990 
1991     while(mp_iseven(&v)) {
1992       s_mp_div_2(&v);
1993 
1994       if(mp_iseven(&C) && mp_iseven(&D)) {
1995         s_mp_div_2(&C); s_mp_div_2(&D);
1996       } else {
1997         MP_CHECKOK( mp_add(&C, &yc, &C) );
1998         s_mp_div_2(&C);
1999         MP_CHECKOK( mp_sub(&D, &xc, &D) );
2000         s_mp_div_2(&D);
2001       }
2002     }
2003 
2004     if(mp_cmp(&u, &v) >= 0) {
2005       MP_CHECKOK( mp_sub(&u, &v, &u) );
2006       MP_CHECKOK( mp_sub(&A, &C, &A) );
2007       MP_CHECKOK( mp_sub(&B, &D, &B) );
2008     } else {
2009       MP_CHECKOK( mp_sub(&v, &u, &v) );
2010       MP_CHECKOK( mp_sub(&C, &A, &C) );
2011       MP_CHECKOK( mp_sub(&D, &B, &D) );
2012     }
2013   } while (mp_cmp_z(&u) != 0);
2014 
2015   /* copy results to output */
2016   if(x)
2017     MP_CHECKOK( mp_copy(&C, x) );
2018 
2019   if(y)
2020     MP_CHECKOK( mp_copy(&D, y) );
2021 
2022   if(g)
2023     MP_CHECKOK( mp_mul(&gx, &v, g) );
2024 
2025  CLEANUP:
2026   while(last >= 0)
2027     mp_clear(clean[last--]);
2028 
2029   return res;
2030 
2031 } /* end mp_xgcd() */
2032 
2033 /* }}} */
2034 
2035 mp_size mp_trailing_zeros(const mp_int *mp)
2036 {
2037   mp_digit d;
2038   mp_size  n = 0;
2039   unsigned int      ix;
2040 
2041   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2042     return n;
2043 
2044   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2045     n += MP_DIGIT_BIT;
2046   if (!d)
2047     return 0;   /* shouldn't happen, but ... */
2048 #if !defined(MP_USE_UINT_DIGIT)
2049   if (!(d & 0xffffffffU)) {
2050     d >>= 32;
2051     n  += 32;
2052   }
2053 #endif
2054   if (!(d & 0xffffU)) {
2055     d >>= 16;
2056     n  += 16;
2057   }
2058   if (!(d & 0xffU)) {
2059     d >>= 8;
2060     n  += 8;
2061   }
2062   if (!(d & 0xfU)) {
2063     d >>= 4;
2064     n  += 4;
2065   }
2066   if (!(d & 0x3U)) {
2067     d >>= 2;
2068     n  += 2;
2069   }
2070   if (!(d & 0x1U)) {
2071     d >>= 1;
2072     n  += 1;
2073   }
2074 #if MP_ARGCHK == 2
2075   assert(0 != (d & 1));
2076 #endif
2077   return n;
2078 }
2079 
2080 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2081 ** Returns k (positive) or error (negative).
2082 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2083 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2084 */
2085 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2086 {
2087   mp_err res;
2088   mp_err k    = 0;
2089   mp_int d, f, g;
2090 
2091   ARGCHK(a && p && c, MP_BADARG);
2092 
2093   MP_DIGITS(&d) = 0;
2094   MP_DIGITS(&f) = 0;
2095   MP_DIGITS(&g) = 0;
2096   MP_CHECKOK( mp_init(&d, FLAG(a)) );
2097   MP_CHECKOK( mp_init_copy(&f, a) );    /* f = a */
2098   MP_CHECKOK( mp_init_copy(&g, p) );    /* g = p */
2099 
2100   mp_set(c, 1);
2101   mp_zero(&d);
2102 
2103   if (mp_cmp_z(&f) == 0) {
2104     res = MP_UNDEF;
2105   } else
2106   for (;;) {
2107     int diff_sign;
2108     while (mp_iseven(&f)) {
2109       mp_size n = mp_trailing_zeros(&f);
2110       if (!n) {
2111         res = MP_UNDEF;
2112         goto CLEANUP;
2113       }
2114       s_mp_div_2d(&f, n);
2115       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2116       k += n;
2117     }
2118     if (mp_cmp_d(&f, 1) == MP_EQ) {     /* f == 1 */
2119       res = k;
2120       break;
2121     }
2122     diff_sign = mp_cmp(&f, &g);
2123     if (diff_sign < 0) {                /* f < g */
2124       s_mp_exch(&f, &g);
2125       s_mp_exch(c, &d);
2126     } else if (diff_sign == 0) {                /* f == g */
2127       res = MP_UNDEF;           /* a and p are not relatively prime */
2128       break;
2129     }
2130     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2131       MP_CHECKOK( mp_sub(&f, &g, &f) ); /* f = f - g */
2132       MP_CHECKOK( mp_sub(c,  &d,  c) ); /* c = c - d */
2133     } else {
2134       MP_CHECKOK( mp_add(&f, &g, &f) ); /* f = f + g */
2135       MP_CHECKOK( mp_add(c,  &d,  c) ); /* c = c + d */
2136     }
2137   }
2138   if (res >= 0) {
2139     if (s_mp_cmp(c, p) >= 0) {
2140       MP_CHECKOK( mp_div(c, p, NULL, c));
2141     }
2142     if (MP_SIGN(c) != MP_ZPOS) {
2143       MP_CHECKOK( mp_add(c, p, c) );
2144     }
2145     res = k;
2146   }
2147 
2148 CLEANUP:
2149   mp_clear(&d);
2150   mp_clear(&f);
2151   mp_clear(&g);
2152   return res;
2153 }
2154 
2155 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2156 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2157 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2158 */
2159 mp_digit  s_mp_invmod_radix(mp_digit P)
2160 {
2161   mp_digit T = P;
2162   T *= 2 - (P * T);
2163   T *= 2 - (P * T);
2164   T *= 2 - (P * T);
2165   T *= 2 - (P * T);
2166 #if !defined(MP_USE_UINT_DIGIT)
2167   T *= 2 - (P * T);
2168   T *= 2 - (P * T);
2169 #endif
2170   return T;
2171 }
2172 
2173 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2174 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2175 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2176 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2177 */
2178 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2179 {
2180   int      k_orig = k;
2181   mp_digit r;
2182   mp_size  ix;
2183   mp_err   res;
2184 
2185   if (mp_cmp_z(c) < 0) {                /* c < 0 */
2186     MP_CHECKOK( mp_add(c, p, x) );      /* x = c + p */
2187   } else {
2188     MP_CHECKOK( mp_copy(c, x) );        /* x = c */
2189   }
2190 
2191   /* make sure x is large enough */
2192   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2193   ix = MP_MAX(ix, MP_USED(x));
2194   MP_CHECKOK( s_mp_pad(x, ix) );
2195 
2196   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2197 
2198   for (ix = 0; k > 0; ix++) {
2199     int      j = MP_MIN(k, MP_DIGIT_BIT);
2200     mp_digit v = r * MP_DIGIT(x, ix);
2201     if (j < MP_DIGIT_BIT) {
2202       v &= ((mp_digit)1 << j) - 1;      /* v = v mod (2 ** j) */
2203     }
2204     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2205     k -= j;
2206   }
2207   s_mp_clamp(x);
2208   s_mp_div_2d(x, k_orig);
2209   res = MP_OKAY;
2210 
2211 CLEANUP:
2212   return res;
2213 }
2214 
2215 /* compute mod inverse using Schroeppel's method, only if m is odd */
2216 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2217 {
2218   int k;
2219   mp_err  res;
2220   mp_int  x;
2221 
2222   ARGCHK(a && m && c, MP_BADARG);
2223 
2224   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2225     return MP_RANGE;
2226   if (mp_iseven(m))
2227     return MP_UNDEF;
2228 
2229   MP_DIGITS(&x) = 0;
2230 
2231   if (a == c) {
2232     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2233       return res;
2234     if (a == m)
2235       m = &x;
2236     a = &x;
2237   } else if (m == c) {
2238     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2239       return res;
2240     m = &x;
2241   } else {
2242     MP_DIGITS(&x) = 0;
2243   }
2244 
2245   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2246   k = res;
2247   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2248 CLEANUP:
2249   mp_clear(&x);
2250   return res;
2251 }
2252 
2253 /* Known good algorithm for computing modular inverse.  But slow. */
2254 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2255 {
2256   mp_int  g, x;
2257   mp_err  res;
2258 
2259   ARGCHK(a && m && c, MP_BADARG);
2260 
2261   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2262     return MP_RANGE;
2263 
2264   MP_DIGITS(&g) = 0;
2265   MP_DIGITS(&x) = 0;
2266   MP_CHECKOK( mp_init(&x, FLAG(a)) );
2267   MP_CHECKOK( mp_init(&g, FLAG(a)) );
2268 
2269   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2270 
2271   if (mp_cmp_d(&g, 1) != MP_EQ) {
2272     res = MP_UNDEF;
2273     goto CLEANUP;
2274   }
2275 
2276   res = mp_mod(&x, m, c);
2277   SIGN(c) = SIGN(a);
2278 
2279 CLEANUP:
2280   mp_clear(&x);
2281   mp_clear(&g);
2282 
2283   return res;
2284 }
2285 
2286 /* modular inverse where modulus is 2**k. */
2287 /* c = a**-1 mod 2**k */
2288 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2289 {
2290   mp_err res;
2291   mp_size ix = k + 4;
2292   mp_int t0, t1, val, tmp, two2k;
2293 
2294   static const mp_digit d2 = 2;
2295   static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2296 
2297   if (mp_iseven(a))
2298     return MP_UNDEF;
2299   if (k <= MP_DIGIT_BIT) {
2300     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2301     if (k < MP_DIGIT_BIT)
2302       i &= ((mp_digit)1 << k) - (mp_digit)1;
2303     mp_set(c, i);
2304     return MP_OKAY;
2305   }
2306   MP_DIGITS(&t0) = 0;
2307   MP_DIGITS(&t1) = 0;
2308   MP_DIGITS(&val) = 0;
2309   MP_DIGITS(&tmp) = 0;
2310   MP_DIGITS(&two2k) = 0;
2311   MP_CHECKOK( mp_init_copy(&val, a) );
2312   s_mp_mod_2d(&val, k);
2313   MP_CHECKOK( mp_init_copy(&t0, &val) );
2314   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2315   MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2316   MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2317   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2318   do {
2319     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2320     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2321     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2322     s_mp_mod_2d(&t1, k);
2323     while (MP_SIGN(&t1) != MP_ZPOS) {
2324       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2325     }
2326     if (mp_cmp(&t1, &t0) == MP_EQ)
2327       break;
2328     MP_CHECKOK( mp_copy(&t1, &t0) );
2329   } while (--ix > 0);
2330   if (!ix) {
2331     res = MP_UNDEF;
2332   } else {
2333     mp_exch(c, &t1);
2334   }
2335 
2336 CLEANUP:
2337   mp_clear(&t0);
2338   mp_clear(&t1);
2339   mp_clear(&val);
2340   mp_clear(&tmp);
2341   mp_clear(&two2k);
2342   return res;
2343 }
2344 
2345 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2346 {
2347   mp_err res;
2348   mp_size k;
2349   mp_int oddFactor, evenFactor; /* factors of the modulus */
2350   mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2351   mp_int C2, tmp1, tmp2;
2352 
2353   /*static const mp_digit d1 = 1; */
2354   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2355 
2356   if ((res = s_mp_ispow2(m)) >= 0) {
2357     k = res;
2358     return s_mp_invmod_2d(a, k, c);
2359   }
2360   MP_DIGITS(&oddFactor) = 0;
2361   MP_DIGITS(&evenFactor) = 0;
2362   MP_DIGITS(&oddPart) = 0;
2363   MP_DIGITS(&evenPart) = 0;
2364   MP_DIGITS(&C2)     = 0;
2365   MP_DIGITS(&tmp1)   = 0;
2366   MP_DIGITS(&tmp2)   = 0;
2367 
2368   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2369   MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2370   MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2371   MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2372   MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2373   MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2374   MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2375 
2376   k = mp_trailing_zeros(m);
2377   s_mp_div_2d(&oddFactor, k);
2378   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2379 
2380   /* compute a**-1 mod oddFactor. */
2381   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2382   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2383   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2384 
2385   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2386   /* let m1 = oddFactor,  v1 = oddPart,
2387    * let m2 = evenFactor, v2 = evenPart.
2388    */
2389 
2390   /* Compute C2 = m1**-1 mod m2. */
2391   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2392 
2393   /* compute u = (v2 - v1)*C2 mod m2 */
2394   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2395   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2396   s_mp_mod_2d(&tmp2, k);
2397   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2398     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2399   }
2400 
2401   /* compute answer = v1 + u*m1 */
2402   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2403   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2404   /* not sure this is necessary, but it's low cost if not. */
2405   MP_CHECKOK( mp_mod(c,         m,          c) );
2406 
2407 CLEANUP:
2408   mp_clear(&oddFactor);
2409   mp_clear(&evenFactor);
2410   mp_clear(&oddPart);
2411   mp_clear(&evenPart);
2412   mp_clear(&C2);
2413   mp_clear(&tmp1);
2414   mp_clear(&tmp2);
2415   return res;
2416 }
2417 
2418 
2419 /* {{{ mp_invmod(a, m, c) */
2420 
2421 /*
2422   mp_invmod(a, m, c)
2423 
2424   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2425   This is equivalent to the question of whether (a, m) = 1.  If not,
2426   MP_UNDEF is returned, and there is no inverse.
2427  */
2428 
2429 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2430 {
2431 
2432   ARGCHK(a && m && c, MP_BADARG);
2433 
2434   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2435     return MP_RANGE;
2436 
2437   if (mp_isodd(m)) {
2438     return s_mp_invmod_odd_m(a, m, c);
2439   }
2440   if (mp_iseven(a))
2441     return MP_UNDEF;    /* not invertable */
2442 
2443   return s_mp_invmod_even_m(a, m, c);
2444 
2445 } /* end mp_invmod() */
2446 
2447 /* }}} */
2448 #endif /* if MP_NUMTH */
2449 
2450 /* }}} */
2451 
2452 /*------------------------------------------------------------------------*/
2453 /* {{{ mp_print(mp, ofp) */
2454 
2455 #if MP_IOFUNC
2456 /*
2457   mp_print(mp, ofp)
2458 
2459   Print a textual representation of the given mp_int on the output
2460   stream 'ofp'.  Output is generated using the internal radix.
2461  */
2462 
2463 void   mp_print(mp_int *mp, FILE *ofp)
2464 {
2465   int   ix;
2466 
2467   if(mp == NULL || ofp == NULL)
2468     return;
2469 
2470   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2471 
2472   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2473     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2474   }
2475 
2476 } /* end mp_print() */
2477 
2478 #endif /* if MP_IOFUNC */
2479 
2480 /* }}} */
2481 
2482 /*------------------------------------------------------------------------*/
2483 /* {{{ More I/O Functions */
2484 
2485 /* {{{ mp_read_raw(mp, str, len) */
2486 
2487 /*
2488    mp_read_raw(mp, str, len)
2489 
2490    Read in a raw value (base 256) into the given mp_int
2491  */
2492 
2493 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2494 {
2495   int            ix;
2496   mp_err         res;
2497   unsigned char *ustr = (unsigned char *)str;
2498 
2499   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2500 
2501   mp_zero(mp);
2502 
2503   /* Get sign from first byte */
2504   if(ustr[0])
2505     SIGN(mp) = NEG;
2506   else
2507     SIGN(mp) = ZPOS;
2508 
2509   /* Read the rest of the digits */
2510   for(ix = 1; ix < len; ix++) {
2511     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2512       return res;
2513     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2514       return res;
2515   }
2516 
2517   return MP_OKAY;
2518 
2519 } /* end mp_read_raw() */
2520 
2521 /* }}} */
2522 
2523 /* {{{ mp_raw_size(mp) */
2524 
2525 int    mp_raw_size(mp_int *mp)
2526 {
2527   ARGCHK(mp != NULL, 0);
2528 
2529   return (USED(mp) * sizeof(mp_digit)) + 1;
2530 
2531 } /* end mp_raw_size() */
2532 
2533 /* }}} */
2534 
2535 /* {{{ mp_toraw(mp, str) */
2536 
2537 mp_err mp_toraw(mp_int *mp, char *str)
2538 {
2539   int  ix, jx, pos = 1;
2540 
2541   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2542 
2543   str[0] = (char)SIGN(mp);
2544 
2545   /* Iterate over each digit... */
2546   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2547     mp_digit  d = DIGIT(mp, ix);
2548 
2549     /* Unpack digit bytes, high order first */
2550     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2551       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2552     }
2553   }
2554 
2555   return MP_OKAY;
2556 
2557 } /* end mp_toraw() */
2558 
2559 /* }}} */
2560 
2561 /* {{{ mp_read_radix(mp, str, radix) */
2562 
2563 /*
2564   mp_read_radix(mp, str, radix)
2565 
2566   Read an integer from the given string, and set mp to the resulting
2567   value.  The input is presumed to be in base 10.  Leading non-digit
2568   characters are ignored, and the function reads until a non-digit
2569   character or the end of the string.
2570  */
2571 
2572 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2573 {
2574   int     ix = 0, val = 0;
2575   mp_err  res;
2576   mp_sign sig = ZPOS;
2577 
2578   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2579          MP_BADARG);
2580 
2581   mp_zero(mp);
2582 
2583   /* Skip leading non-digit characters until a digit or '-' or '+' */
2584   while(str[ix] &&
2585         (s_mp_tovalue(str[ix], radix) < 0) &&
2586         str[ix] != '-' &&
2587         str[ix] != '+') {
2588     ++ix;
2589   }
2590 
2591   if(str[ix] == '-') {
2592     sig = NEG;
2593     ++ix;
2594   } else if(str[ix] == '+') {
2595     sig = ZPOS; /* this is the default anyway... */
2596     ++ix;
2597   }
2598 
2599   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2600     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2601       return res;
2602     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2603       return res;
2604     ++ix;
2605   }
2606 
2607   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2608     SIGN(mp) = ZPOS;
2609   else
2610     SIGN(mp) = sig;
2611 
2612   return MP_OKAY;
2613 
2614 } /* end mp_read_radix() */
2615 
2616 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2617 {
2618   int     radix = default_radix;
2619   int     cx;
2620   mp_sign sig   = ZPOS;
2621   mp_err  res;
2622 
2623   /* Skip leading non-digit characters until a digit or '-' or '+' */
2624   while ((cx = *str) != 0 &&
2625         (s_mp_tovalue(cx, radix) < 0) &&
2626         cx != '-' &&
2627         cx != '+') {
2628     ++str;
2629   }
2630 
2631   if (cx == '-') {
2632     sig = NEG;
2633     ++str;
2634   } else if (cx == '+') {
2635     sig = ZPOS; /* this is the default anyway... */
2636     ++str;
2637   }
2638 
2639   if (str[0] == '0') {
2640     if ((str[1] | 0x20) == 'x') {
2641       radix = 16;
2642       str += 2;
2643     } else {
2644       radix = 8;
2645       str++;
2646     }
2647   }
2648   res = mp_read_radix(a, str, radix);
2649   if (res == MP_OKAY) {
2650     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2651   }
2652   return res;
2653 }
2654 
2655 /* }}} */
2656 
2657 /* {{{ mp_radix_size(mp, radix) */
2658 
2659 int    mp_radix_size(mp_int *mp, int radix)
2660 {
2661   int  bits;
2662 
2663   if(!mp || radix < 2 || radix > MAX_RADIX)
2664     return 0;
2665 
2666   bits = USED(mp) * DIGIT_BIT - 1;
2667 
2668   return s_mp_outlen(bits, radix);
2669 
2670 } /* end mp_radix_size() */
2671 
2672 /* }}} */
2673 
2674 /* {{{ mp_toradix(mp, str, radix) */
2675 
2676 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2677 {
2678   int  ix, pos = 0;
2679 
2680   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2681   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2682 
2683   if(mp_cmp_z(mp) == MP_EQ) {
2684     str[0] = '0';
2685     str[1] = '\0';
2686   } else {
2687     mp_err   res;
2688     mp_int   tmp;
2689     mp_sign  sgn;
2690     mp_digit rem, rdx = (mp_digit)radix;
2691     char     ch;
2692 
2693     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2694       return res;
2695 
2696     /* Save sign for later, and take absolute value */
2697     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2698 
2699     /* Generate output digits in reverse order      */
2700     while(mp_cmp_z(&tmp) != 0) {
2701       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2702         mp_clear(&tmp);
2703         return res;
2704       }
2705 
2706       /* Generate digits, use capital letters */
2707       ch = s_mp_todigit(rem, radix, 0);
2708 
2709       str[pos++] = ch;
2710     }
2711 
2712     /* Add - sign if original value was negative */
2713     if(sgn == NEG)
2714       str[pos++] = '-';
2715 
2716     /* Add trailing NUL to end the string        */
2717     str[pos--] = '\0';
2718 
2719     /* Reverse the digits and sign indicator     */
2720     ix = 0;
2721     while(ix < pos) {
2722       char tmp = str[ix];
2723 
2724       str[ix] = str[pos];
2725       str[pos] = tmp;
2726       ++ix;
2727       --pos;
2728     }
2729 
2730     mp_clear(&tmp);
2731   }
2732 
2733   return MP_OKAY;
2734 
2735 } /* end mp_toradix() */
2736 
2737 /* }}} */
2738 
2739 /* {{{ mp_tovalue(ch, r) */
2740 
2741 int    mp_tovalue(char ch, int r)
2742 {
2743   return s_mp_tovalue(ch, r);
2744 
2745 } /* end mp_tovalue() */
2746 
2747 /* }}} */
2748 
2749 /* }}} */
2750 
2751 /* {{{ mp_strerror(ec) */
2752 
2753 /*
2754   mp_strerror(ec)
2755 
2756   Return a string describing the meaning of error code 'ec'.  The
2757   string returned is allocated in static memory, so the caller should
2758   not attempt to modify or free the memory associated with this
2759   string.
2760  */
2761 const char  *mp_strerror(mp_err ec)
2762 {
2763   int   aec = (ec < 0) ? -ec : ec;
2764 
2765   /* Code values are negative, so the senses of these comparisons
2766      are accurate */
2767   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2768     return mp_err_string[0];  /* unknown error code */
2769   } else {
2770     return mp_err_string[aec + 1];
2771   }
2772 
2773 } /* end mp_strerror() */
2774 
2775 /* }}} */
2776 
2777 /*========================================================================*/
2778 /*------------------------------------------------------------------------*/
2779 /* Static function definitions (internal use only)                        */
2780 
2781 /* {{{ Memory management */
2782 
2783 /* {{{ s_mp_grow(mp, min) */
2784 
2785 /* Make sure there are at least 'min' digits allocated to mp              */
2786 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2787 {
2788   if(min > ALLOC(mp)) {
2789     mp_digit   *tmp;
2790 
2791     /* Set min to next nearest default precision block size */
2792     min = MP_ROUNDUP(min, s_mp_defprec);
2793 
2794     if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2795       return MP_MEM;
2796 
2797     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2798 
2799 #if MP_CRYPTO
2800     s_mp_setz(DIGITS(mp), ALLOC(mp));
2801 #endif
2802     s_mp_free(DIGITS(mp), ALLOC(mp));
2803     DIGITS(mp) = tmp;
2804     ALLOC(mp) = min;
2805   }
2806 
2807   return MP_OKAY;
2808 
2809 } /* end s_mp_grow() */
2810 
2811 /* }}} */
2812 
2813 /* {{{ s_mp_pad(mp, min) */
2814 
2815 /* Make sure the used size of mp is at least 'min', growing if needed     */
2816 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2817 {
2818   if(min > USED(mp)) {
2819     mp_err  res;
2820 
2821     /* Make sure there is room to increase precision  */
2822     if (min > ALLOC(mp)) {
2823       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2824         return res;
2825     } else {
2826       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2827     }
2828 
2829     /* Increase precision; should already be 0-filled */
2830     USED(mp) = min;
2831   }
2832 
2833   return MP_OKAY;
2834 
2835 } /* end s_mp_pad() */
2836 
2837 /* }}} */
2838 
2839 /* {{{ s_mp_setz(dp, count) */
2840 
2841 #if MP_MACRO == 0
2842 /* Set 'count' digits pointed to by dp to be zeroes                       */
2843 void s_mp_setz(mp_digit *dp, mp_size count)
2844 {
2845 #if MP_MEMSET == 0
2846   int  ix;
2847 
2848   for(ix = 0; ix < count; ix++)
2849     dp[ix] = 0;
2850 #else
2851   memset(dp, 0, count * sizeof(mp_digit));
2852 #endif
2853 
2854 } /* end s_mp_setz() */
2855 #endif
2856 
2857 /* }}} */
2858 
2859 /* {{{ s_mp_copy(sp, dp, count) */
2860 
2861 #if MP_MACRO == 0
2862 /* Copy 'count' digits from sp to dp                                      */
2863 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2864 {
2865 #if MP_MEMCPY == 0
2866   int  ix;
2867 
2868   for(ix = 0; ix < count; ix++)
2869     dp[ix] = sp[ix];
2870 #else
2871   memcpy(dp, sp, count * sizeof(mp_digit));
2872 #endif
2873 
2874 } /* end s_mp_copy() */
2875 #endif
2876 
2877 /* }}} */
2878 
2879 /* {{{ s_mp_alloc(nb, ni, kmflag) */
2880 
2881 #if MP_MACRO == 0
2882 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2883 void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2884 {
2885   ++mp_allocs;
2886 #ifdef _KERNEL
2887   mp_int *mp;
2888   mp = kmem_zalloc(nb * ni, kmflag);
2889   if (mp != NULL)
2890     FLAG(mp) = kmflag;
2891   return (mp);
2892 #else
2893   return calloc(nb, ni);
2894 #endif
2895 
2896 } /* end s_mp_alloc() */
2897 #endif
2898 
2899 /* }}} */
2900 
2901 /* {{{ s_mp_free(ptr) */
2902 
2903 #if MP_MACRO == 0
2904 /* Free the memory pointed to by ptr                                      */
2905 void     s_mp_free(void *ptr, mp_size alloc)
2906 {
2907   if(ptr) {
2908     ++mp_frees;
2909 #ifdef _KERNEL
2910     kmem_free(ptr, alloc * sizeof (mp_digit));
2911 #else
2912     free(ptr);
2913 #endif
2914   }
2915 } /* end s_mp_free() */
2916 #endif
2917 
2918 /* }}} */
2919 
2920 /* {{{ s_mp_clamp(mp) */
2921 
2922 #if MP_MACRO == 0
2923 /* Remove leading zeroes from the given value                             */
2924 void     s_mp_clamp(mp_int *mp)
2925 {
2926   mp_size used = MP_USED(mp);
2927   while (used > 1 && DIGIT(mp, used - 1) == 0)
2928     --used;
2929   MP_USED(mp) = used;
2930 } /* end s_mp_clamp() */
2931 #endif
2932 
2933 /* }}} */
2934 
2935 /* {{{ s_mp_exch(a, b) */
2936 
2937 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2938 void     s_mp_exch(mp_int *a, mp_int *b)
2939 {
2940   mp_int   tmp;
2941 
2942   tmp = *a;
2943   *a = *b;
2944   *b = tmp;
2945 
2946 } /* end s_mp_exch() */
2947 
2948 /* }}} */
2949 
2950 /* }}} */
2951 
2952 /* {{{ Arithmetic helpers */
2953 
2954 /* {{{ s_mp_lshd(mp, p) */
2955 
2956 /*
2957    Shift mp leftward by p digits, growing if needed, and zero-filling
2958    the in-shifted digits at the right end.  This is a convenient
2959    alternative to multiplication by powers of the radix
2960    The value of USED(mp) must already have been set to the value for
2961    the shifted result.
2962  */
2963 
2964 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2965 {
2966   mp_err  res;
2967   mp_size pos;
2968   int     ix;
2969 
2970   if(p == 0)
2971     return MP_OKAY;
2972 
2973   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2974     return MP_OKAY;
2975 
2976   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2977     return res;
2978 
2979   pos = USED(mp) - 1;
2980 
2981   /* Shift all the significant figures over as needed */
2982   for(ix = pos - p; ix >= 0; ix--)
2983     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2984 
2985   /* Fill the bottom digits with zeroes */
2986   for(ix = 0; ix < p; ix++)
2987     DIGIT(mp, ix) = 0;
2988 
2989   return MP_OKAY;
2990 
2991 } /* end s_mp_lshd() */
2992 
2993 /* }}} */
2994 
2995 /* {{{ s_mp_mul_2d(mp, d) */
2996 
2997 /*
2998   Multiply the integer by 2^d, where d is a number of bits.  This
2999   amounts to a bitwise shift of the value.
3000  */
3001 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
3002 {
3003   mp_err   res;
3004   mp_digit dshift, bshift;
3005   mp_digit mask;
3006 
3007   ARGCHK(mp != NULL,  MP_BADARG);
3008 
3009   dshift = d / MP_DIGIT_BIT;
3010   bshift = d % MP_DIGIT_BIT;
3011   /* bits to be shifted out of the top word */
3012   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3013   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3014 
3015   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3016     return res;
3017 
3018   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3019     return res;
3020 
3021   if (bshift) {
3022     mp_digit *pa = MP_DIGITS(mp);
3023     mp_digit *alim = pa + MP_USED(mp);
3024     mp_digit  prev = 0;
3025 
3026     for (pa += dshift; pa < alim; ) {
3027       mp_digit x = *pa;
3028       *pa++ = (x << bshift) | prev;
3029       prev = x >> (DIGIT_BIT - bshift);
3030     }
3031   }
3032 
3033   s_mp_clamp(mp);
3034   return MP_OKAY;
3035 } /* end s_mp_mul_2d() */
3036 
3037 /* {{{ s_mp_rshd(mp, p) */
3038 
3039 /*
3040    Shift mp rightward by p digits.  Maintains the invariant that
3041    digits above the precision are all zero.  Digits shifted off the
3042    end are lost.  Cannot fail.
3043  */
3044 
3045 void     s_mp_rshd(mp_int *mp, mp_size p)
3046 {
3047   mp_size  ix;
3048   mp_digit *src, *dst;
3049 
3050   if(p == 0)
3051     return;
3052 
3053   /* Shortcut when all digits are to be shifted off */
3054   if(p >= USED(mp)) {
3055     s_mp_setz(DIGITS(mp), ALLOC(mp));
3056     USED(mp) = 1;
3057     SIGN(mp) = ZPOS;
3058     return;
3059   }
3060 
3061   /* Shift all the significant figures over as needed */
3062   dst = MP_DIGITS(mp);
3063   src = dst + p;
3064   for (ix = USED(mp) - p; ix > 0; ix--)
3065     *dst++ = *src++;
3066 
3067   MP_USED(mp) -= p;
3068   /* Fill the top digits with zeroes */
3069   while (p-- > 0)
3070     *dst++ = 0;
3071 
3072 #if 0
3073   /* Strip off any leading zeroes    */
3074   s_mp_clamp(mp);
3075 #endif
3076 
3077 } /* end s_mp_rshd() */
3078 
3079 /* }}} */
3080 
3081 /* {{{ s_mp_div_2(mp) */
3082 
3083 /* Divide by two -- take advantage of radix properties to do it fast      */
3084 void     s_mp_div_2(mp_int *mp)
3085 {
3086   s_mp_div_2d(mp, 1);
3087 
3088 } /* end s_mp_div_2() */
3089 
3090 /* }}} */
3091 
3092 /* {{{ s_mp_mul_2(mp) */
3093 
3094 mp_err s_mp_mul_2(mp_int *mp)
3095 {
3096   mp_digit *pd;
3097   unsigned int      ix, used;
3098   mp_digit kin = 0;
3099 
3100   /* Shift digits leftward by 1 bit */
3101   used = MP_USED(mp);
3102   pd = MP_DIGITS(mp);
3103   for (ix = 0; ix < used; ix++) {
3104     mp_digit d = *pd;
3105     *pd++ = (d << 1) | kin;
3106     kin = (d >> (DIGIT_BIT - 1));
3107   }
3108 
3109   /* Deal with rollover from last digit */
3110   if (kin) {
3111     if (ix >= ALLOC(mp)) {
3112       mp_err res;
3113       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3114         return res;
3115     }
3116 
3117     DIGIT(mp, ix) = kin;
3118     USED(mp) += 1;
3119   }
3120 
3121   return MP_OKAY;
3122 
3123 } /* end s_mp_mul_2() */
3124 
3125 /* }}} */
3126 
3127 /* {{{ s_mp_mod_2d(mp, d) */
3128 
3129 /*
3130   Remainder the integer by 2^d, where d is a number of bits.  This
3131   amounts to a bitwise AND of the value, and does not require the full
3132   division code
3133  */
3134 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3135 {
3136   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3137   mp_size  ix;
3138   mp_digit dmask;
3139 
3140   if(ndig >= USED(mp))
3141     return;
3142 
3143   /* Flush all the bits above 2^d in its digit */
3144   dmask = ((mp_digit)1 << nbit) - 1;
3145   DIGIT(mp, ndig) &= dmask;
3146 
3147   /* Flush all digits above the one with 2^d in it */
3148   for(ix = ndig + 1; ix < USED(mp); ix++)
3149     DIGIT(mp, ix) = 0;
3150 
3151   s_mp_clamp(mp);
3152 
3153 } /* end s_mp_mod_2d() */
3154 
3155 /* }}} */
3156 
3157 /* {{{ s_mp_div_2d(mp, d) */
3158 
3159 /*
3160   Divide the integer by 2^d, where d is a number of bits.  This
3161   amounts to a bitwise shift of the value, and does not require the
3162   full division code (used in Barrett reduction, see below)
3163  */
3164 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3165 {
3166   int       ix;
3167   mp_digit  save, next, mask;
3168 
3169   s_mp_rshd(mp, d / DIGIT_BIT);
3170   d %= DIGIT_BIT;
3171   if (d) {
3172     mask = ((mp_digit)1 << d) - 1;
3173     save = 0;
3174     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3175       next = DIGIT(mp, ix) & mask;
3176       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3177       save = next;
3178     }
3179   }
3180   s_mp_clamp(mp);
3181 
3182 } /* end s_mp_div_2d() */
3183 
3184 /* }}} */
3185 
3186 /* {{{ s_mp_norm(a, b, *d) */
3187 
3188 /*
3189   s_mp_norm(a, b, *d)
3190 
3191   Normalize a and b for division, where b is the divisor.  In order
3192   that we might make good guesses for quotient digits, we want the
3193   leading digit of b to be at least half the radix, which we
3194   accomplish by multiplying a and b by a power of 2.  The exponent
3195   (shift count) is placed in *pd, so that the remainder can be shifted
3196   back at the end of the division process.
3197  */
3198 
3199 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3200 {
3201   mp_digit  d;
3202   mp_digit  mask;
3203   mp_digit  b_msd;
3204   mp_err    res    = MP_OKAY;
3205 
3206   d = 0;
3207   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);        /* mask is msb of digit */
3208   b_msd = DIGIT(b, USED(b) - 1);
3209   while (!(b_msd & mask)) {
3210     b_msd <<= 1;
3211     ++d;
3212   }
3213 
3214   if (d) {
3215     MP_CHECKOK( s_mp_mul_2d(a, d) );
3216     MP_CHECKOK( s_mp_mul_2d(b, d) );
3217   }
3218 
3219   *pd = d;
3220 CLEANUP:
3221   return res;
3222 
3223 } /* end s_mp_norm() */
3224 
3225 /* }}} */
3226 
3227 /* }}} */
3228 
3229 /* {{{ Primitive digit arithmetic */
3230 
3231 /* {{{ s_mp_add_d(mp, d) */
3232 
3233 /* Add d to |mp| in place                                                 */
3234 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3235 {
3236 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3237   mp_word   w, k = 0;
3238   mp_size   ix = 1;
3239 
3240   w = (mp_word)DIGIT(mp, 0) + d;
3241   DIGIT(mp, 0) = ACCUM(w);
3242   k = CARRYOUT(w);
3243 
3244   while(ix < USED(mp) && k) {
3245     w = (mp_word)DIGIT(mp, ix) + k;
3246     DIGIT(mp, ix) = ACCUM(w);
3247     k = CARRYOUT(w);
3248     ++ix;
3249   }
3250 
3251   if(k != 0) {
3252     mp_err  res;
3253 
3254     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3255       return res;
3256 
3257     DIGIT(mp, ix) = (mp_digit)k;
3258   }
3259 
3260   return MP_OKAY;
3261 #else
3262   mp_digit * pmp = MP_DIGITS(mp);
3263   mp_digit sum, mp_i, carry = 0;
3264   mp_err   res = MP_OKAY;
3265   int used = (int)MP_USED(mp);
3266 
3267   mp_i = *pmp;
3268   *pmp++ = sum = d + mp_i;
3269   carry = (sum < d);
3270   while (carry && --used > 0) {
3271     mp_i = *pmp;
3272     *pmp++ = sum = carry + mp_i;
3273     carry = !sum;
3274   }
3275   if (carry && !used) {
3276     /* mp is growing */
3277     used = MP_USED(mp);
3278     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3279     MP_DIGIT(mp, used) = carry;
3280   }
3281 CLEANUP:
3282   return res;
3283 #endif
3284 } /* end s_mp_add_d() */
3285 
3286 /* }}} */
3287 
3288 /* {{{ s_mp_sub_d(mp, d) */
3289 
3290 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3291 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3292 {
3293 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3294   mp_word   w, b = 0;
3295   mp_size   ix = 1;
3296 
3297   /* Compute initial subtraction    */
3298   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3299   b = CARRYOUT(w) ? 0 : 1;
3300   DIGIT(mp, 0) = ACCUM(w);
3301 
3302   /* Propagate borrows leftward     */
3303   while(b && ix < USED(mp)) {
3304     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3305     b = CARRYOUT(w) ? 0 : 1;
3306     DIGIT(mp, ix) = ACCUM(w);
3307     ++ix;
3308   }
3309 
3310   /* Remove leading zeroes          */
3311   s_mp_clamp(mp);
3312 
3313   /* If we have a borrow out, it's a violation of the input invariant */
3314   if(b)
3315     return MP_RANGE;
3316   else
3317     return MP_OKAY;
3318 #else
3319   mp_digit *pmp = MP_DIGITS(mp);
3320   mp_digit mp_i, diff, borrow;
3321   mp_size  used = MP_USED(mp);
3322 
3323   mp_i = *pmp;
3324   *pmp++ = diff = mp_i - d;
3325   borrow = (diff > mp_i);
3326   while (borrow && --used) {
3327     mp_i = *pmp;
3328     *pmp++ = diff = mp_i - borrow;
3329     borrow = (diff > mp_i);
3330   }
3331   s_mp_clamp(mp);
3332   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3333 #endif
3334 } /* end s_mp_sub_d() */
3335 
3336 /* }}} */
3337 
3338 /* {{{ s_mp_mul_d(a, d) */
3339 
3340 /* Compute a = a * d, single digit multiplication                         */
3341 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3342 {
3343   mp_err  res;
3344   mp_size used;
3345   int     pow;
3346 
3347   if (!d) {
3348     mp_zero(a);
3349     return MP_OKAY;
3350   }
3351   if (d == 1)
3352     return MP_OKAY;
3353   if (0 <= (pow = s_mp_ispow2d(d))) {
3354     return s_mp_mul_2d(a, (mp_digit)pow);
3355   }
3356 
3357   used = MP_USED(a);
3358   MP_CHECKOK( s_mp_pad(a, used + 1) );
3359 
3360   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3361 
3362   s_mp_clamp(a);
3363 
3364 CLEANUP:
3365   return res;
3366 
3367 } /* end s_mp_mul_d() */
3368 
3369 /* }}} */
3370 
3371 /* {{{ s_mp_div_d(mp, d, r) */
3372 
3373 /*
3374   s_mp_div_d(mp, d, r)
3375 
3376   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3377   single digit d.  If r is null, the remainder will be discarded.
3378  */
3379 
3380 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3381 {
3382 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3383   mp_word   w = 0, q;
3384 #else
3385   mp_digit  w = 0, q;
3386 #endif
3387   int       ix;
3388   mp_err    res;
3389   mp_int    quot;
3390   mp_int    rem;
3391 
3392   if(d == 0)
3393     return MP_RANGE;
3394   if (d == 1) {
3395     if (r)
3396       *r = 0;
3397     return MP_OKAY;
3398   }
3399   /* could check for power of 2 here, but mp_div_d does that. */
3400   if (MP_USED(mp) == 1) {
3401     mp_digit n   = MP_DIGIT(mp,0);
3402     mp_digit rem;
3403 
3404     q   = n / d;
3405     rem = n % d;
3406     MP_DIGIT(mp,0) = q;
3407     if (r)
3408       *r = rem;
3409     return MP_OKAY;
3410   }
3411 
3412   MP_DIGITS(&rem)  = 0;
3413   MP_DIGITS(&quot) = 0;
3414   /* Make room for the quotient */
3415   MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3416 
3417 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3418   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3419     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3420 
3421     if(w >= d) {
3422       q = w / d;
3423       w = w % d;
3424     } else {
3425       q = 0;
3426     }
3427 
3428     s_mp_lshd(&quot, 1);
3429     DIGIT(&quot, 0) = (mp_digit)q;
3430   }
3431 #else
3432   {
3433     mp_digit p;
3434 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3435     mp_digit norm;
3436 #endif
3437 
3438     MP_CHECKOK( mp_init_copy(&rem, mp) );
3439 
3440 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3441     MP_DIGIT(&quot, 0) = d;
3442     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3443     if (norm)
3444       d <<= norm;
3445     MP_DIGIT(&quot, 0) = 0;
3446 #endif
3447 
3448     p = 0;
3449     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3450       w = DIGIT(&rem, ix);
3451 
3452       if (p) {
3453         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3454       } else if (w >= d) {
3455         q = w / d;
3456         w = w % d;
3457       } else {
3458         q = 0;
3459       }
3460 
3461       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3462       DIGIT(&quot, 0) = q;
3463       p = w;
3464     }
3465 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3466     if (norm)
3467       w >>= norm;
3468 #endif
3469   }
3470 #endif
3471 
3472   /* Deliver the remainder, if desired */
3473   if(r)
3474     *r = (mp_digit)w;
3475 
3476   s_mp_clamp(&quot);
3477   mp_exch(&quot, mp);
3478 CLEANUP:
3479   mp_clear(&quot);
3480   mp_clear(&rem);
3481 
3482   return res;
3483 } /* end s_mp_div_d() */
3484 
3485 /* }}} */
3486 
3487 
3488 /* }}} */
3489 
3490 /* {{{ Primitive full arithmetic */
3491 
3492 /* {{{ s_mp_add(a, b) */
3493 
3494 /* Compute a = |a| + |b|                                                  */
3495 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3496 {
3497 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3498   mp_word   w = 0;
3499 #else
3500   mp_digit  d, sum, carry = 0;
3501 #endif
3502   mp_digit *pa, *pb;
3503   mp_size   ix;
3504   mp_size   used;
3505   mp_err    res;
3506 
3507   /* Make sure a has enough precision for the output value */
3508   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3509     return res;
3510 
3511   /*
3512     Add up all digits up to the precision of b.  If b had initially
3513     the same precision as a, or greater, we took care of it by the
3514     padding step above, so there is no problem.  If b had initially
3515     less precision, we'll have to make sure the carry out is duly
3516     propagated upward among the higher-order digits of the sum.
3517    */
3518   pa = MP_DIGITS(a);
3519   pb = MP_DIGITS(b);
3520   used = MP_USED(b);
3521   for(ix = 0; ix < used; ix++) {
3522 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3523     w = w + *pa + *pb++;
3524     *pa++ = ACCUM(w);
3525     w = CARRYOUT(w);
3526 #else
3527     d = *pa;
3528     sum = d + *pb++;
3529     d = (sum < d);                      /* detect overflow */
3530     *pa++ = sum += carry;
3531     carry = d + (sum < carry);          /* detect overflow */
3532 #endif
3533   }
3534 
3535   /* If we run out of 'b' digits before we're actually done, make
3536      sure the carries get propagated upward...
3537    */
3538   used = MP_USED(a);
3539 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3540   while (w && ix < used) {
3541     w = w + *pa;
3542     *pa++ = ACCUM(w);
3543     w = CARRYOUT(w);
3544     ++ix;
3545   }
3546 #else
3547   while (carry && ix < used) {
3548     sum = carry + *pa;
3549     *pa++ = sum;
3550     carry = !sum;
3551     ++ix;
3552   }
3553 #endif
3554 
3555   /* If there's an overall carry out, increase precision and include
3556      it.  We could have done this initially, but why touch the memory
3557      allocator unless we're sure we have to?
3558    */
3559 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3560   if (w) {
3561     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3562       return res;
3563 
3564     DIGIT(a, ix) = (mp_digit)w;
3565   }
3566 #else
3567   if (carry) {
3568     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3569       return res;
3570 
3571     DIGIT(a, used) = carry;
3572   }
3573 #endif
3574 
3575   return MP_OKAY;
3576 } /* end s_mp_add() */
3577 
3578 /* }}} */
3579 
3580 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3581 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3582 {
3583   mp_digit *pa, *pb, *pc;
3584 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3585   mp_word   w = 0;
3586 #else
3587   mp_digit  sum, carry = 0, d;
3588 #endif
3589   mp_size   ix;
3590   mp_size   used;
3591   mp_err    res;
3592 
3593   MP_SIGN(c) = MP_SIGN(a);
3594   if (MP_USED(a) < MP_USED(b)) {
3595     const mp_int *xch = a;
3596     a = b;
3597     b = xch;
3598   }
3599 
3600   /* Make sure a has enough precision for the output value */
3601   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3602     return res;
3603 
3604   /*
3605     Add up all digits up to the precision of b.  If b had initially
3606     the same precision as a, or greater, we took care of it by the
3607     exchange step above, so there is no problem.  If b had initially
3608     less precision, we'll have to make sure the carry out is duly
3609     propagated upward among the higher-order digits of the sum.
3610    */
3611   pa = MP_DIGITS(a);
3612   pb = MP_DIGITS(b);
3613   pc = MP_DIGITS(c);
3614   used = MP_USED(b);
3615   for (ix = 0; ix < used; ix++) {
3616 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3617     w = w + *pa++ + *pb++;
3618     *pc++ = ACCUM(w);
3619     w = CARRYOUT(w);
3620 #else
3621     d = *pa++;
3622     sum = d + *pb++;
3623     d = (sum < d);                      /* detect overflow */
3624     *pc++ = sum += carry;
3625     carry = d + (sum < carry);          /* detect overflow */
3626 #endif
3627   }
3628 
3629   /* If we run out of 'b' digits before we're actually done, make
3630      sure the carries get propagated upward...
3631    */
3632   for (used = MP_USED(a); ix < used; ++ix) {
3633 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3634     w = w + *pa++;
3635     *pc++ = ACCUM(w);
3636     w = CARRYOUT(w);
3637 #else
3638     *pc++ = sum = carry + *pa++;
3639     carry = (sum < carry);
3640 #endif
3641   }
3642 
3643   /* If there's an overall carry out, increase precision and include
3644      it.  We could have done this initially, but why touch the memory
3645      allocator unless we're sure we have to?
3646    */
3647 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3648   if (w) {
3649     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3650       return res;
3651 
3652     DIGIT(c, used) = (mp_digit)w;
3653     ++used;
3654   }
3655 #else
3656   if (carry) {
3657     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3658       return res;
3659 
3660     DIGIT(c, used) = carry;
3661     ++used;
3662   }
3663 #endif
3664   MP_USED(c) = used;
3665   return MP_OKAY;
3666 }
3667 /* {{{ s_mp_add_offset(a, b, offset) */
3668 
3669 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3670 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3671 {
3672 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3673   mp_word   w, k = 0;
3674 #else
3675   mp_digit  d, sum, carry = 0;
3676 #endif
3677   mp_size   ib;
3678   mp_size   ia;
3679   mp_size   lim;
3680   mp_err    res;
3681 
3682   /* Make sure a has enough precision for the output value */
3683   lim = MP_USED(b) + offset;
3684   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3685     return res;
3686 
3687   /*
3688     Add up all digits up to the precision of b.  If b had initially
3689     the same precision as a, or greater, we took care of it by the
3690     padding step above, so there is no problem.  If b had initially
3691     less precision, we'll have to make sure the carry out is duly
3692     propagated upward among the higher-order digits of the sum.
3693    */
3694   lim = USED(b);
3695   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3696 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3697     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3698     DIGIT(a, ia) = ACCUM(w);
3699     k = CARRYOUT(w);
3700 #else
3701     d = MP_DIGIT(a, ia);
3702     sum = d + MP_DIGIT(b, ib);
3703     d = (sum < d);
3704     MP_DIGIT(a,ia) = sum += carry;
3705     carry = d + (sum < carry);
3706 #endif
3707   }
3708 
3709   /* If we run out of 'b' digits before we're actually done, make
3710      sure the carries get propagated upward...
3711    */
3712 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3713   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3714     w = (mp_word)DIGIT(a, ia) + k;
3715     DIGIT(a, ia) = ACCUM(w);
3716     k = CARRYOUT(w);
3717   }
3718 #else
3719   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3720     d = MP_DIGIT(a, ia);
3721     MP_DIGIT(a,ia) = sum = d + carry;
3722     carry = (sum < d);
3723   }
3724 #endif
3725 
3726   /* If there's an overall carry out, increase precision and include
3727      it.  We could have done this initially, but why touch the memory
3728      allocator unless we're sure we have to?
3729    */
3730 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3731   if(k) {
3732     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3733       return res;
3734 
3735     DIGIT(a, ia) = (mp_digit)k;
3736   }
3737 #else
3738   if (carry) {
3739     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3740       return res;
3741 
3742     DIGIT(a, lim) = carry;
3743   }
3744 #endif
3745   s_mp_clamp(a);
3746 
3747   return MP_OKAY;
3748 
3749 } /* end s_mp_add_offset() */
3750 
3751 /* }}} */
3752 
3753 /* {{{ s_mp_sub(a, b) */
3754 
3755 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3756 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3757 {
3758   mp_digit *pa, *pb, *limit;
3759 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3760   mp_sword  w = 0;
3761 #else
3762   mp_digit  d, diff, borrow = 0;
3763 #endif
3764 
3765   /*
3766     Subtract and propagate borrow.  Up to the precision of b, this
3767     accounts for the digits of b; after that, we just make sure the
3768     carries get to the right place.  This saves having to pad b out to
3769     the precision of a just to make the loops work right...
3770    */
3771   pa = MP_DIGITS(a);
3772   pb = MP_DIGITS(b);
3773   limit = pb + MP_USED(b);
3774   while (pb < limit) {
3775 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3776     w = w + *pa - *pb++;
3777     *pa++ = ACCUM(w);
3778     w >>= MP_DIGIT_BIT;
3779 #else
3780     d = *pa;
3781     diff = d - *pb++;
3782     d = (diff > d);                             /* detect borrow */
3783     if (borrow && --diff == MP_DIGIT_MAX)
3784       ++d;
3785     *pa++ = diff;
3786     borrow = d;
3787 #endif
3788   }
3789   limit = MP_DIGITS(a) + MP_USED(a);
3790 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3791   while (w && pa < limit) {
3792     w = w + *pa;
3793     *pa++ = ACCUM(w);
3794     w >>= MP_DIGIT_BIT;
3795   }
3796 #else
3797   while (borrow && pa < limit) {
3798     d = *pa;
3799     *pa++ = diff = d - borrow;
3800     borrow = (diff > d);
3801   }
3802 #endif
3803 
3804   /* Clobber any leading zeroes we created    */
3805   s_mp_clamp(a);
3806 
3807   /*
3808      If there was a borrow out, then |b| > |a| in violation
3809      of our input invariant.  We've already done the work,
3810      but we'll at least complain about it...
3811    */
3812 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3813   return w ? MP_RANGE : MP_OKAY;
3814 #else
3815   return borrow ? MP_RANGE : MP_OKAY;
3816 #endif
3817 } /* end s_mp_sub() */
3818 
3819 /* }}} */
3820 
3821 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3822 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3823 {
3824   mp_digit *pa, *pb, *pc;
3825 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3826   mp_sword  w = 0;
3827 #else
3828   mp_digit  d, diff, borrow = 0;
3829 #endif
3830   int       ix, limit;
3831   mp_err    res;
3832 
3833   MP_SIGN(c) = MP_SIGN(a);
3834 
3835   /* Make sure a has enough precision for the output value */
3836   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3837     return res;
3838 
3839   /*
3840     Subtract and propagate borrow.  Up to the precision of b, this
3841     accounts for the digits of b; after that, we just make sure the
3842     carries get to the right place.  This saves having to pad b out to
3843     the precision of a just to make the loops work right...
3844    */
3845   pa = MP_DIGITS(a);
3846   pb = MP_DIGITS(b);
3847   pc = MP_DIGITS(c);
3848   limit = MP_USED(b);
3849   for (ix = 0; ix < limit; ++ix) {
3850 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3851     w = w + *pa++ - *pb++;
3852     *pc++ = ACCUM(w);
3853     w >>= MP_DIGIT_BIT;
3854 #else
3855     d = *pa++;
3856     diff = d - *pb++;
3857     d = (diff > d);
3858     if (borrow && --diff == MP_DIGIT_MAX)
3859       ++d;
3860     *pc++ = diff;
3861     borrow = d;
3862 #endif
3863   }
3864   for (limit = MP_USED(a); ix < limit; ++ix) {
3865 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3866     w = w + *pa++;
3867     *pc++ = ACCUM(w);
3868     w >>= MP_DIGIT_BIT;
3869 #else
3870     d = *pa++;
3871     *pc++ = diff = d - borrow;
3872     borrow = (diff > d);
3873 #endif
3874   }
3875 
3876   /* Clobber any leading zeroes we created    */
3877   MP_USED(c) = ix;
3878   s_mp_clamp(c);
3879 
3880   /*
3881      If there was a borrow out, then |b| > |a| in violation
3882      of our input invariant.  We've already done the work,
3883      but we'll at least complain about it...
3884    */
3885 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3886   return w ? MP_RANGE : MP_OKAY;
3887 #else
3888   return borrow ? MP_RANGE : MP_OKAY;
3889 #endif
3890 }
3891 /* {{{ s_mp_mul(a, b) */
3892 
3893 /* Compute a = |a| * |b|                                                  */
3894 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3895 {
3896   return mp_mul(a, b, a);
3897 } /* end s_mp_mul() */
3898 
3899 /* }}} */
3900 
3901 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3902 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3903 #define MP_MUL_DxD(a, b, Phi, Plo) \
3904   { unsigned long long product = (unsigned long long)a * b; \
3905     Plo = (mp_digit)product; \
3906     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3907 #elif defined(OSF1)
3908 #define MP_MUL_DxD(a, b, Phi, Plo) \
3909   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3910     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3911 #else
3912 #define MP_MUL_DxD(a, b, Phi, Plo) \
3913   { mp_digit a0b1, a1b0; \
3914     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3915     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3916     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3917     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3918     a1b0 += a0b1; \
3919     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3920     if (a1b0 < a0b1)  \
3921       Phi += MP_HALF_RADIX; \
3922     a1b0 <<= MP_HALF_DIGIT_BIT; \
3923     Plo += a1b0; \
3924     if (Plo < a1b0) \
3925       ++Phi; \
3926   }
3927 #endif
3928 
3929 #if !defined(MP_ASSEMBLY_MULTIPLY)
3930 /* c = a * b */
3931 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3932 {
3933 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3934   mp_digit   d = 0;
3935 
3936   /* Inner product:  Digits of a */
3937   while (a_len--) {
3938     mp_word w = ((mp_word)b * *a++) + d;
3939     *c++ = ACCUM(w);
3940     d = CARRYOUT(w);
3941   }
3942   *c = d;
3943 #else
3944   mp_digit carry = 0;
3945   while (a_len--) {
3946     mp_digit a_i = *a++;
3947     mp_digit a0b0, a1b1;
3948 
3949     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3950 
3951     a0b0 += carry;
3952     if (a0b0 < carry)
3953       ++a1b1;
3954     *c++ = a0b0;
3955     carry = a1b1;
3956   }
3957   *c = carry;
3958 #endif
3959 }
3960 
3961 /* c += a * b */
3962 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3963                               mp_digit *c)
3964 {
3965 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3966   mp_digit   d = 0;
3967 
3968   /* Inner product:  Digits of a */
3969   while (a_len--) {
3970     mp_word w = ((mp_word)b * *a++) + *c + d;
3971     *c++ = ACCUM(w);
3972     d = CARRYOUT(w);
3973   }
3974   *c = d;
3975 #else
3976   mp_digit carry = 0;
3977   while (a_len--) {
3978     mp_digit a_i = *a++;
3979     mp_digit a0b0, a1b1;
3980 
3981     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3982 
3983     a0b0 += carry;
3984     if (a0b0 < carry)
3985       ++a1b1;
3986     a0b0 += a_i = *c;
3987     if (a0b0 < a_i)
3988       ++a1b1;
3989     *c++ = a0b0;
3990     carry = a1b1;
3991   }
3992   *c = carry;
3993 #endif
3994 }
3995 
3996 /* Presently, this is only used by the Montgomery arithmetic code. */
3997 /* c += a * b */
3998 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3999 {
4000 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4001   mp_digit   d = 0;
4002 
4003   /* Inner product:  Digits of a */
4004   while (a_len--) {
4005     mp_word w = ((mp_word)b * *a++) + *c + d;
4006     *c++ = ACCUM(w);
4007     d = CARRYOUT(w);
4008   }
4009 
4010   while (d) {
4011     mp_word w = (mp_word)*c + d;
4012     *c++ = ACCUM(w);
4013     d = CARRYOUT(w);
4014   }
4015 #else
4016   mp_digit carry = 0;
4017   while (a_len--) {
4018     mp_digit a_i = *a++;
4019     mp_digit a0b0, a1b1;
4020 
4021     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4022 
4023     a0b0 += carry;
4024     if (a0b0 < carry)
4025       ++a1b1;
4026 
4027     a0b0 += a_i = *c;
4028     if (a0b0 < a_i)
4029       ++a1b1;
4030 
4031     *c++ = a0b0;
4032     carry = a1b1;
4033   }
4034   while (carry) {
4035     mp_digit c_i = *c;
4036     carry += c_i;
4037     *c++ = carry;
4038     carry = carry < c_i;
4039   }
4040 #endif
4041 }
4042 #endif
4043 
4044 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4045 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4046 #define MP_SQR_D(a, Phi, Plo) \
4047   { unsigned long long square = (unsigned long long)a * a; \
4048     Plo = (mp_digit)square; \
4049     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4050 #elif defined(OSF1)
4051 #define MP_SQR_D(a, Phi, Plo) \
4052   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4053     Phi = asm ("umulh %a0, %a0, %v0", a); }
4054 #else
4055 #define MP_SQR_D(a, Phi, Plo) \
4056   { mp_digit Pmid; \
4057     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4058     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4059     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4060     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4061     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4062     Plo += Pmid;  \
4063     if (Plo < Pmid)  \
4064       ++Phi;  \
4065   }
4066 #endif
4067 
4068 #if !defined(MP_ASSEMBLY_SQUARE)
4069 /* Add the squares of the digits of a to the digits of b. */
4070 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4071 {
4072 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4073   mp_word  w;
4074   mp_digit d;
4075   mp_size  ix;
4076 
4077   w  = 0;
4078 #define ADD_SQUARE(n) \
4079     d = pa[n]; \
4080     w += (d * (mp_word)d) + ps[2*n]; \
4081     ps[2*n] = ACCUM(w); \
4082     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4083     ps[2*n+1] = ACCUM(w); \
4084     w = (w >> DIGIT_BIT)
4085 
4086   for (ix = a_len; ix >= 4; ix -= 4) {
4087     ADD_SQUARE(0);
4088     ADD_SQUARE(1);
4089     ADD_SQUARE(2);
4090     ADD_SQUARE(3);
4091     pa += 4;
4092     ps += 8;
4093   }
4094   if (ix) {
4095     ps += 2*ix;
4096     pa += ix;
4097     switch (ix) {
4098     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4099     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4100     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4101     case 0: break;
4102     }
4103   }
4104   while (w) {
4105     w += *ps;
4106     *ps++ = ACCUM(w);
4107     w = (w >> DIGIT_BIT);
4108   }
4109 #else
4110   mp_digit carry = 0;
4111   while (a_len--) {
4112     mp_digit a_i = *pa++;
4113     mp_digit a0a0, a1a1;
4114 
4115     MP_SQR_D(a_i, a1a1, a0a0);
4116 
4117     /* here a1a1 and a0a0 constitute a_i ** 2 */
4118     a0a0 += carry;
4119     if (a0a0 < carry)
4120       ++a1a1;
4121 
4122     /* now add to ps */
4123     a0a0 += a_i = *ps;
4124     if (a0a0 < a_i)
4125       ++a1a1;
4126     *ps++ = a0a0;
4127     a1a1 += a_i = *ps;
4128     carry = (a1a1 < a_i);
4129     *ps++ = a1a1;
4130   }
4131   while (carry) {
4132     mp_digit s_i = *ps;
4133     carry += s_i;
4134     *ps++ = carry;
4135     carry = carry < s_i;
4136   }
4137 #endif
4138 }
4139 #endif
4140 
4141 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4142 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4143 /*
4144 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4145 ** so its high bit is 1.   This code is from NSPR.
4146 */
4147 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4148                        mp_digit *qp, mp_digit *rp)
4149 {
4150     mp_digit d1, d0, q1, q0;
4151     mp_digit r1, r0, m;
4152 
4153     d1 = divisor >> MP_HALF_DIGIT_BIT;
4154     d0 = divisor & MP_HALF_DIGIT_MAX;
4155     r1 = Nhi % d1;
4156     q1 = Nhi / d1;
4157     m = q1 * d0;
4158     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4159     if (r1 < m) {
4160         q1--, r1 += divisor;
4161         if (r1 >= divisor && r1 < m) {
4162             q1--, r1 += divisor;
4163         }
4164     }
4165     r1 -= m;
4166     r0 = r1 % d1;
4167     q0 = r1 / d1;
4168     m = q0 * d0;
4169     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4170     if (r0 < m) {
4171         q0--, r0 += divisor;
4172         if (r0 >= divisor && r0 < m) {
4173             q0--, r0 += divisor;
4174         }
4175     }
4176     if (qp)
4177         *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4178     if (rp)
4179         *rp = r0 - m;
4180     return MP_OKAY;
4181 }
4182 #endif
4183 
4184 #if MP_SQUARE
4185 /* {{{ s_mp_sqr(a) */
4186 
4187 mp_err   s_mp_sqr(mp_int *a)
4188 {
4189   mp_err   res;
4190   mp_int   tmp;
4191   tmp.flag = (mp_sign)0;
4192 
4193   if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4194     return res;
4195   res = mp_sqr(a, &tmp);
4196   if (res == MP_OKAY) {
4197     s_mp_exch(&tmp, a);
4198   }
4199   mp_clear(&tmp);
4200   return res;
4201 }
4202 
4203 /* }}} */
4204 #endif
4205 
4206 /* {{{ s_mp_div(a, b) */
4207 
4208 /*
4209   s_mp_div(a, b)
4210 
4211   Compute a = a / b and b = a mod b.  Assumes b > a.
4212  */
4213 
4214 mp_err   s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4215                   mp_int *div,  /* i: divisor                */
4216                   mp_int *quot) /* i: 0;        o: quotient  */
4217 {
4218   mp_int   part, t;
4219 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4220   mp_word  q_msd;
4221 #else
4222   mp_digit q_msd;
4223 #endif
4224   mp_err   res;
4225   mp_digit d;
4226   mp_digit div_msd;
4227   int      ix;
4228 
4229   t.dp = (mp_digit *)NULL;
4230 
4231   if(mp_cmp_z(div) == 0)
4232     return MP_RANGE;
4233 
4234   /* Shortcut if divisor is power of two */
4235   if((ix = s_mp_ispow2(div)) >= 0) {
4236     MP_CHECKOK( mp_copy(rem, quot) );
4237     s_mp_div_2d(quot, (mp_digit)ix);
4238     s_mp_mod_2d(rem,  (mp_digit)ix);
4239 
4240     return MP_OKAY;
4241   }
4242 
4243   DIGITS(&t) = 0;
4244   MP_SIGN(rem) = ZPOS;
4245   MP_SIGN(div) = ZPOS;
4246 
4247   /* A working temporary for division     */
4248   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4249 
4250   /* Normalize to optimize guessing       */
4251   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4252 
4253   part = *rem;
4254 
4255   /* Perform the division itself...woo!   */
4256   MP_USED(quot) = MP_ALLOC(quot);
4257 
4258   /* Find a partial substring of rem which is at least div */
4259   /* If we didn't find one, we're finished dividing    */
4260   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4261     int i;
4262     int unusedRem;
4263 
4264     unusedRem = MP_USED(rem) - MP_USED(div);
4265     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4266     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4267     MP_USED(&part)   = MP_USED(div);
4268     if (s_mp_cmp(&part, div) < 0) {
4269       -- unusedRem;
4270 #if MP_ARGCHK == 2
4271       assert(unusedRem >= 0);
4272 #endif
4273       -- MP_DIGITS(&part);
4274       ++ MP_USED(&part);
4275       ++ MP_ALLOC(&part);
4276     }
4277 
4278     /* Compute a guess for the next quotient digit       */
4279     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4280     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4281     if (q_msd >= div_msd) {
4282       q_msd = 1;
4283     } else if (MP_USED(&part) > 1) {
4284 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4285       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4286       q_msd /= div_msd;
4287       if (q_msd == RADIX)
4288         --q_msd;
4289 #else
4290       mp_digit r;
4291       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4292                                   div_msd, &q_msd, &r) );
4293 #endif
4294     } else {
4295       q_msd = 0;
4296     }
4297 #if MP_ARGCHK == 2
4298     assert(q_msd > 0); /* This case should never occur any more. */
4299 #endif
4300     if (q_msd <= 0)
4301       break;
4302 
4303     /* See what that multiplies out to                   */
4304     mp_copy(div, &t);
4305     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4306 
4307     /*
4308        If it's too big, back it off.  We should not have to do this
4309        more than once, or, in rare cases, twice.  Knuth describes a
4310        method by which this could be reduced to a maximum of once, but
4311        I didn't implement that here.
4312      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4313      */
4314     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4315       --q_msd;
4316       s_mp_sub(&t, div);        /* t -= div */
4317     }
4318     if (i < 0) {
4319       res = MP_RANGE;
4320       goto CLEANUP;
4321     }
4322 
4323     /* At this point, q_msd should be the right next digit   */
4324     MP_CHECKOK( s_mp_sub(&part, &t) );  /* part -= t */
4325     s_mp_clamp(rem);
4326 
4327     /*
4328       Include the digit in the quotient.  We allocated enough memory
4329       for any quotient we could ever possibly get, so we should not
4330       have to check for failures here
4331      */
4332     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4333   }
4334 
4335   /* Denormalize remainder                */
4336   if (d) {
4337     s_mp_div_2d(rem, d);
4338   }
4339 
4340   s_mp_clamp(quot);
4341 
4342 CLEANUP:
4343   mp_clear(&t);
4344 
4345   return res;
4346 
4347 } /* end s_mp_div() */
4348 
4349 
4350 /* }}} */
4351 
4352 /* {{{ s_mp_2expt(a, k) */
4353 
4354 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4355 {
4356   mp_err    res;
4357   mp_size   dig, bit;
4358 
4359   dig = k / DIGIT_BIT;
4360   bit = k % DIGIT_BIT;
4361 
4362   mp_zero(a);
4363   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4364     return res;
4365 
4366   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4367 
4368   return MP_OKAY;
4369 
4370 } /* end s_mp_2expt() */
4371 
4372 /* }}} */
4373 
4374 /* {{{ s_mp_reduce(x, m, mu) */
4375 
4376 /*
4377   Compute Barrett reduction, x (mod m), given a precomputed value for
4378   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4379   faster than straight division, when many reductions by the same
4380   value of m are required (such as in modular exponentiation).  This
4381   can nearly halve the time required to do modular exponentiation,
4382   as compared to using the full integer divide to reduce.
4383 
4384   This algorithm was derived from the _Handbook of Applied
4385   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4386   pp. 603-604.
4387  */
4388 
4389 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4390 {
4391   mp_int   q;
4392   mp_err   res;
4393 
4394   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4395     return res;
4396 
4397   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4398   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4399   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4400 
4401   /* x = x mod b^(k+1), quick (no division) */
4402   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4403 
4404   /* q = q * m mod b^(k+1), quick (no division) */
4405   s_mp_mul(&q, m);
4406   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4407 
4408   /* x = x - q */
4409   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4410     goto CLEANUP;
4411 
4412   /* If x < 0, add b^(k+1) to it */
4413   if(mp_cmp_z(x) < 0) {
4414     mp_set(&q, 1);
4415     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4416       goto CLEANUP;
4417     if((res = mp_add(x, &q, x)) != MP_OKAY)
4418       goto CLEANUP;
4419   }
4420 
4421   /* Back off if it's too big */
4422   while(mp_cmp(x, m) >= 0) {
4423     if((res = s_mp_sub(x, m)) != MP_OKAY)
4424       break;
4425   }
4426 
4427  CLEANUP:
4428   mp_clear(&q);
4429 
4430   return res;
4431 
4432 } /* end s_mp_reduce() */
4433 
4434 /* }}} */
4435 
4436 /* }}} */
4437 
4438 /* {{{ Primitive comparisons */
4439 
4440 /* {{{ s_mp_cmp(a, b) */
4441 
4442 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4443 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4444 {
4445   mp_size used_a = MP_USED(a);
4446   {
4447     mp_size used_b = MP_USED(b);
4448 
4449     if (used_a > used_b)
4450       goto IS_GT;
4451     if (used_a < used_b)
4452       goto IS_LT;
4453   }
4454   {
4455     mp_digit *pa, *pb;
4456     mp_digit da = 0, db = 0;
4457 
4458 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4459 
4460     pa = MP_DIGITS(a) + used_a;
4461     pb = MP_DIGITS(b) + used_a;
4462     while (used_a >= 4) {
4463       pa     -= 4;
4464       pb     -= 4;
4465       used_a -= 4;
4466       CMP_AB(3);
4467       CMP_AB(2);
4468       CMP_AB(1);
4469       CMP_AB(0);
4470     }
4471     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4472       /* do nothing */;
4473 done:
4474     if (da > db)
4475       goto IS_GT;
4476     if (da < db)
4477       goto IS_LT;
4478   }
4479   return MP_EQ;
4480 IS_LT:
4481   return MP_LT;
4482 IS_GT:
4483   return MP_GT;
4484 } /* end s_mp_cmp() */
4485 
4486 /* }}} */
4487 
4488 /* {{{ s_mp_cmp_d(a, d) */
4489 
4490 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4491 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4492 {
4493   if(USED(a) > 1)
4494     return MP_GT;
4495 
4496   if(DIGIT(a, 0) < d)
4497     return MP_LT;
4498   else if(DIGIT(a, 0) > d)
4499     return MP_GT;
4500   else
4501     return MP_EQ;
4502 
4503 } /* end s_mp_cmp_d() */
4504 
4505 /* }}} */
4506 
4507 /* {{{ s_mp_ispow2(v) */
4508 
4509 /*
4510   Returns -1 if the value is not a power of two; otherwise, it returns
4511   k such that v = 2^k, i.e. lg(v).
4512  */
4513 int      s_mp_ispow2(const mp_int *v)
4514 {
4515   mp_digit d;
4516   int      extra = 0, ix;
4517 
4518   ix = MP_USED(v) - 1;
4519   d = MP_DIGIT(v, ix); /* most significant digit of v */
4520 
4521   extra = s_mp_ispow2d(d);
4522   if (extra < 0 || ix == 0)
4523     return extra;
4524 
4525   while (--ix >= 0) {
4526     if (DIGIT(v, ix) != 0)
4527       return -1; /* not a power of two */
4528     extra += MP_DIGIT_BIT;
4529   }
4530 
4531   return extra;
4532 
4533 } /* end s_mp_ispow2() */
4534 
4535 /* }}} */
4536 
4537 /* {{{ s_mp_ispow2d(d) */
4538 
4539 int      s_mp_ispow2d(mp_digit d)
4540 {
4541   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4542     int pow = 0;
4543 #if defined (MP_USE_UINT_DIGIT)
4544     if (d & 0xffff0000U)
4545       pow += 16;
4546     if (d & 0xff00ff00U)
4547       pow += 8;
4548     if (d & 0xf0f0f0f0U)
4549       pow += 4;
4550     if (d & 0xccccccccU)
4551       pow += 2;
4552     if (d & 0xaaaaaaaaU)
4553       pow += 1;
4554 #elif defined(MP_USE_LONG_LONG_DIGIT)
4555     if (d & 0xffffffff00000000ULL)
4556       pow += 32;
4557     if (d & 0xffff0000ffff0000ULL)
4558       pow += 16;
4559     if (d & 0xff00ff00ff00ff00ULL)
4560       pow += 8;
4561     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4562       pow += 4;
4563     if (d & 0xccccccccccccccccULL)
4564       pow += 2;
4565     if (d & 0xaaaaaaaaaaaaaaaaULL)
4566       pow += 1;
4567 #elif defined(MP_USE_LONG_DIGIT)
4568     if (d & 0xffffffff00000000UL)
4569       pow += 32;
4570     if (d & 0xffff0000ffff0000UL)
4571       pow += 16;
4572     if (d & 0xff00ff00ff00ff00UL)
4573       pow += 8;
4574     if (d & 0xf0f0f0f0f0f0f0f0UL)
4575       pow += 4;
4576     if (d & 0xccccccccccccccccUL)
4577       pow += 2;
4578     if (d & 0xaaaaaaaaaaaaaaaaUL)
4579       pow += 1;
4580 #else
4581 #error "unknown type for mp_digit"
4582 #endif
4583     return pow;
4584   }
4585   return -1;
4586 
4587 } /* end s_mp_ispow2d() */
4588 
4589 /* }}} */
4590 
4591 /* }}} */
4592 
4593 /* {{{ Primitive I/O helpers */
4594 
4595 /* {{{ s_mp_tovalue(ch, r) */
4596 
4597 /*
4598   Convert the given character to its digit value, in the given radix.
4599   If the given character is not understood in the given radix, -1 is
4600   returned.  Otherwise the digit's numeric value is returned.
4601 
4602   The results will be odd if you use a radix < 2 or > 62, you are
4603   expected to know what you're up to.
4604  */
4605 int      s_mp_tovalue(char ch, int r)
4606 {
4607   int    val, xch;
4608 
4609   if(r > 36)
4610     xch = ch;
4611   else
4612     xch = toupper(ch);
4613 
4614   if(isdigit(xch))
4615     val = xch - '0';
4616   else if(isupper(xch))
4617     val = xch - 'A' + 10;
4618   else if(islower(xch))
4619     val = xch - 'a' + 36;
4620   else if(xch == '+')
4621     val = 62;
4622   else if(xch == '/')
4623     val = 63;
4624   else
4625     return -1;
4626 
4627   if(val < 0 || val >= r)
4628     return -1;
4629 
4630   return val;
4631 
4632 } /* end s_mp_tovalue() */
4633 
4634 /* }}} */
4635 
4636 /* {{{ s_mp_todigit(val, r, low) */
4637 
4638 /*
4639   Convert val to a radix-r digit, if possible.  If val is out of range
4640   for r, returns zero.  Otherwise, returns an ASCII character denoting
4641   the value in the given radix.
4642 
4643   The results may be odd if you use a radix < 2 or > 64, you are
4644   expected to know what you're doing.
4645  */
4646 
4647 char     s_mp_todigit(mp_digit val, int r, int low)
4648 {
4649   char   ch;
4650 
4651   if(val >= (unsigned int)r)
4652     return 0;
4653 
4654   ch = s_dmap_1[val];
4655 
4656   if(r <= 36 && low)
4657     ch = tolower(ch);
4658 
4659   return ch;
4660 
4661 } /* end s_mp_todigit() */
4662 
4663 /* }}} */
4664 
4665 /* {{{ s_mp_outlen(bits, radix) */
4666 
4667 /*
4668    Return an estimate for how long a string is needed to hold a radix
4669    r representation of a number with 'bits' significant bits, plus an
4670    extra for a zero terminator (assuming C style strings here)
4671  */
4672 int      s_mp_outlen(int bits, int r)
4673 {
4674   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4675 
4676 } /* end s_mp_outlen() */
4677 
4678 /* }}} */
4679 
4680 /* }}} */
4681 
4682 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4683 /* mp_read_unsigned_octets(mp, str, len)
4684    Read in a raw value (base 256) into the given mp_int
4685    No sign bit, number is positive.  Leading zeros ignored.
4686  */
4687 
4688 mp_err
4689 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4690 {
4691   int            count;
4692   mp_err         res;
4693   mp_digit       d;
4694 
4695   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4696 
4697   mp_zero(mp);
4698 
4699   count = len % sizeof(mp_digit);
4700   if (count) {
4701     for (d = 0; count-- > 0; --len) {
4702       d = (d << 8) | *str++;
4703     }
4704     MP_DIGIT(mp, 0) = d;
4705   }
4706 
4707   /* Read the rest of the digits */
4708   for(; len > 0; len -= sizeof(mp_digit)) {
4709     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4710       d = (d << 8) | *str++;
4711     }
4712     if (MP_EQ == mp_cmp_z(mp)) {
4713       if (!d)
4714         continue;
4715     } else {
4716       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4717         return res;
4718     }
4719     MP_DIGIT(mp, 0) = d;
4720   }
4721   return MP_OKAY;
4722 } /* end mp_read_unsigned_octets() */
4723 /* }}} */
4724 
4725 /* {{{ mp_unsigned_octet_size(mp) */
4726 int
4727 mp_unsigned_octet_size(const mp_int *mp)
4728 {
4729   int  bytes;
4730   int  ix;
4731   mp_digit  d = 0;
4732 
4733   ARGCHK(mp != NULL, MP_BADARG);
4734   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4735 
4736   bytes = (USED(mp) * sizeof(mp_digit));
4737 
4738   /* subtract leading zeros. */
4739   /* Iterate over each digit... */
4740   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4741     d = DIGIT(mp, ix);
4742     if (d)
4743         break;
4744     bytes -= sizeof(d);
4745   }
4746   if (!bytes)
4747     return 1;
4748 
4749   /* Have MSD, check digit bytes, high order first */
4750   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4751     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4752     if (x)
4753         break;
4754     --bytes;
4755   }
4756   return bytes;
4757 } /* end mp_unsigned_octet_size() */
4758 /* }}} */
4759 
4760 /* {{{ mp_to_unsigned_octets(mp, str) */
4761 /* output a buffer of big endian octets no longer than specified. */
4762 mp_err
4763 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4764 {
4765   int  ix, pos = 0;
4766   unsigned int  bytes;
4767 
4768   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4769 
4770   bytes = mp_unsigned_octet_size(mp);
4771   ARGCHK(bytes <= maxlen, MP_BADARG);
4772 
4773   /* Iterate over each digit... */
4774   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4775     mp_digit  d = DIGIT(mp, ix);
4776     int       jx;
4777 
4778     /* Unpack digit bytes, high order first */
4779     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4780       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4781       if (!pos && !x)   /* suppress leading zeros */
4782         continue;
4783       str[pos++] = x;
4784     }
4785   }
4786   if (!pos)
4787     str[pos++] = 0;
4788   return pos;
4789 } /* end mp_to_unsigned_octets() */
4790 /* }}} */
4791 
4792 /* {{{ mp_to_signed_octets(mp, str) */
4793 /* output a buffer of big endian octets no longer than specified. */
4794 mp_err
4795 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4796 {
4797   int  ix, pos = 0;
4798   unsigned int  bytes;
4799 
4800   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4801 
4802   bytes = mp_unsigned_octet_size(mp);
4803   ARGCHK(bytes <= maxlen, MP_BADARG);
4804 
4805   /* Iterate over each digit... */
4806   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4807     mp_digit  d = DIGIT(mp, ix);
4808     int       jx;
4809 
4810     /* Unpack digit bytes, high order first */
4811     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4812       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4813       if (!pos) {
4814         if (!x)         /* suppress leading zeros */
4815           continue;
4816         if (x & 0x80) { /* add one leading zero to make output positive.  */
4817           ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4818           if (bytes + 1 > maxlen)
4819             return MP_BADARG;
4820           str[pos++] = 0;
4821         }
4822       }
4823       str[pos++] = x;
4824     }
4825   }
4826   if (!pos)
4827     str[pos++] = 0;
4828   return pos;
4829 } /* end mp_to_signed_octets() */
4830 /* }}} */
4831 
4832 /* {{{ mp_to_fixlen_octets(mp, str) */
4833 /* output a buffer of big endian octets exactly as long as requested. */
4834 mp_err
4835 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4836 {
4837   int  ix, pos = 0;
4838   unsigned int  bytes;
4839 
4840   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4841 
4842   bytes = mp_unsigned_octet_size(mp);
4843   ARGCHK(bytes <= length, MP_BADARG);
4844 
4845   /* place any needed leading zeros */
4846   for (;length > bytes; --length) {
4847         *str++ = 0;
4848   }
4849 
4850   /* Iterate over each digit... */
4851   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4852     mp_digit  d = DIGIT(mp, ix);
4853     int       jx;
4854 
4855     /* Unpack digit bytes, high order first */
4856     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4857       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4858       if (!pos && !x)   /* suppress leading zeros */
4859         continue;
4860       str[pos++] = x;
4861     }
4862   }
4863   if (!pos)
4864     str[pos++] = 0;
4865   return MP_OKAY;
4866 } /* end mp_to_fixlen_octets() */
4867 /* }}} */
4868 
4869 
4870 /*------------------------------------------------------------------------*/
4871 /* HERE THERE BE DRAGONS                                                  */