New src/jdk.crypto.ec/share/native/libsunec/impl/mpi.c

   1 /*
   2  * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * Use is subject to license terms.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public License
  16  * along with this library; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /* *********************************************************************
  25  *
  26  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
  27  *
  28  * The Initial Developer of the Original Code is
  29  * Michael J. Fromberger.
  30  * Portions created by the Initial Developer are Copyright (C) 1998
  31  * the Initial Developer. All Rights Reserved.
  32  *
  33  * Contributor(s):
  34  *   Netscape Communications Corporation
  35  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
  36  *
  37  * Last Modified Date from the Original Code: Nov 2016
  38  *********************************************************************** */
  39 
  40 /*  Arbitrary precision integer arithmetic library */
  41 
  42 #include "mpi-priv.h"
  43 #if defined(OSF1)
  44 #include <c_asm.h>
  45 #endif
  46 
  47 #if MP_LOGTAB
  48 /*
  49   A table of the logs of 2 for various bases (the 0 and 1 entries of
  50   this table are meaningless and should not be referenced).
  51 
  52   This table is used to compute output lengths for the mp_toradix()
  53   function.  Since a number n in radix r takes up about log_r(n)
  54   digits, we estimate the output size by taking the least integer
  55   greater than log_r(n), where:
  56 
  57   log_r(n) = log_2(n) * log_r(2)
  58 
  59   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
  60   which are the output bases supported.
  61  */
  62 #include "logtab.h"
  63 #endif
  64 
  65 /* {{{ Constant strings */
  66 
  67 /* Constant strings returned by mp_strerror() */
  68 static const char *mp_err_string[] = {
  69   "unknown result code",     /* say what?            */
  70   "boolean true",            /* MP_OKAY, MP_YES      */
  71   "boolean false",           /* MP_NO                */
  72   "out of memory",           /* MP_MEM               */
  73   "argument out of range",   /* MP_RANGE             */
  74   "invalid input parameter", /* MP_BADARG            */
  75   "result is undefined"      /* MP_UNDEF             */
  76 };
  77 
  78 /* Value to digit maps for radix conversion   */
  79 
  80 /* s_dmap_1 - standard digits and letters */
  81 static const char *s_dmap_1 =
  82   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
  83 
  84 /* }}} */
  85 
  86 unsigned long mp_allocs;
  87 unsigned long mp_frees;
  88 unsigned long mp_copies;
  89 
  90 /* {{{ Default precision manipulation */
  91 
  92 /* Default precision for newly created mp_int's      */
  93 static mp_size s_mp_defprec = MP_DEFPREC;
  94 
  95 mp_size mp_get_prec(void)
  96 {
  97   return s_mp_defprec;
  98 
  99 } /* end mp_get_prec() */
 100 
 101 void         mp_set_prec(mp_size prec)
 102 {
 103   if(prec == 0)
 104     s_mp_defprec = MP_DEFPREC;
 105   else
 106     s_mp_defprec = prec;
 107 
 108 } /* end mp_set_prec() */
 109 
 110 /* }}} */
 111 
 112 /*------------------------------------------------------------------------*/
 113 /* {{{ mp_init(mp, kmflag) */
 114 
 115 /*
 116   mp_init(mp, kmflag)
 117 
 118   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
 119   MP_MEM if memory could not be allocated for the structure.
 120  */
 121 
 122 mp_err mp_init(mp_int *mp, int kmflag)
 123 {
 124   return mp_init_size(mp, s_mp_defprec, kmflag);
 125 
 126 } /* end mp_init() */
 127 
 128 /* }}} */
 129 
 130 /* {{{ mp_init_size(mp, prec, kmflag) */
 131 
 132 /*
 133   mp_init_size(mp, prec, kmflag)
 134 
 135   Initialize a new zero-valued mp_int with at least the given
 136   precision; returns MP_OKAY if successful, or MP_MEM if memory could
 137   not be allocated for the structure.
 138  */
 139 
 140 mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
 141 {
 142   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
 143 
 144   prec = MP_ROUNDUP(prec, s_mp_defprec);
 145   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
 146     return MP_MEM;
 147 
 148   SIGN(mp) = ZPOS;
 149   USED(mp) = 1;
 150   ALLOC(mp) = prec;
 151 
 152   return MP_OKAY;
 153 
 154 } /* end mp_init_size() */
 155 
 156 /* }}} */
 157 
 158 /* {{{ mp_init_copy(mp, from) */
 159 
 160 /*
 161   mp_init_copy(mp, from)
 162 
 163   Initialize mp as an exact copy of from.  Returns MP_OKAY if
 164   successful, MP_MEM if memory could not be allocated for the new
 165   structure.
 166  */
 167 
 168 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
 169 {
 170   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
 171 
 172   if(mp == from)
 173     return MP_OKAY;
 174 
 175   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 176     return MP_MEM;
 177 
 178   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
 179   USED(mp) = USED(from);
 180   ALLOC(mp) = ALLOC(from);
 181   SIGN(mp) = SIGN(from);
 182 
 183 #ifndef _WIN32
 184   FLAG(mp) = FLAG(from);
 185 #endif /* _WIN32 */
 186 
 187   return MP_OKAY;
 188 
 189 } /* end mp_init_copy() */
 190 
 191 /* }}} */
 192 
 193 /* {{{ mp_copy(from, to) */
 194 
 195 /*
 196   mp_copy(from, to)
 197 
 198   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
 199   'to' has already been initialized (if not, use mp_init_copy()
 200   instead). If 'from' and 'to' are identical, nothing happens.
 201  */
 202 
 203 mp_err mp_copy(const mp_int *from, mp_int *to)
 204 {
 205   ARGCHK(from != NULL && to != NULL, MP_BADARG);
 206 
 207   if(from == to)
 208     return MP_OKAY;
 209 
 210   ++mp_copies;
 211   { /* copy */
 212     mp_digit   *tmp;
 213 
 214     /*
 215       If the allocated buffer in 'to' already has enough space to hold
 216       all the used digits of 'from', we'll re-use it to avoid hitting
 217       the memory allocater more than necessary; otherwise, we'd have
 218       to grow anyway, so we just allocate a hunk and make the copy as
 219       usual
 220      */
 221     if(ALLOC(to) >= USED(from)) {
 222       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
 223       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
 224 
 225     } else {
 226       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 227         return MP_MEM;
 228 
 229       s_mp_copy(DIGITS(from), tmp, USED(from));
 230 
 231       if(DIGITS(to) != NULL) {
 232 #if MP_CRYPTO
 233         s_mp_setz(DIGITS(to), ALLOC(to));
 234 #endif
 235         s_mp_free(DIGITS(to), ALLOC(to));
 236       }
 237 
 238       DIGITS(to) = tmp;
 239       ALLOC(to) = ALLOC(from);
 240     }
 241 
 242     /* Copy the precision and sign from the original */
 243     USED(to) = USED(from);
 244     SIGN(to) = SIGN(from);
 245   } /* end copy */
 246 
 247   return MP_OKAY;
 248 
 249 } /* end mp_copy() */
 250 
 251 /* }}} */
 252 
 253 /* {{{ mp_exch(mp1, mp2) */
 254 
 255 /*
 256   mp_exch(mp1, mp2)
 257 
 258   Exchange mp1 and mp2 without allocating any intermediate memory
 259   (well, unless you count the stack space needed for this call and the
 260   locals it creates...).  This cannot fail.
 261  */
 262 
 263 void mp_exch(mp_int *mp1, mp_int *mp2)
 264 {
 265 #if MP_ARGCHK == 2
 266   assert(mp1 != NULL && mp2 != NULL);
 267 #else
 268   if(mp1 == NULL || mp2 == NULL)
 269     return;
 270 #endif
 271 
 272   s_mp_exch(mp1, mp2);
 273 
 274 } /* end mp_exch() */
 275 
 276 /* }}} */
 277 
 278 /* {{{ mp_clear(mp) */
 279 
 280 /*
 281   mp_clear(mp)
 282 
 283   Release the storage used by an mp_int, and void its fields so that
 284   if someone calls mp_clear() again for the same int later, we won't
 285   get tollchocked.
 286  */
 287 
 288 void   mp_clear(mp_int *mp)
 289 {
 290   if(mp == NULL)
 291     return;
 292 
 293   if(DIGITS(mp) != NULL) {
 294 #if MP_CRYPTO
 295     s_mp_setz(DIGITS(mp), ALLOC(mp));
 296 #endif
 297     s_mp_free(DIGITS(mp), ALLOC(mp));
 298     DIGITS(mp) = NULL;
 299   }
 300 
 301   USED(mp) = 0;
 302   ALLOC(mp) = 0;
 303 
 304 } /* end mp_clear() */
 305 
 306 /* }}} */
 307 
 308 /* {{{ mp_zero(mp) */
 309 
 310 /*
 311   mp_zero(mp)
 312 
 313   Set mp to zero.  Does not change the allocated size of the structure,
 314   and therefore cannot fail (except on a bad argument, which we ignore)
 315  */
 316 void   mp_zero(mp_int *mp)
 317 {
 318   if(mp == NULL)
 319     return;
 320 
 321   s_mp_setz(DIGITS(mp), ALLOC(mp));
 322   USED(mp) = 1;
 323   SIGN(mp) = ZPOS;
 324 
 325 } /* end mp_zero() */
 326 
 327 /* }}} */
 328 
 329 /* {{{ mp_set(mp, d) */
 330 
 331 void   mp_set(mp_int *mp, mp_digit d)
 332 {
 333   if(mp == NULL)
 334     return;
 335 
 336   mp_zero(mp);
 337   DIGIT(mp, 0) = d;
 338 
 339 } /* end mp_set() */
 340 
 341 /* }}} */
 342 
 343 /* {{{ mp_set_int(mp, z) */
 344 
 345 mp_err mp_set_int(mp_int *mp, long z)
 346 {
 347   int            ix;
 348   unsigned long  v = labs(z);
 349   mp_err         res;
 350 
 351   ARGCHK(mp != NULL, MP_BADARG);
 352 
 353   mp_zero(mp);
 354   if(z == 0)
 355     return MP_OKAY;  /* shortcut for zero */
 356 
 357   if (sizeof v <= sizeof(mp_digit)) {
 358     DIGIT(mp,0) = v;
 359   } else {
 360     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 361       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 362         return res;
 363 
 364       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
 365       if (res != MP_OKAY)
 366         return res;
 367     }
 368   }
 369   if(z < 0)
 370     SIGN(mp) = NEG;
 371 
 372   return MP_OKAY;
 373 
 374 } /* end mp_set_int() */
 375 
 376 /* }}} */
 377 
 378 /* {{{ mp_set_ulong(mp, z) */
 379 
 380 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
 381 {
 382   int            ix;
 383   mp_err         res;
 384 
 385   ARGCHK(mp != NULL, MP_BADARG);
 386 
 387   mp_zero(mp);
 388   if(z == 0)
 389     return MP_OKAY;  /* shortcut for zero */
 390 
 391   if (sizeof z <= sizeof(mp_digit)) {
 392     DIGIT(mp,0) = z;
 393   } else {
 394     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 395       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 396         return res;
 397 
 398       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
 399       if (res != MP_OKAY)
 400         return res;
 401     }
 402   }
 403   return MP_OKAY;
 404 } /* end mp_set_ulong() */
 405 
 406 /* }}} */
 407 
 408 /*------------------------------------------------------------------------*/
 409 /* {{{ Digit arithmetic */
 410 
 411 /* {{{ mp_add_d(a, d, b) */
 412 
 413 /*
 414   mp_add_d(a, d, b)
 415 
 416   Compute the sum b = a + d, for a single digit d.  Respects the sign of
 417   its primary addend (single digits are unsigned anyway).
 418  */
 419 
 420 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
 421 {
 422   mp_int   tmp;
 423   mp_err   res;
 424 
 425   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 426 
 427   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 428     return res;
 429 
 430   if(SIGN(&tmp) == ZPOS) {
 431     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 432       goto CLEANUP;
 433   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 434     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 435       goto CLEANUP;
 436   } else {
 437     mp_neg(&tmp, &tmp);
 438 
 439     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 440   }
 441 
 442   if(s_mp_cmp_d(&tmp, 0) == 0)
 443     SIGN(&tmp) = ZPOS;
 444 
 445   s_mp_exch(&tmp, b);
 446 
 447 CLEANUP:
 448   mp_clear(&tmp);
 449   return res;
 450 
 451 } /* end mp_add_d() */
 452 
 453 /* }}} */
 454 
 455 /* {{{ mp_sub_d(a, d, b) */
 456 
 457 /*
 458   mp_sub_d(a, d, b)
 459 
 460   Compute the difference b = a - d, for a single digit d.  Respects the
 461   sign of its subtrahend (single digits are unsigned anyway).
 462  */
 463 
 464 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
 465 {
 466   mp_int   tmp;
 467   mp_err   res;
 468 
 469   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 470 
 471   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 472     return res;
 473 
 474   if(SIGN(&tmp) == NEG) {
 475     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 476       goto CLEANUP;
 477   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 478     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 479       goto CLEANUP;
 480   } else {
 481     mp_neg(&tmp, &tmp);
 482 
 483     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 484     SIGN(&tmp) = NEG;
 485   }
 486 
 487   if(s_mp_cmp_d(&tmp, 0) == 0)
 488     SIGN(&tmp) = ZPOS;
 489 
 490   s_mp_exch(&tmp, b);
 491 
 492 CLEANUP:
 493   mp_clear(&tmp);
 494   return res;
 495 
 496 } /* end mp_sub_d() */
 497 
 498 /* }}} */
 499 
 500 /* {{{ mp_mul_d(a, d, b) */
 501 
 502 /*
 503   mp_mul_d(a, d, b)
 504 
 505   Compute the product b = a * d, for a single digit d.  Respects the sign
 506   of its multiplicand (single digits are unsigned anyway)
 507  */
 508 
 509 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
 510 {
 511   mp_err  res;
 512 
 513   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 514 
 515   if(d == 0) {
 516     mp_zero(b);
 517     return MP_OKAY;
 518   }
 519 
 520   if((res = mp_copy(a, b)) != MP_OKAY)
 521     return res;
 522 
 523   res = s_mp_mul_d(b, d);
 524 
 525   return res;
 526 
 527 } /* end mp_mul_d() */
 528 
 529 /* }}} */
 530 
 531 /* {{{ mp_mul_2(a, c) */
 532 
 533 mp_err mp_mul_2(const mp_int *a, mp_int *c)
 534 {
 535   mp_err  res;
 536 
 537   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 538 
 539   if((res = mp_copy(a, c)) != MP_OKAY)
 540     return res;
 541 
 542   return s_mp_mul_2(c);
 543 
 544 } /* end mp_mul_2() */
 545 
 546 /* }}} */
 547 
 548 /* {{{ mp_div_d(a, d, q, r) */
 549 
 550 /*
 551   mp_div_d(a, d, q, r)
 552 
 553   Compute the quotient q = a / d and remainder r = a mod d, for a
 554   single digit d.  Respects the sign of its divisor (single digits are
 555   unsigned anyway).
 556  */
 557 
 558 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
 559 {
 560   mp_err   res;
 561   mp_int   qp;
 562   mp_digit rem;
 563   int      pow;
 564 
 565   ARGCHK(a != NULL, MP_BADARG);
 566 
 567   if(d == 0)
 568     return MP_RANGE;
 569 
 570   /* Shortcut for powers of two ... */
 571   if((pow = s_mp_ispow2d(d)) >= 0) {
 572     mp_digit  mask;
 573 
 574     mask = ((mp_digit)1 << pow) - 1;
 575     rem = DIGIT(a, 0) & mask;
 576 
 577     if(q) {
 578       mp_copy(a, q);
 579       s_mp_div_2d(q, pow);
 580     }
 581 
 582     if(r)
 583       *r = rem;
 584 
 585     return MP_OKAY;
 586   }
 587 
 588   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
 589     return res;
 590 
 591   res = s_mp_div_d(&qp, d, &rem);
 592 
 593   if(s_mp_cmp_d(&qp, 0) == 0)
 594     SIGN(q) = ZPOS;
 595 
 596   if(r)
 597     *r = rem;
 598 
 599   if(q)
 600     s_mp_exch(&qp, q);
 601 
 602   mp_clear(&qp);
 603   return res;
 604 
 605 } /* end mp_div_d() */
 606 
 607 /* }}} */
 608 
 609 /* {{{ mp_div_2(a, c) */
 610 
 611 /*
 612   mp_div_2(a, c)
 613 
 614   Compute c = a / 2, disregarding the remainder.
 615  */
 616 
 617 mp_err mp_div_2(const mp_int *a, mp_int *c)
 618 {
 619   mp_err  res;
 620 
 621   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 622 
 623   if((res = mp_copy(a, c)) != MP_OKAY)
 624     return res;
 625 
 626   s_mp_div_2(c);
 627 
 628   return MP_OKAY;
 629 
 630 } /* end mp_div_2() */
 631 
 632 /* }}} */
 633 
 634 /* {{{ mp_expt_d(a, d, b) */
 635 
 636 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
 637 {
 638   mp_int   s, x;
 639   mp_err   res;
 640 
 641   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 642 
 643   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
 644     return res;
 645   if((res = mp_init_copy(&x, a)) != MP_OKAY)
 646     goto X;
 647 
 648   DIGIT(&s, 0) = 1;
 649 
 650   while(d != 0) {
 651     if(d & 1) {
 652       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
 653         goto CLEANUP;
 654     }
 655 
 656     d /= 2;
 657 
 658     if((res = s_mp_sqr(&x)) != MP_OKAY)
 659       goto CLEANUP;
 660   }
 661 
 662   s.flag = (mp_flag)0;
 663   s_mp_exch(&s, c);
 664 
 665 CLEANUP:
 666   mp_clear(&x);
 667 X:
 668   mp_clear(&s);
 669 
 670   return res;
 671 
 672 } /* end mp_expt_d() */
 673 
 674 /* }}} */
 675 
 676 /* }}} */
 677 
 678 /*------------------------------------------------------------------------*/
 679 /* {{{ Full arithmetic */
 680 
 681 /* {{{ mp_abs(a, b) */
 682 
 683 /*
 684   mp_abs(a, b)
 685 
 686   Compute b = |a|.  'a' and 'b' may be identical.
 687  */
 688 
 689 mp_err mp_abs(const mp_int *a, mp_int *b)
 690 {
 691   mp_err   res;
 692 
 693   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 694 
 695   if((res = mp_copy(a, b)) != MP_OKAY)
 696     return res;
 697 
 698   SIGN(b) = ZPOS;
 699 
 700   return MP_OKAY;
 701 
 702 } /* end mp_abs() */
 703 
 704 /* }}} */
 705 
 706 /* {{{ mp_neg(a, b) */
 707 
 708 /*
 709   mp_neg(a, b)
 710 
 711   Compute b = -a.  'a' and 'b' may be identical.
 712  */
 713 
 714 mp_err mp_neg(const mp_int *a, mp_int *b)
 715 {
 716   mp_err   res;
 717 
 718   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 719 
 720   if((res = mp_copy(a, b)) != MP_OKAY)
 721     return res;
 722 
 723   if(s_mp_cmp_d(b, 0) == MP_EQ)
 724     SIGN(b) = ZPOS;
 725   else
 726     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
 727 
 728   return MP_OKAY;
 729 
 730 } /* end mp_neg() */
 731 
 732 /* }}} */
 733 
 734 /* {{{ mp_add(a, b, c) */
 735 
 736 /*
 737   mp_add(a, b, c)
 738 
 739   Compute c = a + b.  All parameters may be identical.
 740  */
 741 
 742 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
 743 {
 744   mp_err  res;
 745 
 746   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 747 
 748   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
 749     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 750   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
 751     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 752   } else {                          /* different sign: |a|  < |b|   */
 753     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 754   }
 755 
 756   if (s_mp_cmp_d(c, 0) == MP_EQ)
 757     SIGN(c) = ZPOS;
 758 
 759 CLEANUP:
 760   return res;
 761 
 762 } /* end mp_add() */
 763 
 764 /* }}} */
 765 
 766 /* {{{ mp_sub(a, b, c) */
 767 
 768 /*
 769   mp_sub(a, b, c)
 770 
 771   Compute c = a - b.  All parameters may be identical.
 772  */
 773 
 774 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
 775 {
 776   mp_err  res;
 777   int     magDiff;
 778 
 779   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 780 
 781   if (a == b) {
 782     mp_zero(c);
 783     return MP_OKAY;
 784   }
 785 
 786   if (MP_SIGN(a) != MP_SIGN(b)) {
 787     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 788   } else if (!(magDiff = s_mp_cmp(a, b))) {
 789     mp_zero(c);
 790     res = MP_OKAY;
 791   } else if (magDiff > 0) {
 792     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 793   } else {
 794     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 795     MP_SIGN(c) = !MP_SIGN(a);
 796   }
 797 
 798   if (s_mp_cmp_d(c, 0) == MP_EQ)
 799     MP_SIGN(c) = MP_ZPOS;
 800 
 801 CLEANUP:
 802   return res;
 803 
 804 } /* end mp_sub() */
 805 
 806 /* }}} */
 807 
 808 /* {{{ mp_mul(a, b, c) */
 809 
 810 /*
 811   mp_mul(a, b, c)
 812 
 813   Compute c = a * b.  All parameters may be identical.
 814  */
 815 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
 816 {
 817   mp_digit *pb;
 818   mp_int   tmp;
 819   mp_err   res;
 820   mp_size  ib;
 821   mp_size  useda, usedb;
 822 
 823   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 824 
 825   if (a == c) {
 826     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 827       return res;
 828     if (a == b)
 829       b = &tmp;
 830     a = &tmp;
 831   } else if (b == c) {
 832     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
 833       return res;
 834     b = &tmp;
 835   } else {
 836     MP_DIGITS(&tmp) = 0;
 837   }
 838 
 839   if (MP_USED(a) < MP_USED(b)) {
 840     const mp_int *xch = b;      /* switch a and b, to do fewer outer loops */
 841     b = a;
 842     a = xch;
 843   }
 844 
 845   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
 846   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
 847     goto CLEANUP;
 848 
 849 #ifdef NSS_USE_COMBA
 850   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 851       if (MP_USED(a) == 4) {
 852           s_mp_mul_comba_4(a, b, c);
 853           goto CLEANUP;
 854       }
 855       if (MP_USED(a) == 8) {
 856           s_mp_mul_comba_8(a, b, c);
 857           goto CLEANUP;
 858       }
 859       if (MP_USED(a) == 16) {
 860           s_mp_mul_comba_16(a, b, c);
 861           goto CLEANUP;
 862       }
 863       if (MP_USED(a) == 32) {
 864           s_mp_mul_comba_32(a, b, c);
 865           goto CLEANUP;
 866       }
 867   }
 868 #endif
 869 
 870   pb = MP_DIGITS(b);
 871   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
 872 
 873   /* Outer loop:  Digits of b */
 874   useda = MP_USED(a);
 875   usedb = MP_USED(b);
 876   for (ib = 1; ib < usedb; ib++) {
 877     mp_digit b_i    = *pb++;
 878 
 879     /* Inner product:  Digits of a */
 880     if (b_i)
 881       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
 882     else
 883       MP_DIGIT(c, ib + useda) = b_i;
 884   }
 885 
 886   s_mp_clamp(c);
 887 
 888   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
 889     SIGN(c) = ZPOS;
 890   else
 891     SIGN(c) = NEG;
 892 
 893 CLEANUP:
 894   mp_clear(&tmp);
 895   return res;
 896 } /* end mp_mul() */
 897 
 898 /* }}} */
 899 
 900 /* {{{ mp_sqr(a, sqr) */
 901 
 902 #if MP_SQUARE
 903 /*
 904   Computes the square of a.  This can be done more
 905   efficiently than a general multiplication, because many of the
 906   computation steps are redundant when squaring.  The inner product
 907   step is a bit more complicated, but we save a fair number of
 908   iterations of the multiplication loop.
 909  */
 910 
 911 /* sqr = a^2;   Caller provides both a and tmp; */
 912 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
 913 {
 914   mp_digit *pa;
 915   mp_digit d;
 916   mp_err   res;
 917   mp_size  ix;
 918   mp_int   tmp;
 919   int      count;
 920 
 921   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
 922 
 923   if (a == sqr) {
 924     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 925       return res;
 926     a = &tmp;
 927   } else {
 928     DIGITS(&tmp) = 0;
 929     res = MP_OKAY;
 930   }
 931 
 932   ix = 2 * MP_USED(a);
 933   if (ix > MP_ALLOC(sqr)) {
 934     MP_USED(sqr) = 1;
 935     MP_CHECKOK( s_mp_grow(sqr, ix) );
 936   }
 937   MP_USED(sqr) = ix;
 938   MP_DIGIT(sqr, 0) = 0;
 939 
 940 #ifdef NSS_USE_COMBA
 941   if (IS_POWER_OF_2(MP_USED(a))) {
 942       if (MP_USED(a) == 4) {
 943           s_mp_sqr_comba_4(a, sqr);
 944           goto CLEANUP;
 945       }
 946       if (MP_USED(a) == 8) {
 947           s_mp_sqr_comba_8(a, sqr);
 948           goto CLEANUP;
 949       }
 950       if (MP_USED(a) == 16) {
 951           s_mp_sqr_comba_16(a, sqr);
 952           goto CLEANUP;
 953       }
 954       if (MP_USED(a) == 32) {
 955           s_mp_sqr_comba_32(a, sqr);
 956           goto CLEANUP;
 957       }
 958   }
 959 #endif
 960 
 961   pa = MP_DIGITS(a);
 962   count = MP_USED(a) - 1;
 963   if (count > 0) {
 964     d = *pa++;
 965     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
 966     for (ix = 3; --count > 0; ix += 2) {
 967       d = *pa++;
 968       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
 969     } /* for(ix ...) */
 970     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
 971 
 972     /* now sqr *= 2 */
 973     s_mp_mul_2(sqr);
 974   } else {
 975     MP_DIGIT(sqr, 1) = 0;
 976   }
 977 
 978   /* now add the squares of the digits of a to sqr. */
 979   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
 980 
 981   SIGN(sqr) = ZPOS;
 982   s_mp_clamp(sqr);
 983 
 984 CLEANUP:
 985   mp_clear(&tmp);
 986   return res;
 987 
 988 } /* end mp_sqr() */
 989 #endif
 990 
 991 /* }}} */
 992 
 993 /* {{{ mp_div(a, b, q, r) */
 994 
 995 /*
 996   mp_div(a, b, q, r)
 997 
 998   Compute q = a / b and r = a mod b.  Input parameters may be re-used
 999   as output parameters.  If q or r is NULL, that portion of the
1000   computation will be discarded (although it will still be computed)
1001  */
1002 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1003 {
1004   mp_err   res;
1005   mp_int   *pQ, *pR;
1006   mp_int   qtmp, rtmp, btmp;
1007   int      cmp;
1008   mp_sign  signA;
1009   mp_sign  signB;
1010 
1011   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1012 
1013   signA = MP_SIGN(a);
1014   signB = MP_SIGN(b);
1015 
1016   if(mp_cmp_z(b) == MP_EQ)
1017     return MP_RANGE;
1018 
1019   DIGITS(&qtmp) = 0;
1020   DIGITS(&rtmp) = 0;
1021   DIGITS(&btmp) = 0;
1022 
1023   /* Set up some temporaries... */
1024   if (!r || r == a || r == b) {
1025     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1026     pR = &rtmp;
1027   } else {
1028     MP_CHECKOK( mp_copy(a, r) );
1029     pR = r;
1030   }
1031 
1032   if (!q || q == a || q == b) {
1033     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1034     pQ = &qtmp;
1035   } else {
1036     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1037     pQ = q;
1038     mp_zero(pQ);
1039   }
1040 
1041   /*
1042     If |a| <= |b|, we can compute the solution without division;
1043     otherwise, we actually do the work required.
1044    */
1045   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1046     if (cmp) {
1047       /* r was set to a above. */
1048       mp_zero(pQ);
1049     } else {
1050       mp_set(pQ, 1);
1051       mp_zero(pR);
1052     }
1053   } else {
1054     MP_CHECKOK( mp_init_copy(&btmp, b) );
1055     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1056   }
1057 
1058   /* Compute the signs for the output  */
1059   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1060   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1061   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1062 
1063   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1064     SIGN(pQ) = ZPOS;
1065   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1066     SIGN(pR) = ZPOS;
1067 
1068   /* Copy output, if it is needed      */
1069   if(q && q != pQ)
1070     s_mp_exch(pQ, q);
1071 
1072   if(r && r != pR)
1073     s_mp_exch(pR, r);
1074 
1075 CLEANUP:
1076   mp_clear(&btmp);
1077   mp_clear(&rtmp);
1078   mp_clear(&qtmp);
1079 
1080   return res;
1081 
1082 } /* end mp_div() */
1083 
1084 /* }}} */
1085 
1086 /* {{{ mp_div_2d(a, d, q, r) */
1087 
1088 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1089 {
1090   mp_err  res;
1091 
1092   ARGCHK(a != NULL, MP_BADARG);
1093 
1094   if(q) {
1095     if((res = mp_copy(a, q)) != MP_OKAY)
1096       return res;
1097   }
1098   if(r) {
1099     if((res = mp_copy(a, r)) != MP_OKAY)
1100       return res;
1101   }
1102   if(q) {
1103     s_mp_div_2d(q, d);
1104   }
1105   if(r) {
1106     s_mp_mod_2d(r, d);
1107   }
1108 
1109   return MP_OKAY;
1110 
1111 } /* end mp_div_2d() */
1112 
1113 /* }}} */
1114 
1115 /* {{{ mp_expt(a, b, c) */
1116 
1117 /*
1118   mp_expt(a, b, c)
1119 
1120   Compute c = a ** b, that is, raise a to the b power.  Uses a
1121   standard iterative square-and-multiply technique.
1122  */
1123 
1124 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1125 {
1126   mp_int   s, x;
1127   mp_err   res;
1128   mp_digit d;
1129   unsigned int      dig, bit;
1130 
1131   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1132 
1133   if(mp_cmp_z(b) < 0)
1134     return MP_RANGE;
1135 
1136   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1137     return res;
1138 
1139   mp_set(&s, 1);
1140 
1141   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1142     goto X;
1143 
1144   /* Loop over low-order digits in ascending order */
1145   for(dig = 0; dig < (USED(b) - 1); dig++) {
1146     d = DIGIT(b, dig);
1147 
1148     /* Loop over bits of each non-maximal digit */
1149     for(bit = 0; bit < DIGIT_BIT; bit++) {
1150       if(d & 1) {
1151         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1152           goto CLEANUP;
1153       }
1154 
1155       d >>= 1;
1156 
1157       if((res = s_mp_sqr(&x)) != MP_OKAY)
1158         goto CLEANUP;
1159     }
1160   }
1161 
1162   /* Consider now the last digit... */
1163   d = DIGIT(b, dig);
1164 
1165   while(d) {
1166     if(d & 1) {
1167       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1168         goto CLEANUP;
1169     }
1170 
1171     d >>= 1;
1172 
1173     if((res = s_mp_sqr(&x)) != MP_OKAY)
1174       goto CLEANUP;
1175   }
1176 
1177   if(mp_iseven(b))
1178     SIGN(&s) = SIGN(a);
1179 
1180   res = mp_copy(&s, c);
1181 
1182 CLEANUP:
1183   mp_clear(&x);
1184 X:
1185   mp_clear(&s);
1186 
1187   return res;
1188 
1189 } /* end mp_expt() */
1190 
1191 /* }}} */
1192 
1193 /* {{{ mp_2expt(a, k) */
1194 
1195 /* Compute a = 2^k */
1196 
1197 mp_err mp_2expt(mp_int *a, mp_digit k)
1198 {
1199   ARGCHK(a != NULL, MP_BADARG);
1200 
1201   return s_mp_2expt(a, k);
1202 
1203 } /* end mp_2expt() */
1204 
1205 /* }}} */
1206 
1207 /* {{{ mp_mod(a, m, c) */
1208 
1209 /*
1210   mp_mod(a, m, c)
1211 
1212   Compute c = a (mod m).  Result will always be 0 <= c < m.
1213  */
1214 
1215 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1216 {
1217   mp_err  res;
1218   int     mag;
1219 
1220   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1221 
1222   if(SIGN(m) == NEG)
1223     return MP_RANGE;
1224 
1225   /*
1226      If |a| > m, we need to divide to get the remainder and take the
1227      absolute value.
1228 
1229      If |a| < m, we don't need to do any division, just copy and adjust
1230      the sign (if a is negative).
1231 
1232      If |a| == m, we can simply set the result to zero.
1233 
1234      This order is intended to minimize the average path length of the
1235      comparison chain on common workloads -- the most frequent cases are
1236      that |a| != m, so we do those first.
1237    */
1238   if((mag = s_mp_cmp(a, m)) > 0) {
1239     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1240       return res;
1241 
1242     if(SIGN(c) == NEG) {
1243       if((res = mp_add(c, m, c)) != MP_OKAY)
1244         return res;
1245     }
1246 
1247   } else if(mag < 0) {
1248     if((res = mp_copy(a, c)) != MP_OKAY)
1249       return res;
1250 
1251     if(mp_cmp_z(a) < 0) {
1252       if((res = mp_add(c, m, c)) != MP_OKAY)
1253         return res;
1254 
1255     }
1256 
1257   } else {
1258     mp_zero(c);
1259 
1260   }
1261 
1262   return MP_OKAY;
1263 
1264 } /* end mp_mod() */
1265 
1266 /* }}} */
1267 
1268 /* {{{ mp_mod_d(a, d, c) */
1269 
1270 /*
1271   mp_mod_d(a, d, c)
1272 
1273   Compute c = a (mod d).  Result will always be 0 <= c < d
1274  */
1275 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1276 {
1277   mp_err   res;
1278   mp_digit rem;
1279 
1280   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1281 
1282   if(s_mp_cmp_d(a, d) > 0) {
1283     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1284       return res;
1285 
1286   } else {
1287     if(SIGN(a) == NEG)
1288       rem = d - DIGIT(a, 0);
1289     else
1290       rem = DIGIT(a, 0);
1291   }
1292 
1293   if(c)
1294     *c = rem;
1295 
1296   return MP_OKAY;
1297 
1298 } /* end mp_mod_d() */
1299 
1300 /* }}} */
1301 
1302 /* {{{ mp_sqrt(a, b) */
1303 
1304 /*
1305   mp_sqrt(a, b)
1306 
1307   Compute the integer square root of a, and store the result in b.
1308   Uses an integer-arithmetic version of Newton's iterative linear
1309   approximation technique to determine this value; the result has the
1310   following two properties:
1311 
1312      b^2 <= a
1313      (b+1)^2 >= a
1314 
1315   It is a range error to pass a negative value.
1316  */
1317 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1318 {
1319   mp_int   x, t;
1320   mp_err   res;
1321   mp_size  used;
1322 
1323   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1324 
1325   /* Cannot take square root of a negative value */
1326   if(SIGN(a) == NEG)
1327     return MP_RANGE;
1328 
1329   /* Special cases for zero and one, trivial     */
1330   if(mp_cmp_d(a, 1) <= 0)
1331     return mp_copy(a, b);
1332 
1333   /* Initialize the temporaries we'll use below  */
1334   if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1335     return res;
1336 
1337   /* Compute an initial guess for the iteration as a itself */
1338   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1339     goto X;
1340 
1341   used = MP_USED(&x);
1342   if (used > 1) {
1343     s_mp_rshd(&x, used / 2);
1344   }
1345 
1346   for(;;) {
1347     /* t = (x * x) - a */
1348     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1349     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1350        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1351       goto CLEANUP;
1352 
1353     /* t = t / 2x       */
1354     s_mp_mul_2(&x);
1355     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1356       goto CLEANUP;
1357     s_mp_div_2(&x);
1358 
1359     /* Terminate the loop, if the quotient is zero */
1360     if(mp_cmp_z(&t) == MP_EQ)
1361       break;
1362 
1363     /* x = x - t       */
1364     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1365       goto CLEANUP;
1366 
1367   }
1368 
1369   /* Copy result to output parameter */
1370   mp_sub_d(&x, 1, &x);
1371   s_mp_exch(&x, b);
1372 
1373  CLEANUP:
1374   mp_clear(&x);
1375  X:
1376   mp_clear(&t);
1377 
1378   return res;
1379 
1380 } /* end mp_sqrt() */
1381 
1382 /* }}} */
1383 
1384 /* }}} */
1385 
1386 /*------------------------------------------------------------------------*/
1387 /* {{{ Modular arithmetic */
1388 
1389 #if MP_MODARITH
1390 /* {{{ mp_addmod(a, b, m, c) */
1391 
1392 /*
1393   mp_addmod(a, b, m, c)
1394 
1395   Compute c = (a + b) mod m
1396  */
1397 
1398 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1399 {
1400   mp_err  res;
1401 
1402   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1403 
1404   if((res = mp_add(a, b, c)) != MP_OKAY)
1405     return res;
1406   if((res = mp_mod(c, m, c)) != MP_OKAY)
1407     return res;
1408 
1409   return MP_OKAY;
1410 
1411 }
1412 
1413 /* }}} */
1414 
1415 /* {{{ mp_submod(a, b, m, c) */
1416 
1417 /*
1418   mp_submod(a, b, m, c)
1419 
1420   Compute c = (a - b) mod m
1421  */
1422 
1423 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1424 {
1425   mp_err  res;
1426 
1427   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1428 
1429   if((res = mp_sub(a, b, c)) != MP_OKAY)
1430     return res;
1431   if((res = mp_mod(c, m, c)) != MP_OKAY)
1432     return res;
1433 
1434   return MP_OKAY;
1435 
1436 }
1437 
1438 /* }}} */
1439 
1440 /* {{{ mp_mulmod(a, b, m, c) */
1441 
1442 /*
1443   mp_mulmod(a, b, m, c)
1444 
1445   Compute c = (a * b) mod m
1446  */
1447 
1448 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1449 {
1450   mp_err  res;
1451 
1452   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1453 
1454   if((res = mp_mul(a, b, c)) != MP_OKAY)
1455     return res;
1456   if((res = mp_mod(c, m, c)) != MP_OKAY)
1457     return res;
1458 
1459   return MP_OKAY;
1460 
1461 }
1462 
1463 /* }}} */
1464 
1465 /* {{{ mp_sqrmod(a, m, c) */
1466 
1467 #if MP_SQUARE
1468 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1469 {
1470   mp_err  res;
1471 
1472   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1473 
1474   if((res = mp_sqr(a, c)) != MP_OKAY)
1475     return res;
1476   if((res = mp_mod(c, m, c)) != MP_OKAY)
1477     return res;
1478 
1479   return MP_OKAY;
1480 
1481 } /* end mp_sqrmod() */
1482 #endif
1483 
1484 /* }}} */
1485 
1486 /* {{{ s_mp_exptmod(a, b, m, c) */
1487 
1488 /*
1489   s_mp_exptmod(a, b, m, c)
1490 
1491   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1492   method with modular reductions at each step. (This is basically the
1493   same code as mp_expt(), except for the addition of the reductions)
1494 
1495   The modular reductions are done using Barrett's algorithm (see
1496   s_mp_reduce() below for details)
1497  */
1498 
1499 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1500 {
1501   mp_int   s, x, mu;
1502   mp_err   res;
1503   mp_digit d;
1504   unsigned int      dig, bit;
1505 
1506   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1507 
1508   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1509     return MP_RANGE;
1510 
1511   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1512     return res;
1513   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1514      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1515     goto X;
1516   if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1517     goto MU;
1518 
1519   mp_set(&s, 1);
1520 
1521   /* mu = b^2k / m */
1522   s_mp_add_d(&mu, 1);
1523   s_mp_lshd(&mu, 2 * USED(m));
1524   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1525     goto CLEANUP;
1526 
1527   /* Loop over digits of b in ascending order, except highest order */
1528   for(dig = 0; dig < (USED(b) - 1); dig++) {
1529     d = DIGIT(b, dig);
1530 
1531     /* Loop over the bits of the lower-order digits */
1532     for(bit = 0; bit < DIGIT_BIT; bit++) {
1533       if(d & 1) {
1534         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1535           goto CLEANUP;
1536         if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1537           goto CLEANUP;
1538       }
1539 
1540       d >>= 1;
1541 
1542       if((res = s_mp_sqr(&x)) != MP_OKAY)
1543         goto CLEANUP;
1544       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1545         goto CLEANUP;
1546     }
1547   }
1548 
1549   /* Now do the last digit... */
1550   d = DIGIT(b, dig);
1551 
1552   while(d) {
1553     if(d & 1) {
1554       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1555         goto CLEANUP;
1556       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1557         goto CLEANUP;
1558     }
1559 
1560     d >>= 1;
1561 
1562     if((res = s_mp_sqr(&x)) != MP_OKAY)
1563       goto CLEANUP;
1564     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1565       goto CLEANUP;
1566   }
1567 
1568   s_mp_exch(&s, c);
1569 
1570  CLEANUP:
1571   mp_clear(&mu);
1572  MU:
1573   mp_clear(&x);
1574  X:
1575   mp_clear(&s);
1576 
1577   return res;
1578 
1579 } /* end s_mp_exptmod() */
1580 
1581 /* }}} */
1582 
1583 /* {{{ mp_exptmod_d(a, d, m, c) */
1584 
1585 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1586 {
1587   mp_int   s, x;
1588   mp_err   res;
1589 
1590   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1591 
1592   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1593     return res;
1594   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1595     goto X;
1596 
1597   mp_set(&s, 1);
1598 
1599   while(d != 0) {
1600     if(d & 1) {
1601       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1602          (res = mp_mod(&s, m, &s)) != MP_OKAY)
1603         goto CLEANUP;
1604     }
1605 
1606     d /= 2;
1607 
1608     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1609        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1610       goto CLEANUP;
1611   }
1612 
1613   s.flag = (mp_flag)0;
1614   s_mp_exch(&s, c);
1615 
1616 CLEANUP:
1617   mp_clear(&x);
1618 X:
1619   mp_clear(&s);
1620 
1621   return res;
1622 
1623 } /* end mp_exptmod_d() */
1624 
1625 /* }}} */
1626 #endif /* if MP_MODARITH */
1627 
1628 /* }}} */
1629 
1630 /*------------------------------------------------------------------------*/
1631 /* {{{ Comparison functions */
1632 
1633 /* {{{ mp_cmp_z(a) */
1634 
1635 /*
1636   mp_cmp_z(a)
1637 
1638   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1639  */
1640 
1641 int    mp_cmp_z(const mp_int *a)
1642 {
1643   if(SIGN(a) == NEG)
1644     return MP_LT;
1645   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1646     return MP_EQ;
1647   else
1648     return MP_GT;
1649 
1650 } /* end mp_cmp_z() */
1651 
1652 /* }}} */
1653 
1654 /* {{{ mp_cmp_d(a, d) */
1655 
1656 /*
1657   mp_cmp_d(a, d)
1658 
1659   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1660  */
1661 
1662 int    mp_cmp_d(const mp_int *a, mp_digit d)
1663 {
1664   ARGCHK(a != NULL, MP_EQ);
1665 
1666   if(SIGN(a) == NEG)
1667     return MP_LT;
1668 
1669   return s_mp_cmp_d(a, d);
1670 
1671 } /* end mp_cmp_d() */
1672 
1673 /* }}} */
1674 
1675 /* {{{ mp_cmp(a, b) */
1676 
1677 int    mp_cmp(const mp_int *a, const mp_int *b)
1678 {
1679   ARGCHK(a != NULL && b != NULL, MP_EQ);
1680 
1681   if(SIGN(a) == SIGN(b)) {
1682     int  mag;
1683 
1684     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1685       return MP_EQ;
1686 
1687     if(SIGN(a) == ZPOS)
1688       return mag;
1689     else
1690       return -mag;
1691 
1692   } else if(SIGN(a) == ZPOS) {
1693     return MP_GT;
1694   } else {
1695     return MP_LT;
1696   }
1697 
1698 } /* end mp_cmp() */
1699 
1700 /* }}} */
1701 
1702 /* {{{ mp_cmp_mag(a, b) */
1703 
1704 /*
1705   mp_cmp_mag(a, b)
1706 
1707   Compares |a| <=> |b|, and returns an appropriate comparison result
1708  */
1709 
1710 int    mp_cmp_mag(mp_int *a, mp_int *b)
1711 {
1712   ARGCHK(a != NULL && b != NULL, MP_EQ);
1713 
1714   return s_mp_cmp(a, b);
1715 
1716 } /* end mp_cmp_mag() */
1717 
1718 /* }}} */
1719 
1720 /* {{{ mp_cmp_int(a, z, kmflag) */
1721 
1722 /*
1723   This just converts z to an mp_int, and uses the existing comparison
1724   routines.  This is sort of inefficient, but it's not clear to me how
1725   frequently this wil get used anyway.  For small positive constants,
1726   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1727  */
1728 int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1729 {
1730   mp_int  tmp;
1731   int     out;
1732 
1733   ARGCHK(a != NULL, MP_EQ);
1734 
1735   mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1736   out = mp_cmp(a, &tmp);
1737   mp_clear(&tmp);
1738 
1739   return out;
1740 
1741 } /* end mp_cmp_int() */
1742 
1743 /* }}} */
1744 
1745 /* {{{ mp_isodd(a) */
1746 
1747 /*
1748   mp_isodd(a)
1749 
1750   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1751  */
1752 int    mp_isodd(const mp_int *a)
1753 {
1754   ARGCHK(a != NULL, 0);
1755 
1756   return (int)(DIGIT(a, 0) & 1);
1757 
1758 } /* end mp_isodd() */
1759 
1760 /* }}} */
1761 
1762 /* {{{ mp_iseven(a) */
1763 
1764 int    mp_iseven(const mp_int *a)
1765 {
1766   return !mp_isodd(a);
1767 
1768 } /* end mp_iseven() */
1769 
1770 /* }}} */
1771 
1772 /* }}} */
1773 
1774 /*------------------------------------------------------------------------*/
1775 /* {{{ Number theoretic functions */
1776 
1777 #if MP_NUMTH
1778 /* {{{ mp_gcd(a, b, c) */
1779 
1780 /*
1781   Like the old mp_gcd() function, except computes the GCD using the
1782   binary algorithm due to Josef Stein in 1961 (via Knuth).
1783  */
1784 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1785 {
1786   mp_err   res;
1787   mp_int   u, v, t;
1788   mp_size  k = 0;
1789 
1790   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1791 
1792   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1793       return MP_RANGE;
1794   if(mp_cmp_z(a) == MP_EQ) {
1795     return mp_copy(b, c);
1796   } else if(mp_cmp_z(b) == MP_EQ) {
1797     return mp_copy(a, c);
1798   }
1799 
1800   if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1801     return res;
1802   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1803     goto U;
1804   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1805     goto V;
1806 
1807   SIGN(&u) = ZPOS;
1808   SIGN(&v) = ZPOS;
1809 
1810   /* Divide out common factors of 2 until at least 1 of a, b is even */
1811   while(mp_iseven(&u) && mp_iseven(&v)) {
1812     s_mp_div_2(&u);
1813     s_mp_div_2(&v);
1814     ++k;
1815   }
1816 
1817   /* Initialize t */
1818   if(mp_isodd(&u)) {
1819     if((res = mp_copy(&v, &t)) != MP_OKAY)
1820       goto CLEANUP;
1821 
1822     /* t = -v */
1823     if(SIGN(&v) == ZPOS)
1824       SIGN(&t) = NEG;
1825     else
1826       SIGN(&t) = ZPOS;
1827 
1828   } else {
1829     if((res = mp_copy(&u, &t)) != MP_OKAY)
1830       goto CLEANUP;
1831 
1832   }
1833 
1834   for(;;) {
1835     while(mp_iseven(&t)) {
1836       s_mp_div_2(&t);
1837     }
1838 
1839     if(mp_cmp_z(&t) == MP_GT) {
1840       if((res = mp_copy(&t, &u)) != MP_OKAY)
1841         goto CLEANUP;
1842 
1843     } else {
1844       if((res = mp_copy(&t, &v)) != MP_OKAY)
1845         goto CLEANUP;
1846 
1847       /* v = -t */
1848       if(SIGN(&t) == ZPOS)
1849         SIGN(&v) = NEG;
1850       else
1851         SIGN(&v) = ZPOS;
1852     }
1853 
1854     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1855       goto CLEANUP;
1856 
1857     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1858       break;
1859   }
1860 
1861   s_mp_2expt(&v, k);       /* v = 2^k   */
1862   res = mp_mul(&u, &v, c); /* c = u * v */
1863 
1864  CLEANUP:
1865   mp_clear(&v);
1866  V:
1867   mp_clear(&u);
1868  U:
1869   mp_clear(&t);
1870 
1871   return res;
1872 
1873 } /* end mp_gcd() */
1874 
1875 /* }}} */
1876 
1877 /* {{{ mp_lcm(a, b, c) */
1878 
1879 /* We compute the least common multiple using the rule:
1880 
1881    ab = [a, b](a, b)
1882 
1883    ... by computing the product, and dividing out the gcd.
1884  */
1885 
1886 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1887 {
1888   mp_int  gcd, prod;
1889   mp_err  res;
1890 
1891   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1892 
1893   /* Set up temporaries */
1894   if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1895     return res;
1896   if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1897     goto GCD;
1898 
1899   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1900     goto CLEANUP;
1901   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1902     goto CLEANUP;
1903 
1904   res = mp_div(&prod, &gcd, c, NULL);
1905 
1906  CLEANUP:
1907   mp_clear(&prod);
1908  GCD:
1909   mp_clear(&gcd);
1910 
1911   return res;
1912 
1913 } /* end mp_lcm() */
1914 
1915 /* }}} */
1916 
1917 /* {{{ mp_xgcd(a, b, g, x, y) */
1918 
1919 /*
1920   mp_xgcd(a, b, g, x, y)
1921 
1922   Compute g = (a, b) and values x and y satisfying Bezout's identity
1923   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1924   based on the Stein algorithm used for mp_gcd()
1925   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1926  */
1927 
1928 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1929 {
1930   mp_int   gx, xc, yc, u, v, A, B, C, D;
1931   mp_int  *clean[9];
1932   mp_err   res;
1933   int      last = -1;
1934 
1935   if(mp_cmp_z(b) == 0)
1936     return MP_RANGE;
1937 
1938   /* Initialize all these variables we need */
1939   MP_CHECKOK( mp_init(&u, FLAG(a)) );
1940   clean[++last] = &u;
1941   MP_CHECKOK( mp_init(&v, FLAG(a)) );
1942   clean[++last] = &v;
1943   MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1944   clean[++last] = &gx;
1945   MP_CHECKOK( mp_init(&A, FLAG(a)) );
1946   clean[++last] = &A;
1947   MP_CHECKOK( mp_init(&B, FLAG(a)) );
1948   clean[++last] = &B;
1949   MP_CHECKOK( mp_init(&C, FLAG(a)) );
1950   clean[++last] = &C;
1951   MP_CHECKOK( mp_init(&D, FLAG(a)) );
1952   clean[++last] = &D;
1953   MP_CHECKOK( mp_init_copy(&xc, a) );
1954   clean[++last] = &xc;
1955   mp_abs(&xc, &xc);
1956   MP_CHECKOK( mp_init_copy(&yc, b) );
1957   clean[++last] = &yc;
1958   mp_abs(&yc, &yc);
1959 
1960   mp_set(&gx, 1);
1961 
1962   /* Divide by two until at least one of them is odd */
1963   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1964     mp_size nx = mp_trailing_zeros(&xc);
1965     mp_size ny = mp_trailing_zeros(&yc);
1966     mp_size n  = MP_MIN(nx, ny);
1967     s_mp_div_2d(&xc,n);
1968     s_mp_div_2d(&yc,n);
1969     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1970   }
1971 
1972   mp_copy(&xc, &u);
1973   mp_copy(&yc, &v);
1974   mp_set(&A, 1); mp_set(&D, 1);
1975 
1976   /* Loop through binary GCD algorithm */
1977   do {
1978     while(mp_iseven(&u)) {
1979       s_mp_div_2(&u);
1980 
1981       if(mp_iseven(&A) && mp_iseven(&B)) {
1982         s_mp_div_2(&A); s_mp_div_2(&B);
1983       } else {
1984         MP_CHECKOK( mp_add(&A, &yc, &A) );
1985         s_mp_div_2(&A);
1986         MP_CHECKOK( mp_sub(&B, &xc, &B) );
1987         s_mp_div_2(&B);
1988       }
1989     }
1990 
1991     while(mp_iseven(&v)) {
1992       s_mp_div_2(&v);
1993 
1994       if(mp_iseven(&C) && mp_iseven(&D)) {
1995         s_mp_div_2(&C); s_mp_div_2(&D);
1996       } else {
1997         MP_CHECKOK( mp_add(&C, &yc, &C) );
1998         s_mp_div_2(&C);
1999         MP_CHECKOK( mp_sub(&D, &xc, &D) );
2000         s_mp_div_2(&D);
2001       }
2002     }
2003 
2004     if(mp_cmp(&u, &v) >= 0) {
2005       MP_CHECKOK( mp_sub(&u, &v, &u) );
2006       MP_CHECKOK( mp_sub(&A, &C, &A) );
2007       MP_CHECKOK( mp_sub(&B, &D, &B) );
2008     } else {
2009       MP_CHECKOK( mp_sub(&v, &u, &v) );
2010       MP_CHECKOK( mp_sub(&C, &A, &C) );
2011       MP_CHECKOK( mp_sub(&D, &B, &D) );
2012     }
2013   } while (mp_cmp_z(&u) != 0);
2014 
2015   /* copy results to output */
2016   if(x)
2017     MP_CHECKOK( mp_copy(&C, x) );
2018 
2019   if(y)
2020     MP_CHECKOK( mp_copy(&D, y) );
2021 
2022   if(g)
2023     MP_CHECKOK( mp_mul(&gx, &v, g) );
2024 
2025  CLEANUP:
2026   while(last >= 0)
2027     mp_clear(clean[last--]);
2028 
2029   return res;
2030 
2031 } /* end mp_xgcd() */
2032 
2033 /* }}} */
2034 
2035 mp_size mp_trailing_zeros(const mp_int *mp)
2036 {
2037   mp_digit d;
2038   mp_size  n = 0;
2039   unsigned int      ix;
2040 
2041   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2042     return n;
2043 
2044   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2045     n += MP_DIGIT_BIT;
2046   if (!d)
2047     return 0;   /* shouldn't happen, but ... */
2048 #if !defined(MP_USE_UINT_DIGIT)
2049   if (!(d & 0xffffffffU)) {
2050     d >>= 32;
2051     n  += 32;
2052   }
2053 #endif
2054   if (!(d & 0xffffU)) {
2055     d >>= 16;
2056     n  += 16;
2057   }
2058   if (!(d & 0xffU)) {
2059     d >>= 8;
2060     n  += 8;
2061   }
2062   if (!(d & 0xfU)) {
2063     d >>= 4;
2064     n  += 4;
2065   }
2066   if (!(d & 0x3U)) {
2067     d >>= 2;
2068     n  += 2;
2069   }
2070   if (!(d & 0x1U)) {
2071     d >>= 1;
2072     n  += 1;
2073   }
2074 #if MP_ARGCHK == 2
2075   assert(0 != (d & 1));
2076 #endif
2077   return n;
2078 }
2079 
2080 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2081 ** Returns k (positive) or error (negative).
2082 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2083 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2084 */
2085 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2086 {
2087   mp_err res;
2088   mp_err k    = 0;
2089   mp_int d, f, g;
2090 
2091   ARGCHK(a && p && c, MP_BADARG);
2092 
2093   MP_DIGITS(&d) = 0;
2094   MP_DIGITS(&f) = 0;
2095   MP_DIGITS(&g) = 0;
2096   MP_CHECKOK( mp_init(&d, FLAG(a)) );
2097   MP_CHECKOK( mp_init_copy(&f, a) );    /* f = a */
2098   MP_CHECKOK( mp_init_copy(&g, p) );    /* g = p */
2099 
2100   mp_set(c, 1);
2101   mp_zero(&d);
2102 
2103   if (mp_cmp_z(&f) == 0) {
2104     res = MP_UNDEF;
2105   } else
2106   for (;;) {
2107     int diff_sign;
2108     while (mp_iseven(&f)) {
2109       mp_size n = mp_trailing_zeros(&f);
2110       if (!n) {
2111         res = MP_UNDEF;
2112         goto CLEANUP;
2113       }
2114       s_mp_div_2d(&f, n);
2115       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2116       k += n;
2117     }
2118     if (mp_cmp_d(&f, 1) == MP_EQ) {     /* f == 1 */
2119       res = k;
2120       break;
2121     }
2122     diff_sign = mp_cmp(&f, &g);
2123     if (diff_sign < 0) {                /* f < g */
2124       s_mp_exch(&f, &g);
2125       s_mp_exch(c, &d);
2126     } else if (diff_sign == 0) {                /* f == g */
2127       res = MP_UNDEF;           /* a and p are not relatively prime */
2128       break;
2129     }
2130     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2131       MP_CHECKOK( mp_sub(&f, &g, &f) ); /* f = f - g */
2132       MP_CHECKOK( mp_sub(c,  &d,  c) ); /* c = c - d */
2133     } else {
2134       MP_CHECKOK( mp_add(&f, &g, &f) ); /* f = f + g */
2135       MP_CHECKOK( mp_add(c,  &d,  c) ); /* c = c + d */
2136     }
2137   }
2138   if (res >= 0) {
2139     while (MP_SIGN(c) != MP_ZPOS) {
2140       MP_CHECKOK( mp_add(c, p, c) );
2141     }
2142     res = k;
2143   }
2144 
2145 CLEANUP:
2146   mp_clear(&d);
2147   mp_clear(&f);
2148   mp_clear(&g);
2149   return res;
2150 }
2151 
2152 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2153 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2154 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2155 */
2156 mp_digit  s_mp_invmod_radix(mp_digit P)
2157 {
2158   mp_digit T = P;
2159   T *= 2 - (P * T);
2160   T *= 2 - (P * T);
2161   T *= 2 - (P * T);
2162   T *= 2 - (P * T);
2163 #if !defined(MP_USE_UINT_DIGIT)
2164   T *= 2 - (P * T);
2165   T *= 2 - (P * T);
2166 #endif
2167   return T;
2168 }
2169 
2170 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2171 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2172 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2173 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2174 */
2175 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2176 {
2177   int      k_orig = k;
2178   mp_digit r;
2179   mp_size  ix;
2180   mp_err   res;
2181 
2182   if (mp_cmp_z(c) < 0) {                /* c < 0 */
2183     MP_CHECKOK( mp_add(c, p, x) );      /* x = c + p */
2184   } else {
2185     MP_CHECKOK( mp_copy(c, x) );        /* x = c */
2186   }
2187 
2188   /* make sure x is large enough */
2189   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2190   ix = MP_MAX(ix, MP_USED(x));
2191   MP_CHECKOK( s_mp_pad(x, ix) );
2192 
2193   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2194 
2195   for (ix = 0; k > 0; ix++) {
2196     int      j = MP_MIN(k, MP_DIGIT_BIT);
2197     mp_digit v = r * MP_DIGIT(x, ix);
2198     if (j < MP_DIGIT_BIT) {
2199       v &= ((mp_digit)1 << j) - 1;      /* v = v mod (2 ** j) */
2200     }
2201     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2202     k -= j;
2203   }
2204   s_mp_clamp(x);
2205   s_mp_div_2d(x, k_orig);
2206   res = MP_OKAY;
2207 
2208 CLEANUP:
2209   return res;
2210 }
2211 
2212 /* compute mod inverse using Schroeppel's method, only if m is odd */
2213 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2214 {
2215   int k;
2216   mp_err  res;
2217   mp_int  x;
2218 
2219   ARGCHK(a && m && c, MP_BADARG);
2220 
2221   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2222     return MP_RANGE;
2223   if (mp_iseven(m))
2224     return MP_UNDEF;
2225 
2226   MP_DIGITS(&x) = 0;
2227 
2228   if (a == c) {
2229     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2230       return res;
2231     if (a == m)
2232       m = &x;
2233     a = &x;
2234   } else if (m == c) {
2235     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2236       return res;
2237     m = &x;
2238   } else {
2239     MP_DIGITS(&x) = 0;
2240   }
2241 
2242   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2243   k = res;
2244   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2245 CLEANUP:
2246   mp_clear(&x);
2247   return res;
2248 }
2249 
2250 /* Known good algorithm for computing modular inverse.  But slow. */
2251 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2252 {
2253   mp_int  g, x;
2254   mp_err  res;
2255 
2256   ARGCHK(a && m && c, MP_BADARG);
2257 
2258   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2259     return MP_RANGE;
2260 
2261   MP_DIGITS(&g) = 0;
2262   MP_DIGITS(&x) = 0;
2263   MP_CHECKOK( mp_init(&x, FLAG(a)) );
2264   MP_CHECKOK( mp_init(&g, FLAG(a)) );
2265 
2266   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2267 
2268   if (mp_cmp_d(&g, 1) != MP_EQ) {
2269     res = MP_UNDEF;
2270     goto CLEANUP;
2271   }
2272 
2273   res = mp_mod(&x, m, c);
2274   SIGN(c) = SIGN(a);
2275 
2276 CLEANUP:
2277   mp_clear(&x);
2278   mp_clear(&g);
2279 
2280   return res;
2281 }
2282 
2283 /* modular inverse where modulus is 2**k. */
2284 /* c = a**-1 mod 2**k */
2285 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2286 {
2287   mp_err res;
2288   mp_size ix = k + 4;
2289   mp_int t0, t1, val, tmp, two2k;
2290 
2291   static const mp_digit d2 = 2;
2292   static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2293 
2294   if (mp_iseven(a))
2295     return MP_UNDEF;
2296   if (k <= MP_DIGIT_BIT) {
2297     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2298     if (k < MP_DIGIT_BIT)
2299       i &= ((mp_digit)1 << k) - (mp_digit)1;
2300     mp_set(c, i);
2301     return MP_OKAY;
2302   }
2303   MP_DIGITS(&t0) = 0;
2304   MP_DIGITS(&t1) = 0;
2305   MP_DIGITS(&val) = 0;
2306   MP_DIGITS(&tmp) = 0;
2307   MP_DIGITS(&two2k) = 0;
2308   MP_CHECKOK( mp_init_copy(&val, a) );
2309   s_mp_mod_2d(&val, k);
2310   MP_CHECKOK( mp_init_copy(&t0, &val) );
2311   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2312   MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2313   MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2314   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2315   do {
2316     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2317     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2318     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2319     s_mp_mod_2d(&t1, k);
2320     while (MP_SIGN(&t1) != MP_ZPOS) {
2321       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2322     }
2323     if (mp_cmp(&t1, &t0) == MP_EQ)
2324       break;
2325     MP_CHECKOK( mp_copy(&t1, &t0) );
2326   } while (--ix > 0);
2327   if (!ix) {
2328     res = MP_UNDEF;
2329   } else {
2330     mp_exch(c, &t1);
2331   }
2332 
2333 CLEANUP:
2334   mp_clear(&t0);
2335   mp_clear(&t1);
2336   mp_clear(&val);
2337   mp_clear(&tmp);
2338   mp_clear(&two2k);
2339   return res;
2340 }
2341 
2342 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2343 {
2344   mp_err res;
2345   mp_size k;
2346   mp_int oddFactor, evenFactor; /* factors of the modulus */
2347   mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2348   mp_int C2, tmp1, tmp2;
2349 
2350   /*static const mp_digit d1 = 1; */
2351   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2352 
2353   if ((res = s_mp_ispow2(m)) >= 0) {
2354     k = res;
2355     return s_mp_invmod_2d(a, k, c);
2356   }
2357   MP_DIGITS(&oddFactor) = 0;
2358   MP_DIGITS(&evenFactor) = 0;
2359   MP_DIGITS(&oddPart) = 0;
2360   MP_DIGITS(&evenPart) = 0;
2361   MP_DIGITS(&C2)     = 0;
2362   MP_DIGITS(&tmp1)   = 0;
2363   MP_DIGITS(&tmp2)   = 0;
2364 
2365   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2366   MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2367   MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2368   MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2369   MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2370   MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2371   MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2372 
2373   k = mp_trailing_zeros(m);
2374   s_mp_div_2d(&oddFactor, k);
2375   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2376 
2377   /* compute a**-1 mod oddFactor. */
2378   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2379   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2380   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2381 
2382   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2383   /* let m1 = oddFactor,  v1 = oddPart,
2384    * let m2 = evenFactor, v2 = evenPart.
2385    */
2386 
2387   /* Compute C2 = m1**-1 mod m2. */
2388   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2389 
2390   /* compute u = (v2 - v1)*C2 mod m2 */
2391   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2392   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2393   s_mp_mod_2d(&tmp2, k);
2394   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2395     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2396   }
2397 
2398   /* compute answer = v1 + u*m1 */
2399   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2400   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2401   /* not sure this is necessary, but it's low cost if not. */
2402   MP_CHECKOK( mp_mod(c,         m,          c) );
2403 
2404 CLEANUP:
2405   mp_clear(&oddFactor);
2406   mp_clear(&evenFactor);
2407   mp_clear(&oddPart);
2408   mp_clear(&evenPart);
2409   mp_clear(&C2);
2410   mp_clear(&tmp1);
2411   mp_clear(&tmp2);
2412   return res;
2413 }
2414 
2415 
2416 /* {{{ mp_invmod(a, m, c) */
2417 
2418 /*
2419   mp_invmod(a, m, c)
2420 
2421   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2422   This is equivalent to the question of whether (a, m) = 1.  If not,
2423   MP_UNDEF is returned, and there is no inverse.
2424  */
2425 
2426 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2427 {
2428 
2429   ARGCHK(a && m && c, MP_BADARG);
2430 
2431   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2432     return MP_RANGE;
2433 
2434   if (mp_isodd(m)) {
2435     return s_mp_invmod_odd_m(a, m, c);
2436   }
2437   if (mp_iseven(a))
2438     return MP_UNDEF;    /* not invertable */
2439 
2440   return s_mp_invmod_even_m(a, m, c);
2441 
2442 } /* end mp_invmod() */
2443 
2444 /* }}} */
2445 #endif /* if MP_NUMTH */
2446 
2447 /* }}} */
2448 
2449 /*------------------------------------------------------------------------*/
2450 /* {{{ mp_print(mp, ofp) */
2451 
2452 #if MP_IOFUNC
2453 /*
2454   mp_print(mp, ofp)
2455 
2456   Print a textual representation of the given mp_int on the output
2457   stream 'ofp'.  Output is generated using the internal radix.
2458  */
2459 
2460 void   mp_print(mp_int *mp, FILE *ofp)
2461 {
2462   int   ix;
2463 
2464   if(mp == NULL || ofp == NULL)
2465     return;
2466 
2467   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2468 
2469   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2470     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2471   }
2472 
2473 } /* end mp_print() */
2474 
2475 #endif /* if MP_IOFUNC */
2476 
2477 /* }}} */
2478 
2479 /*------------------------------------------------------------------------*/
2480 /* {{{ More I/O Functions */
2481 
2482 /* {{{ mp_read_raw(mp, str, len) */
2483 
2484 /*
2485    mp_read_raw(mp, str, len)
2486 
2487    Read in a raw value (base 256) into the given mp_int
2488  */
2489 
2490 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2491 {
2492   int            ix;
2493   mp_err         res;
2494   unsigned char *ustr = (unsigned char *)str;
2495 
2496   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2497 
2498   mp_zero(mp);
2499 
2500   /* Get sign from first byte */
2501   if(ustr[0])
2502     SIGN(mp) = NEG;
2503   else
2504     SIGN(mp) = ZPOS;
2505 
2506   /* Read the rest of the digits */
2507   for(ix = 1; ix < len; ix++) {
2508     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2509       return res;
2510     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2511       return res;
2512   }
2513 
2514   return MP_OKAY;
2515 
2516 } /* end mp_read_raw() */
2517 
2518 /* }}} */
2519 
2520 /* {{{ mp_raw_size(mp) */
2521 
2522 int    mp_raw_size(mp_int *mp)
2523 {
2524   ARGCHK(mp != NULL, 0);
2525 
2526   return (USED(mp) * sizeof(mp_digit)) + 1;
2527 
2528 } /* end mp_raw_size() */
2529 
2530 /* }}} */
2531 
2532 /* {{{ mp_toraw(mp, str) */
2533 
2534 mp_err mp_toraw(mp_int *mp, char *str)
2535 {
2536   int  ix, jx, pos = 1;
2537 
2538   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2539 
2540   str[0] = (char)SIGN(mp);
2541 
2542   /* Iterate over each digit... */
2543   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2544     mp_digit  d = DIGIT(mp, ix);
2545 
2546     /* Unpack digit bytes, high order first */
2547     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2548       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2549     }
2550   }
2551 
2552   return MP_OKAY;
2553 
2554 } /* end mp_toraw() */
2555 
2556 /* }}} */
2557 
2558 /* {{{ mp_read_radix(mp, str, radix) */
2559 
2560 /*
2561   mp_read_radix(mp, str, radix)
2562 
2563   Read an integer from the given string, and set mp to the resulting
2564   value.  The input is presumed to be in base 10.  Leading non-digit
2565   characters are ignored, and the function reads until a non-digit
2566   character or the end of the string.
2567  */
2568 
2569 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2570 {
2571   int     ix = 0, val = 0;
2572   mp_err  res;
2573   mp_sign sig = ZPOS;
2574 
2575   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2576          MP_BADARG);
2577 
2578   mp_zero(mp);
2579 
2580   /* Skip leading non-digit characters until a digit or '-' or '+' */
2581   while(str[ix] &&
2582         (s_mp_tovalue(str[ix], radix) < 0) &&
2583         str[ix] != '-' &&
2584         str[ix] != '+') {
2585     ++ix;
2586   }
2587 
2588   if(str[ix] == '-') {
2589     sig = NEG;
2590     ++ix;
2591   } else if(str[ix] == '+') {
2592     sig = ZPOS; /* this is the default anyway... */
2593     ++ix;
2594   }
2595 
2596   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2597     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2598       return res;
2599     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2600       return res;
2601     ++ix;
2602   }
2603 
2604   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2605     SIGN(mp) = ZPOS;
2606   else
2607     SIGN(mp) = sig;
2608 
2609   return MP_OKAY;
2610 
2611 } /* end mp_read_radix() */
2612 
2613 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2614 {
2615   int     radix = default_radix;
2616   int     cx;
2617   mp_sign sig   = ZPOS;
2618   mp_err  res;
2619 
2620   /* Skip leading non-digit characters until a digit or '-' or '+' */
2621   while ((cx = *str) != 0 &&
2622         (s_mp_tovalue(cx, radix) < 0) &&
2623         cx != '-' &&
2624         cx != '+') {
2625     ++str;
2626   }
2627 
2628   if (cx == '-') {
2629     sig = NEG;
2630     ++str;
2631   } else if (cx == '+') {
2632     sig = ZPOS; /* this is the default anyway... */
2633     ++str;
2634   }
2635 
2636   if (str[0] == '0') {
2637     if ((str[1] | 0x20) == 'x') {
2638       radix = 16;
2639       str += 2;
2640     } else {
2641       radix = 8;
2642       str++;
2643     }
2644   }
2645   res = mp_read_radix(a, str, radix);
2646   if (res == MP_OKAY) {
2647     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2648   }
2649   return res;
2650 }
2651 
2652 /* }}} */
2653 
2654 /* {{{ mp_radix_size(mp, radix) */
2655 
2656 int    mp_radix_size(mp_int *mp, int radix)
2657 {
2658   int  bits;
2659 
2660   if(!mp || radix < 2 || radix > MAX_RADIX)
2661     return 0;
2662 
2663   bits = USED(mp) * DIGIT_BIT - 1;
2664 
2665   return s_mp_outlen(bits, radix);
2666 
2667 } /* end mp_radix_size() */
2668 
2669 /* }}} */
2670 
2671 /* {{{ mp_toradix(mp, str, radix) */
2672 
2673 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2674 {
2675   int  ix, pos = 0;
2676 
2677   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2678   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2679 
2680   if(mp_cmp_z(mp) == MP_EQ) {
2681     str[0] = '0';
2682     str[1] = '\0';
2683   } else {
2684     mp_err   res;
2685     mp_int   tmp;
2686     mp_sign  sgn;
2687     mp_digit rem, rdx = (mp_digit)radix;
2688     char     ch;
2689 
2690     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2691       return res;
2692 
2693     /* Save sign for later, and take absolute value */
2694     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2695 
2696     /* Generate output digits in reverse order      */
2697     while(mp_cmp_z(&tmp) != 0) {
2698       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2699         mp_clear(&tmp);
2700         return res;
2701       }
2702 
2703       /* Generate digits, use capital letters */
2704       ch = s_mp_todigit(rem, radix, 0);
2705 
2706       str[pos++] = ch;
2707     }
2708 
2709     /* Add - sign if original value was negative */
2710     if(sgn == NEG)
2711       str[pos++] = '-';
2712 
2713     /* Add trailing NUL to end the string        */
2714     str[pos--] = '\0';
2715 
2716     /* Reverse the digits and sign indicator     */
2717     ix = 0;
2718     while(ix < pos) {
2719       char tmp = str[ix];
2720 
2721       str[ix] = str[pos];
2722       str[pos] = tmp;
2723       ++ix;
2724       --pos;
2725     }
2726 
2727     mp_clear(&tmp);
2728   }
2729 
2730   return MP_OKAY;
2731 
2732 } /* end mp_toradix() */
2733 
2734 /* }}} */
2735 
2736 /* {{{ mp_tovalue(ch, r) */
2737 
2738 int    mp_tovalue(char ch, int r)
2739 {
2740   return s_mp_tovalue(ch, r);
2741 
2742 } /* end mp_tovalue() */
2743 
2744 /* }}} */
2745 
2746 /* }}} */
2747 
2748 /* {{{ mp_strerror(ec) */
2749 
2750 /*
2751   mp_strerror(ec)
2752 
2753   Return a string describing the meaning of error code 'ec'.  The
2754   string returned is allocated in static memory, so the caller should
2755   not attempt to modify or free the memory associated with this
2756   string.
2757  */
2758 const char  *mp_strerror(mp_err ec)
2759 {
2760   int   aec = (ec < 0) ? -ec : ec;
2761 
2762   /* Code values are negative, so the senses of these comparisons
2763      are accurate */
2764   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2765     return mp_err_string[0];  /* unknown error code */
2766   } else {
2767     return mp_err_string[aec + 1];
2768   }
2769 
2770 } /* end mp_strerror() */
2771 
2772 /* }}} */
2773 
2774 /*========================================================================*/
2775 /*------------------------------------------------------------------------*/
2776 /* Static function definitions (internal use only)                        */
2777 
2778 /* {{{ Memory management */
2779 
2780 /* {{{ s_mp_grow(mp, min) */
2781 
2782 /* Make sure there are at least 'min' digits allocated to mp              */
2783 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2784 {
2785   if(min > ALLOC(mp)) {
2786     mp_digit   *tmp;
2787 
2788     /* Set min to next nearest default precision block size */
2789     min = MP_ROUNDUP(min, s_mp_defprec);
2790 
2791     if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2792       return MP_MEM;
2793 
2794     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2795 
2796 #if MP_CRYPTO
2797     s_mp_setz(DIGITS(mp), ALLOC(mp));
2798 #endif
2799     s_mp_free(DIGITS(mp), ALLOC(mp));
2800     DIGITS(mp) = tmp;
2801     ALLOC(mp) = min;
2802   }
2803 
2804   return MP_OKAY;
2805 
2806 } /* end s_mp_grow() */
2807 
2808 /* }}} */
2809 
2810 /* {{{ s_mp_pad(mp, min) */
2811 
2812 /* Make sure the used size of mp is at least 'min', growing if needed     */
2813 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2814 {
2815   if(min > USED(mp)) {
2816     mp_err  res;
2817 
2818     /* Make sure there is room to increase precision  */
2819     if (min > ALLOC(mp)) {
2820       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2821         return res;
2822     } else {
2823       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2824     }
2825 
2826     /* Increase precision; should already be 0-filled */
2827     USED(mp) = min;
2828   }
2829 
2830   return MP_OKAY;
2831 
2832 } /* end s_mp_pad() */
2833 
2834 /* }}} */
2835 
2836 /* {{{ s_mp_setz(dp, count) */
2837 
2838 #if MP_MACRO == 0
2839 /* Set 'count' digits pointed to by dp to be zeroes                       */
2840 void s_mp_setz(mp_digit *dp, mp_size count)
2841 {
2842 #if MP_MEMSET == 0
2843   int  ix;
2844 
2845   for(ix = 0; ix < count; ix++)
2846     dp[ix] = 0;
2847 #else
2848   memset(dp, 0, count * sizeof(mp_digit));
2849 #endif
2850 
2851 } /* end s_mp_setz() */
2852 #endif
2853 
2854 /* }}} */
2855 
2856 /* {{{ s_mp_copy(sp, dp, count) */
2857 
2858 #if MP_MACRO == 0
2859 /* Copy 'count' digits from sp to dp                                      */
2860 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2861 {
2862 #if MP_MEMCPY == 0
2863   int  ix;
2864 
2865   for(ix = 0; ix < count; ix++)
2866     dp[ix] = sp[ix];
2867 #else
2868   memcpy(dp, sp, count * sizeof(mp_digit));
2869 #endif
2870 
2871 } /* end s_mp_copy() */
2872 #endif
2873 
2874 /* }}} */
2875 
2876 /* {{{ s_mp_alloc(nb, ni, kmflag) */
2877 
2878 #if MP_MACRO == 0
2879 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2880 void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2881 {
2882   ++mp_allocs;
2883 #ifdef _KERNEL
2884   mp_int *mp;
2885   mp = kmem_zalloc(nb * ni, kmflag);
2886   if (mp != NULL)
2887     FLAG(mp) = kmflag;
2888   return (mp);
2889 #else
2890   return calloc(nb, ni);
2891 #endif
2892 
2893 } /* end s_mp_alloc() */
2894 #endif
2895 
2896 /* }}} */
2897 
2898 /* {{{ s_mp_free(ptr) */
2899 
2900 #if MP_MACRO == 0
2901 /* Free the memory pointed to by ptr                                      */
2902 void     s_mp_free(void *ptr, mp_size alloc)
2903 {
2904   if(ptr) {
2905     ++mp_frees;
2906 #ifdef _KERNEL
2907     kmem_free(ptr, alloc * sizeof (mp_digit));
2908 #else
2909     free(ptr);
2910 #endif
2911   }
2912 } /* end s_mp_free() */
2913 #endif
2914 
2915 /* }}} */
2916 
2917 /* {{{ s_mp_clamp(mp) */
2918 
2919 #if MP_MACRO == 0
2920 /* Remove leading zeroes from the given value                             */
2921 void     s_mp_clamp(mp_int *mp)
2922 {
2923   mp_size used = MP_USED(mp);
2924   while (used > 1 && DIGIT(mp, used - 1) == 0)
2925     --used;
2926   MP_USED(mp) = used;
2927 } /* end s_mp_clamp() */
2928 #endif
2929 
2930 /* }}} */
2931 
2932 /* {{{ s_mp_exch(a, b) */
2933 
2934 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2935 void     s_mp_exch(mp_int *a, mp_int *b)
2936 {
2937   mp_int   tmp;
2938 
2939   tmp = *a;
2940   *a = *b;
2941   *b = tmp;
2942 
2943 } /* end s_mp_exch() */
2944 
2945 /* }}} */
2946 
2947 /* }}} */
2948 
2949 /* {{{ Arithmetic helpers */
2950 
2951 /* {{{ s_mp_lshd(mp, p) */
2952 
2953 /*
2954    Shift mp leftward by p digits, growing if needed, and zero-filling
2955    the in-shifted digits at the right end.  This is a convenient
2956    alternative to multiplication by powers of the radix
2957    The value of USED(mp) must already have been set to the value for
2958    the shifted result.
2959  */
2960 
2961 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2962 {
2963   mp_err  res;
2964   mp_size pos;
2965   int     ix;
2966 
2967   if(p == 0)
2968     return MP_OKAY;
2969 
2970   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2971     return MP_OKAY;
2972 
2973   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2974     return res;
2975 
2976   pos = USED(mp) - 1;
2977 
2978   /* Shift all the significant figures over as needed */
2979   for(ix = pos - p; ix >= 0; ix--)
2980     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2981 
2982   /* Fill the bottom digits with zeroes */
2983   for(ix = 0; ix < p; ix++)
2984     DIGIT(mp, ix) = 0;
2985 
2986   return MP_OKAY;
2987 
2988 } /* end s_mp_lshd() */
2989 
2990 /* }}} */
2991 
2992 /* {{{ s_mp_mul_2d(mp, d) */
2993 
2994 /*
2995   Multiply the integer by 2^d, where d is a number of bits.  This
2996   amounts to a bitwise shift of the value.
2997  */
2998 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
2999 {
3000   mp_err   res;
3001   mp_digit dshift, bshift;
3002   mp_digit mask;
3003 
3004   ARGCHK(mp != NULL,  MP_BADARG);
3005 
3006   dshift = d / MP_DIGIT_BIT;
3007   bshift = d % MP_DIGIT_BIT;
3008   /* bits to be shifted out of the top word */
3009   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3010   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3011 
3012   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3013     return res;
3014 
3015   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3016     return res;
3017 
3018   if (bshift) {
3019     mp_digit *pa = MP_DIGITS(mp);
3020     mp_digit *alim = pa + MP_USED(mp);
3021     mp_digit  prev = 0;
3022 
3023     for (pa += dshift; pa < alim; ) {
3024       mp_digit x = *pa;
3025       *pa++ = (x << bshift) | prev;
3026       prev = x >> (DIGIT_BIT - bshift);
3027     }
3028   }
3029 
3030   s_mp_clamp(mp);
3031   return MP_OKAY;
3032 } /* end s_mp_mul_2d() */
3033 
3034 /* {{{ s_mp_rshd(mp, p) */
3035 
3036 /*
3037    Shift mp rightward by p digits.  Maintains the invariant that
3038    digits above the precision are all zero.  Digits shifted off the
3039    end are lost.  Cannot fail.
3040  */
3041 
3042 void     s_mp_rshd(mp_int *mp, mp_size p)
3043 {
3044   mp_size  ix;
3045   mp_digit *src, *dst;
3046 
3047   if(p == 0)
3048     return;
3049 
3050   /* Shortcut when all digits are to be shifted off */
3051   if(p >= USED(mp)) {
3052     s_mp_setz(DIGITS(mp), ALLOC(mp));
3053     USED(mp) = 1;
3054     SIGN(mp) = ZPOS;
3055     return;
3056   }
3057 
3058   /* Shift all the significant figures over as needed */
3059   dst = MP_DIGITS(mp);
3060   src = dst + p;
3061   for (ix = USED(mp) - p; ix > 0; ix--)
3062     *dst++ = *src++;
3063 
3064   MP_USED(mp) -= p;
3065   /* Fill the top digits with zeroes */
3066   while (p-- > 0)
3067     *dst++ = 0;
3068 
3069 #if 0
3070   /* Strip off any leading zeroes    */
3071   s_mp_clamp(mp);
3072 #endif
3073 
3074 } /* end s_mp_rshd() */
3075 
3076 /* }}} */
3077 
3078 /* {{{ s_mp_div_2(mp) */
3079 
3080 /* Divide by two -- take advantage of radix properties to do it fast      */
3081 void     s_mp_div_2(mp_int *mp)
3082 {
3083   s_mp_div_2d(mp, 1);
3084 
3085 } /* end s_mp_div_2() */
3086 
3087 /* }}} */
3088 
3089 /* {{{ s_mp_mul_2(mp) */
3090 
3091 mp_err s_mp_mul_2(mp_int *mp)
3092 {
3093   mp_digit *pd;
3094   unsigned int      ix, used;
3095   mp_digit kin = 0;
3096 
3097   /* Shift digits leftward by 1 bit */
3098   used = MP_USED(mp);
3099   pd = MP_DIGITS(mp);
3100   for (ix = 0; ix < used; ix++) {
3101     mp_digit d = *pd;
3102     *pd++ = (d << 1) | kin;
3103     kin = (d >> (DIGIT_BIT - 1));
3104   }
3105 
3106   /* Deal with rollover from last digit */
3107   if (kin) {
3108     if (ix >= ALLOC(mp)) {
3109       mp_err res;
3110       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3111         return res;
3112     }
3113 
3114     DIGIT(mp, ix) = kin;
3115     USED(mp) += 1;
3116   }
3117 
3118   return MP_OKAY;
3119 
3120 } /* end s_mp_mul_2() */
3121 
3122 /* }}} */
3123 
3124 /* {{{ s_mp_mod_2d(mp, d) */
3125 
3126 /*
3127   Remainder the integer by 2^d, where d is a number of bits.  This
3128   amounts to a bitwise AND of the value, and does not require the full
3129   division code
3130  */
3131 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3132 {
3133   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3134   mp_size  ix;
3135   mp_digit dmask;
3136 
3137   if(ndig >= USED(mp))
3138     return;
3139 
3140   /* Flush all the bits above 2^d in its digit */
3141   dmask = ((mp_digit)1 << nbit) - 1;
3142   DIGIT(mp, ndig) &= dmask;
3143 
3144   /* Flush all digits above the one with 2^d in it */
3145   for(ix = ndig + 1; ix < USED(mp); ix++)
3146     DIGIT(mp, ix) = 0;
3147 
3148   s_mp_clamp(mp);
3149 
3150 } /* end s_mp_mod_2d() */
3151 
3152 /* }}} */
3153 
3154 /* {{{ s_mp_div_2d(mp, d) */
3155 
3156 /*
3157   Divide the integer by 2^d, where d is a number of bits.  This
3158   amounts to a bitwise shift of the value, and does not require the
3159   full division code (used in Barrett reduction, see below)
3160  */
3161 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3162 {
3163   int       ix;
3164   mp_digit  save, next, mask;
3165 
3166   s_mp_rshd(mp, d / DIGIT_BIT);
3167   d %= DIGIT_BIT;
3168   if (d) {
3169     mask = ((mp_digit)1 << d) - 1;
3170     save = 0;
3171     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3172       next = DIGIT(mp, ix) & mask;
3173       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3174       save = next;
3175     }
3176   }
3177   s_mp_clamp(mp);
3178 
3179 } /* end s_mp_div_2d() */
3180 
3181 /* }}} */
3182 
3183 /* {{{ s_mp_norm(a, b, *d) */
3184 
3185 /*
3186   s_mp_norm(a, b, *d)
3187 
3188   Normalize a and b for division, where b is the divisor.  In order
3189   that we might make good guesses for quotient digits, we want the
3190   leading digit of b to be at least half the radix, which we
3191   accomplish by multiplying a and b by a power of 2.  The exponent
3192   (shift count) is placed in *pd, so that the remainder can be shifted
3193   back at the end of the division process.
3194  */
3195 
3196 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3197 {
3198   mp_digit  d;
3199   mp_digit  mask;
3200   mp_digit  b_msd;
3201   mp_err    res    = MP_OKAY;
3202 
3203   d = 0;
3204   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);        /* mask is msb of digit */
3205   b_msd = DIGIT(b, USED(b) - 1);
3206   while (!(b_msd & mask)) {
3207     b_msd <<= 1;
3208     ++d;
3209   }
3210 
3211   if (d) {
3212     MP_CHECKOK( s_mp_mul_2d(a, d) );
3213     MP_CHECKOK( s_mp_mul_2d(b, d) );
3214   }
3215 
3216   *pd = d;
3217 CLEANUP:
3218   return res;
3219 
3220 } /* end s_mp_norm() */
3221 
3222 /* }}} */
3223 
3224 /* }}} */
3225 
3226 /* {{{ Primitive digit arithmetic */
3227 
3228 /* {{{ s_mp_add_d(mp, d) */
3229 
3230 /* Add d to |mp| in place                                                 */
3231 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3232 {
3233 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3234   mp_word   w, k = 0;
3235   mp_size   ix = 1;
3236 
3237   w = (mp_word)DIGIT(mp, 0) + d;
3238   DIGIT(mp, 0) = ACCUM(w);
3239   k = CARRYOUT(w);
3240 
3241   while(ix < USED(mp) && k) {
3242     w = (mp_word)DIGIT(mp, ix) + k;
3243     DIGIT(mp, ix) = ACCUM(w);
3244     k = CARRYOUT(w);
3245     ++ix;
3246   }
3247 
3248   if(k != 0) {
3249     mp_err  res;
3250 
3251     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3252       return res;
3253 
3254     DIGIT(mp, ix) = (mp_digit)k;
3255   }
3256 
3257   return MP_OKAY;
3258 #else
3259   mp_digit * pmp = MP_DIGITS(mp);
3260   mp_digit sum, mp_i, carry = 0;
3261   mp_err   res = MP_OKAY;
3262   int used = (int)MP_USED(mp);
3263 
3264   mp_i = *pmp;
3265   *pmp++ = sum = d + mp_i;
3266   carry = (sum < d);
3267   while (carry && --used > 0) {
3268     mp_i = *pmp;
3269     *pmp++ = sum = carry + mp_i;
3270     carry = !sum;
3271   }
3272   if (carry && !used) {
3273     /* mp is growing */
3274     used = MP_USED(mp);
3275     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3276     MP_DIGIT(mp, used) = carry;
3277   }
3278 CLEANUP:
3279   return res;
3280 #endif
3281 } /* end s_mp_add_d() */
3282 
3283 /* }}} */
3284 
3285 /* {{{ s_mp_sub_d(mp, d) */
3286 
3287 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3288 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3289 {
3290 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3291   mp_word   w, b = 0;
3292   mp_size   ix = 1;
3293 
3294   /* Compute initial subtraction    */
3295   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3296   b = CARRYOUT(w) ? 0 : 1;
3297   DIGIT(mp, 0) = ACCUM(w);
3298 
3299   /* Propagate borrows leftward     */
3300   while(b && ix < USED(mp)) {
3301     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3302     b = CARRYOUT(w) ? 0 : 1;
3303     DIGIT(mp, ix) = ACCUM(w);
3304     ++ix;
3305   }
3306 
3307   /* Remove leading zeroes          */
3308   s_mp_clamp(mp);
3309 
3310   /* If we have a borrow out, it's a violation of the input invariant */
3311   if(b)
3312     return MP_RANGE;
3313   else
3314     return MP_OKAY;
3315 #else
3316   mp_digit *pmp = MP_DIGITS(mp);
3317   mp_digit mp_i, diff, borrow;
3318   mp_size  used = MP_USED(mp);
3319 
3320   mp_i = *pmp;
3321   *pmp++ = diff = mp_i - d;
3322   borrow = (diff > mp_i);
3323   while (borrow && --used) {
3324     mp_i = *pmp;
3325     *pmp++ = diff = mp_i - borrow;
3326     borrow = (diff > mp_i);
3327   }
3328   s_mp_clamp(mp);
3329   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3330 #endif
3331 } /* end s_mp_sub_d() */
3332 
3333 /* }}} */
3334 
3335 /* {{{ s_mp_mul_d(a, d) */
3336 
3337 /* Compute a = a * d, single digit multiplication                         */
3338 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3339 {
3340   mp_err  res;
3341   mp_size used;
3342   int     pow;
3343 
3344   if (!d) {
3345     mp_zero(a);
3346     return MP_OKAY;
3347   }
3348   if (d == 1)
3349     return MP_OKAY;
3350   if (0 <= (pow = s_mp_ispow2d(d))) {
3351     return s_mp_mul_2d(a, (mp_digit)pow);
3352   }
3353 
3354   used = MP_USED(a);
3355   MP_CHECKOK( s_mp_pad(a, used + 1) );
3356 
3357   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3358 
3359   s_mp_clamp(a);
3360 
3361 CLEANUP:
3362   return res;
3363 
3364 } /* end s_mp_mul_d() */
3365 
3366 /* }}} */
3367 
3368 /* {{{ s_mp_div_d(mp, d, r) */
3369 
3370 /*
3371   s_mp_div_d(mp, d, r)
3372 
3373   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3374   single digit d.  If r is null, the remainder will be discarded.
3375  */
3376 
3377 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3378 {
3379 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3380   mp_word   w = 0, q;
3381 #else
3382   mp_digit  w = 0, q;
3383 #endif
3384   int       ix;
3385   mp_err    res;
3386   mp_int    quot;
3387   mp_int    rem;
3388 
3389   if(d == 0)
3390     return MP_RANGE;
3391   if (d == 1) {
3392     if (r)
3393       *r = 0;
3394     return MP_OKAY;
3395   }
3396   /* could check for power of 2 here, but mp_div_d does that. */
3397   if (MP_USED(mp) == 1) {
3398     mp_digit n   = MP_DIGIT(mp,0);
3399     mp_digit rem;
3400 
3401     q   = n / d;
3402     rem = n % d;
3403     MP_DIGIT(mp,0) = q;
3404     if (r)
3405       *r = rem;
3406     return MP_OKAY;
3407   }
3408 
3409   MP_DIGITS(&rem)  = 0;
3410   MP_DIGITS(&quot) = 0;
3411   /* Make room for the quotient */
3412   MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3413 
3414 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3415   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3416     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3417 
3418     if(w >= d) {
3419       q = w / d;
3420       w = w % d;
3421     } else {
3422       q = 0;
3423     }
3424 
3425     s_mp_lshd(&quot, 1);
3426     DIGIT(&quot, 0) = (mp_digit)q;
3427   }
3428 #else
3429   {
3430     mp_digit p;
3431 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3432     mp_digit norm;
3433 #endif
3434 
3435     MP_CHECKOK( mp_init_copy(&rem, mp) );
3436 
3437 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3438     MP_DIGIT(&quot, 0) = d;
3439     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3440     if (norm)
3441       d <<= norm;
3442     MP_DIGIT(&quot, 0) = 0;
3443 #endif
3444 
3445     p = 0;
3446     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3447       w = DIGIT(&rem, ix);
3448 
3449       if (p) {
3450         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3451       } else if (w >= d) {
3452         q = w / d;
3453         w = w % d;
3454       } else {
3455         q = 0;
3456       }
3457 
3458       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3459       DIGIT(&quot, 0) = q;
3460       p = w;
3461     }
3462 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3463     if (norm)
3464       w >>= norm;
3465 #endif
3466   }
3467 #endif
3468 
3469   /* Deliver the remainder, if desired */
3470   if(r)
3471     *r = (mp_digit)w;
3472 
3473   s_mp_clamp(&quot);
3474   mp_exch(&quot, mp);
3475 CLEANUP:
3476   mp_clear(&quot);
3477   mp_clear(&rem);
3478 
3479   return res;
3480 } /* end s_mp_div_d() */
3481 
3482 /* }}} */
3483 
3484 
3485 /* }}} */
3486 
3487 /* {{{ Primitive full arithmetic */
3488 
3489 /* {{{ s_mp_add(a, b) */
3490 
3491 /* Compute a = |a| + |b|                                                  */
3492 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3493 {
3494 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3495   mp_word   w = 0;
3496 #else
3497   mp_digit  d, sum, carry = 0;
3498 #endif
3499   mp_digit *pa, *pb;
3500   mp_size   ix;
3501   mp_size   used;
3502   mp_err    res;
3503 
3504   /* Make sure a has enough precision for the output value */
3505   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3506     return res;
3507 
3508   /*
3509     Add up all digits up to the precision of b.  If b had initially
3510     the same precision as a, or greater, we took care of it by the
3511     padding step above, so there is no problem.  If b had initially
3512     less precision, we'll have to make sure the carry out is duly
3513     propagated upward among the higher-order digits of the sum.
3514    */
3515   pa = MP_DIGITS(a);
3516   pb = MP_DIGITS(b);
3517   used = MP_USED(b);
3518   for(ix = 0; ix < used; ix++) {
3519 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3520     w = w + *pa + *pb++;
3521     *pa++ = ACCUM(w);
3522     w = CARRYOUT(w);
3523 #else
3524     d = *pa;
3525     sum = d + *pb++;
3526     d = (sum < d);                      /* detect overflow */
3527     *pa++ = sum += carry;
3528     carry = d + (sum < carry);          /* detect overflow */
3529 #endif
3530   }
3531 
3532   /* If we run out of 'b' digits before we're actually done, make
3533      sure the carries get propagated upward...
3534    */
3535   used = MP_USED(a);
3536 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3537   while (w && ix < used) {
3538     w = w + *pa;
3539     *pa++ = ACCUM(w);
3540     w = CARRYOUT(w);
3541     ++ix;
3542   }
3543 #else
3544   while (carry && ix < used) {
3545     sum = carry + *pa;
3546     *pa++ = sum;
3547     carry = !sum;
3548     ++ix;
3549   }
3550 #endif
3551 
3552   /* If there's an overall carry out, increase precision and include
3553      it.  We could have done this initially, but why touch the memory
3554      allocator unless we're sure we have to?
3555    */
3556 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3557   if (w) {
3558     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3559       return res;
3560 
3561     DIGIT(a, ix) = (mp_digit)w;
3562   }
3563 #else
3564   if (carry) {
3565     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3566       return res;
3567 
3568     DIGIT(a, used) = carry;
3569   }
3570 #endif
3571 
3572   return MP_OKAY;
3573 } /* end s_mp_add() */
3574 
3575 /* }}} */
3576 
3577 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3578 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3579 {
3580   mp_digit *pa, *pb, *pc;
3581 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3582   mp_word   w = 0;
3583 #else
3584   mp_digit  sum, carry = 0, d;
3585 #endif
3586   mp_size   ix;
3587   mp_size   used;
3588   mp_err    res;
3589 
3590   MP_SIGN(c) = MP_SIGN(a);
3591   if (MP_USED(a) < MP_USED(b)) {
3592     const mp_int *xch = a;
3593     a = b;
3594     b = xch;
3595   }
3596 
3597   /* Make sure a has enough precision for the output value */
3598   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3599     return res;
3600 
3601   /*
3602     Add up all digits up to the precision of b.  If b had initially
3603     the same precision as a, or greater, we took care of it by the
3604     exchange step above, so there is no problem.  If b had initially
3605     less precision, we'll have to make sure the carry out is duly
3606     propagated upward among the higher-order digits of the sum.
3607    */
3608   pa = MP_DIGITS(a);
3609   pb = MP_DIGITS(b);
3610   pc = MP_DIGITS(c);
3611   used = MP_USED(b);
3612   for (ix = 0; ix < used; ix++) {
3613 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3614     w = w + *pa++ + *pb++;
3615     *pc++ = ACCUM(w);
3616     w = CARRYOUT(w);
3617 #else
3618     d = *pa++;
3619     sum = d + *pb++;
3620     d = (sum < d);                      /* detect overflow */
3621     *pc++ = sum += carry;
3622     carry = d + (sum < carry);          /* detect overflow */
3623 #endif
3624   }
3625 
3626   /* If we run out of 'b' digits before we're actually done, make
3627      sure the carries get propagated upward...
3628    */
3629   for (used = MP_USED(a); ix < used; ++ix) {
3630 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3631     w = w + *pa++;
3632     *pc++ = ACCUM(w);
3633     w = CARRYOUT(w);
3634 #else
3635     *pc++ = sum = carry + *pa++;
3636     carry = (sum < carry);
3637 #endif
3638   }
3639 
3640   /* If there's an overall carry out, increase precision and include
3641      it.  We could have done this initially, but why touch the memory
3642      allocator unless we're sure we have to?
3643    */
3644 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3645   if (w) {
3646     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3647       return res;
3648 
3649     DIGIT(c, used) = (mp_digit)w;
3650     ++used;
3651   }
3652 #else
3653   if (carry) {
3654     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3655       return res;
3656 
3657     DIGIT(c, used) = carry;
3658     ++used;
3659   }
3660 #endif
3661   MP_USED(c) = used;
3662   return MP_OKAY;
3663 }
3664 /* {{{ s_mp_add_offset(a, b, offset) */
3665 
3666 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3667 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3668 {
3669 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3670   mp_word   w, k = 0;
3671 #else
3672   mp_digit  d, sum, carry = 0;
3673 #endif
3674   mp_size   ib;
3675   mp_size   ia;
3676   mp_size   lim;
3677   mp_err    res;
3678 
3679   /* Make sure a has enough precision for the output value */
3680   lim = MP_USED(b) + offset;
3681   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3682     return res;
3683 
3684   /*
3685     Add up all digits up to the precision of b.  If b had initially
3686     the same precision as a, or greater, we took care of it by the
3687     padding step above, so there is no problem.  If b had initially
3688     less precision, we'll have to make sure the carry out is duly
3689     propagated upward among the higher-order digits of the sum.
3690    */
3691   lim = USED(b);
3692   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3693 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3694     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3695     DIGIT(a, ia) = ACCUM(w);
3696     k = CARRYOUT(w);
3697 #else
3698     d = MP_DIGIT(a, ia);
3699     sum = d + MP_DIGIT(b, ib);
3700     d = (sum < d);
3701     MP_DIGIT(a,ia) = sum += carry;
3702     carry = d + (sum < carry);
3703 #endif
3704   }
3705 
3706   /* If we run out of 'b' digits before we're actually done, make
3707      sure the carries get propagated upward...
3708    */
3709 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3710   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3711     w = (mp_word)DIGIT(a, ia) + k;
3712     DIGIT(a, ia) = ACCUM(w);
3713     k = CARRYOUT(w);
3714   }
3715 #else
3716   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3717     d = MP_DIGIT(a, ia);
3718     MP_DIGIT(a,ia) = sum = d + carry;
3719     carry = (sum < d);
3720   }
3721 #endif
3722 
3723   /* If there's an overall carry out, increase precision and include
3724      it.  We could have done this initially, but why touch the memory
3725      allocator unless we're sure we have to?
3726    */
3727 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3728   if(k) {
3729     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3730       return res;
3731 
3732     DIGIT(a, ia) = (mp_digit)k;
3733   }
3734 #else
3735   if (carry) {
3736     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3737       return res;
3738 
3739     DIGIT(a, lim) = carry;
3740   }
3741 #endif
3742   s_mp_clamp(a);
3743 
3744   return MP_OKAY;
3745 
3746 } /* end s_mp_add_offset() */
3747 
3748 /* }}} */
3749 
3750 /* {{{ s_mp_sub(a, b) */
3751 
3752 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3753 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3754 {
3755   mp_digit *pa, *pb, *limit;
3756 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3757   mp_sword  w = 0;
3758 #else
3759   mp_digit  d, diff, borrow = 0;
3760 #endif
3761 
3762   /*
3763     Subtract and propagate borrow.  Up to the precision of b, this
3764     accounts for the digits of b; after that, we just make sure the
3765     carries get to the right place.  This saves having to pad b out to
3766     the precision of a just to make the loops work right...
3767    */
3768   pa = MP_DIGITS(a);
3769   pb = MP_DIGITS(b);
3770   limit = pb + MP_USED(b);
3771   while (pb < limit) {
3772 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3773     w = w + *pa - *pb++;
3774     *pa++ = ACCUM(w);
3775     w >>= MP_DIGIT_BIT;
3776 #else
3777     d = *pa;
3778     diff = d - *pb++;
3779     d = (diff > d);                             /* detect borrow */
3780     if (borrow && --diff == MP_DIGIT_MAX)
3781       ++d;
3782     *pa++ = diff;
3783     borrow = d;
3784 #endif
3785   }
3786   limit = MP_DIGITS(a) + MP_USED(a);
3787 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3788   while (w && pa < limit) {
3789     w = w + *pa;
3790     *pa++ = ACCUM(w);
3791     w >>= MP_DIGIT_BIT;
3792   }
3793 #else
3794   while (borrow && pa < limit) {
3795     d = *pa;
3796     *pa++ = diff = d - borrow;
3797     borrow = (diff > d);
3798   }
3799 #endif
3800 
3801   /* Clobber any leading zeroes we created    */
3802   s_mp_clamp(a);
3803 
3804   /*
3805      If there was a borrow out, then |b| > |a| in violation
3806      of our input invariant.  We've already done the work,
3807      but we'll at least complain about it...
3808    */
3809 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3810   return w ? MP_RANGE : MP_OKAY;
3811 #else
3812   return borrow ? MP_RANGE : MP_OKAY;
3813 #endif
3814 } /* end s_mp_sub() */
3815 
3816 /* }}} */
3817 
3818 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3819 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3820 {
3821   mp_digit *pa, *pb, *pc;
3822 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3823   mp_sword  w = 0;
3824 #else
3825   mp_digit  d, diff, borrow = 0;
3826 #endif
3827   int       ix, limit;
3828   mp_err    res;
3829 
3830   MP_SIGN(c) = MP_SIGN(a);
3831 
3832   /* Make sure a has enough precision for the output value */
3833   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3834     return res;
3835 
3836   /*
3837     Subtract and propagate borrow.  Up to the precision of b, this
3838     accounts for the digits of b; after that, we just make sure the
3839     carries get to the right place.  This saves having to pad b out to
3840     the precision of a just to make the loops work right...
3841    */
3842   pa = MP_DIGITS(a);
3843   pb = MP_DIGITS(b);
3844   pc = MP_DIGITS(c);
3845   limit = MP_USED(b);
3846   for (ix = 0; ix < limit; ++ix) {
3847 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3848     w = w + *pa++ - *pb++;
3849     *pc++ = ACCUM(w);
3850     w >>= MP_DIGIT_BIT;
3851 #else
3852     d = *pa++;
3853     diff = d - *pb++;
3854     d = (diff > d);
3855     if (borrow && --diff == MP_DIGIT_MAX)
3856       ++d;
3857     *pc++ = diff;
3858     borrow = d;
3859 #endif
3860   }
3861   for (limit = MP_USED(a); ix < limit; ++ix) {
3862 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3863     w = w + *pa++;
3864     *pc++ = ACCUM(w);
3865     w >>= MP_DIGIT_BIT;
3866 #else
3867     d = *pa++;
3868     *pc++ = diff = d - borrow;
3869     borrow = (diff > d);
3870 #endif
3871   }
3872 
3873   /* Clobber any leading zeroes we created    */
3874   MP_USED(c) = ix;
3875   s_mp_clamp(c);
3876 
3877   /*
3878      If there was a borrow out, then |b| > |a| in violation
3879      of our input invariant.  We've already done the work,
3880      but we'll at least complain about it...
3881    */
3882 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3883   return w ? MP_RANGE : MP_OKAY;
3884 #else
3885   return borrow ? MP_RANGE : MP_OKAY;
3886 #endif
3887 }
3888 /* {{{ s_mp_mul(a, b) */
3889 
3890 /* Compute a = |a| * |b|                                                  */
3891 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3892 {
3893   return mp_mul(a, b, a);
3894 } /* end s_mp_mul() */
3895 
3896 /* }}} */
3897 
3898 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3899 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3900 #define MP_MUL_DxD(a, b, Phi, Plo) \
3901   { unsigned long long product = (unsigned long long)a * b; \
3902     Plo = (mp_digit)product; \
3903     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3904 #elif defined(OSF1)
3905 #define MP_MUL_DxD(a, b, Phi, Plo) \
3906   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3907     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3908 #else
3909 #define MP_MUL_DxD(a, b, Phi, Plo) \
3910   { mp_digit a0b1, a1b0; \
3911     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3912     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3913     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3914     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3915     a1b0 += a0b1; \
3916     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3917     if (a1b0 < a0b1)  \
3918       Phi += MP_HALF_RADIX; \
3919     a1b0 <<= MP_HALF_DIGIT_BIT; \
3920     Plo += a1b0; \
3921     if (Plo < a1b0) \
3922       ++Phi; \
3923   }
3924 #endif
3925 
3926 #if !defined(MP_ASSEMBLY_MULTIPLY)
3927 /* c = a * b */
3928 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3929 {
3930 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3931   mp_digit   d = 0;
3932 
3933   /* Inner product:  Digits of a */
3934   while (a_len--) {
3935     mp_word w = ((mp_word)b * *a++) + d;
3936     *c++ = ACCUM(w);
3937     d = CARRYOUT(w);
3938   }
3939   *c = d;
3940 #else
3941   mp_digit carry = 0;
3942   while (a_len--) {
3943     mp_digit a_i = *a++;
3944     mp_digit a0b0, a1b1;
3945 
3946     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3947 
3948     a0b0 += carry;
3949     if (a0b0 < carry)
3950       ++a1b1;
3951     *c++ = a0b0;
3952     carry = a1b1;
3953   }
3954   *c = carry;
3955 #endif
3956 }
3957 
3958 /* c += a * b */
3959 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3960                               mp_digit *c)
3961 {
3962 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3963   mp_digit   d = 0;
3964 
3965   /* Inner product:  Digits of a */
3966   while (a_len--) {
3967     mp_word w = ((mp_word)b * *a++) + *c + d;
3968     *c++ = ACCUM(w);
3969     d = CARRYOUT(w);
3970   }
3971   *c = d;
3972 #else
3973   mp_digit carry = 0;
3974   while (a_len--) {
3975     mp_digit a_i = *a++;
3976     mp_digit a0b0, a1b1;
3977 
3978     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3979 
3980     a0b0 += carry;
3981     if (a0b0 < carry)
3982       ++a1b1;
3983     a0b0 += a_i = *c;
3984     if (a0b0 < a_i)
3985       ++a1b1;
3986     *c++ = a0b0;
3987     carry = a1b1;
3988   }
3989   *c = carry;
3990 #endif
3991 }
3992 
3993 /* Presently, this is only used by the Montgomery arithmetic code. */
3994 /* c += a * b */
3995 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3996 {
3997 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3998   mp_digit   d = 0;
3999 
4000   /* Inner product:  Digits of a */
4001   while (a_len--) {
4002     mp_word w = ((mp_word)b * *a++) + *c + d;
4003     *c++ = ACCUM(w);
4004     d = CARRYOUT(w);
4005   }
4006 
4007   while (d) {
4008     mp_word w = (mp_word)*c + d;
4009     *c++ = ACCUM(w);
4010     d = CARRYOUT(w);
4011   }
4012 #else
4013   mp_digit carry = 0;
4014   while (a_len--) {
4015     mp_digit a_i = *a++;
4016     mp_digit a0b0, a1b1;
4017 
4018     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4019 
4020     a0b0 += carry;
4021     if (a0b0 < carry)
4022       ++a1b1;
4023 
4024     a0b0 += a_i = *c;
4025     if (a0b0 < a_i)
4026       ++a1b1;
4027 
4028     *c++ = a0b0;
4029     carry = a1b1;
4030   }
4031   while (carry) {
4032     mp_digit c_i = *c;
4033     carry += c_i;
4034     *c++ = carry;
4035     carry = carry < c_i;
4036   }
4037 #endif
4038 }
4039 #endif
4040 
4041 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4042 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4043 #define MP_SQR_D(a, Phi, Plo) \
4044   { unsigned long long square = (unsigned long long)a * a; \
4045     Plo = (mp_digit)square; \
4046     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4047 #elif defined(OSF1)
4048 #define MP_SQR_D(a, Phi, Plo) \
4049   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4050     Phi = asm ("umulh %a0, %a0, %v0", a); }
4051 #else
4052 #define MP_SQR_D(a, Phi, Plo) \
4053   { mp_digit Pmid; \
4054     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4055     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4056     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4057     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4058     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4059     Plo += Pmid;  \
4060     if (Plo < Pmid)  \
4061       ++Phi;  \
4062   }
4063 #endif
4064 
4065 #if !defined(MP_ASSEMBLY_SQUARE)
4066 /* Add the squares of the digits of a to the digits of b. */
4067 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4068 {
4069 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4070   mp_word  w;
4071   mp_digit d;
4072   mp_size  ix;
4073 
4074   w  = 0;
4075 #define ADD_SQUARE(n) \
4076     d = pa[n]; \
4077     w += (d * (mp_word)d) + ps[2*n]; \
4078     ps[2*n] = ACCUM(w); \
4079     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4080     ps[2*n+1] = ACCUM(w); \
4081     w = (w >> DIGIT_BIT)
4082 
4083   for (ix = a_len; ix >= 4; ix -= 4) {
4084     ADD_SQUARE(0);
4085     ADD_SQUARE(1);
4086     ADD_SQUARE(2);
4087     ADD_SQUARE(3);
4088     pa += 4;
4089     ps += 8;
4090   }
4091   if (ix) {
4092     ps += 2*ix;
4093     pa += ix;
4094     switch (ix) {
4095     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4096     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4097     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4098     case 0: break;
4099     }
4100   }
4101   while (w) {
4102     w += *ps;
4103     *ps++ = ACCUM(w);
4104     w = (w >> DIGIT_BIT);
4105   }
4106 #else
4107   mp_digit carry = 0;
4108   while (a_len--) {
4109     mp_digit a_i = *pa++;
4110     mp_digit a0a0, a1a1;
4111 
4112     MP_SQR_D(a_i, a1a1, a0a0);
4113 
4114     /* here a1a1 and a0a0 constitute a_i ** 2 */
4115     a0a0 += carry;
4116     if (a0a0 < carry)
4117       ++a1a1;
4118 
4119     /* now add to ps */
4120     a0a0 += a_i = *ps;
4121     if (a0a0 < a_i)
4122       ++a1a1;
4123     *ps++ = a0a0;
4124     a1a1 += a_i = *ps;
4125     carry = (a1a1 < a_i);
4126     *ps++ = a1a1;
4127   }
4128   while (carry) {
4129     mp_digit s_i = *ps;
4130     carry += s_i;
4131     *ps++ = carry;
4132     carry = carry < s_i;
4133   }
4134 #endif
4135 }
4136 #endif
4137 
4138 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4139 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4140 /*
4141 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4142 ** so its high bit is 1.   This code is from NSPR.
4143 */
4144 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4145                        mp_digit *qp, mp_digit *rp)
4146 {
4147     mp_digit d1, d0, q1, q0;
4148     mp_digit r1, r0, m;
4149 
4150     d1 = divisor >> MP_HALF_DIGIT_BIT;
4151     d0 = divisor & MP_HALF_DIGIT_MAX;
4152     r1 = Nhi % d1;
4153     q1 = Nhi / d1;
4154     m = q1 * d0;
4155     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4156     if (r1 < m) {
4157         q1--, r1 += divisor;
4158         if (r1 >= divisor && r1 < m) {
4159             q1--, r1 += divisor;
4160         }
4161     }
4162     r1 -= m;
4163     r0 = r1 % d1;
4164     q0 = r1 / d1;
4165     m = q0 * d0;
4166     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4167     if (r0 < m) {
4168         q0--, r0 += divisor;
4169         if (r0 >= divisor && r0 < m) {
4170             q0--, r0 += divisor;
4171         }
4172     }
4173     if (qp)
4174         *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4175     if (rp)
4176         *rp = r0 - m;
4177     return MP_OKAY;
4178 }
4179 #endif
4180 
4181 #if MP_SQUARE
4182 /* {{{ s_mp_sqr(a) */
4183 
4184 mp_err   s_mp_sqr(mp_int *a)
4185 {
4186   mp_err   res;
4187   mp_int   tmp;
4188   tmp.flag = (mp_flag)0;
4189 
4190   if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4191     return res;
4192   res = mp_sqr(a, &tmp);
4193   if (res == MP_OKAY) {
4194     s_mp_exch(&tmp, a);
4195   }
4196   mp_clear(&tmp);
4197   return res;
4198 }
4199 
4200 /* }}} */
4201 #endif
4202 
4203 /* {{{ s_mp_div(a, b) */
4204 
4205 /*
4206   s_mp_div(a, b)
4207 
4208   Compute a = a / b and b = a mod b.  Assumes b > a.
4209  */
4210 
4211 mp_err   s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4212                   mp_int *div,  /* i: divisor                */
4213                   mp_int *quot) /* i: 0;        o: quotient  */
4214 {
4215   mp_int   part, t;
4216 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4217   mp_word  q_msd;
4218 #else
4219   mp_digit q_msd;
4220 #endif
4221   mp_err   res;
4222   mp_digit d;
4223   mp_digit div_msd;
4224   int      ix;
4225 
4226   t.dp = (mp_digit)0;
4227 
4228   if(mp_cmp_z(div) == 0)
4229     return MP_RANGE;
4230 
4231   /* Shortcut if divisor is power of two */
4232   if((ix = s_mp_ispow2(div)) >= 0) {
4233     MP_CHECKOK( mp_copy(rem, quot) );
4234     s_mp_div_2d(quot, (mp_digit)ix);
4235     s_mp_mod_2d(rem,  (mp_digit)ix);
4236 
4237     return MP_OKAY;
4238   }
4239 
4240   DIGITS(&t) = 0;
4241   MP_SIGN(rem) = ZPOS;
4242   MP_SIGN(div) = ZPOS;
4243 
4244   /* A working temporary for division     */
4245   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4246 
4247   /* Normalize to optimize guessing       */
4248   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4249 
4250   part = *rem;
4251 
4252   /* Perform the division itself...woo!   */
4253   MP_USED(quot) = MP_ALLOC(quot);
4254 
4255   /* Find a partial substring of rem which is at least div */
4256   /* If we didn't find one, we're finished dividing    */
4257   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4258     int i;
4259     int unusedRem;
4260 
4261     unusedRem = MP_USED(rem) - MP_USED(div);
4262     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4263     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4264     MP_USED(&part)   = MP_USED(div);
4265     if (s_mp_cmp(&part, div) < 0) {
4266       -- unusedRem;
4267 #if MP_ARGCHK == 2
4268       assert(unusedRem >= 0);
4269 #endif
4270       -- MP_DIGITS(&part);
4271       ++ MP_USED(&part);
4272       ++ MP_ALLOC(&part);
4273     }
4274 
4275     /* Compute a guess for the next quotient digit       */
4276     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4277     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4278     if (q_msd >= div_msd) {
4279       q_msd = 1;
4280     } else if (MP_USED(&part) > 1) {
4281 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4282       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4283       q_msd /= div_msd;
4284       if (q_msd == RADIX)
4285         --q_msd;
4286 #else
4287       mp_digit r;
4288       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4289                                   div_msd, &q_msd, &r) );
4290 #endif
4291     } else {
4292       q_msd = 0;
4293     }
4294 #if MP_ARGCHK == 2
4295     assert(q_msd > 0); /* This case should never occur any more. */
4296 #endif
4297     if (q_msd <= 0)
4298       break;
4299 
4300     /* See what that multiplies out to                   */
4301     mp_copy(div, &t);
4302     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4303 
4304     /*
4305        If it's too big, back it off.  We should not have to do this
4306        more than once, or, in rare cases, twice.  Knuth describes a
4307        method by which this could be reduced to a maximum of once, but
4308        I didn't implement that here.
4309      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4310      */
4311     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4312       --q_msd;
4313       s_mp_sub(&t, div);        /* t -= div */
4314     }
4315     if (i < 0) {
4316       res = MP_RANGE;
4317       goto CLEANUP;
4318     }
4319 
4320     /* At this point, q_msd should be the right next digit   */
4321     MP_CHECKOK( s_mp_sub(&part, &t) );  /* part -= t */
4322     s_mp_clamp(rem);
4323 
4324     /*
4325       Include the digit in the quotient.  We allocated enough memory
4326       for any quotient we could ever possibly get, so we should not
4327       have to check for failures here
4328      */
4329     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4330   }
4331 
4332   /* Denormalize remainder                */
4333   if (d) {
4334     s_mp_div_2d(rem, d);
4335   }
4336 
4337   s_mp_clamp(quot);
4338 
4339 CLEANUP:
4340   mp_clear(&t);
4341 
4342   return res;
4343 
4344 } /* end s_mp_div() */
4345 
4346 
4347 /* }}} */
4348 
4349 /* {{{ s_mp_2expt(a, k) */
4350 
4351 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4352 {
4353   mp_err    res;
4354   mp_size   dig, bit;
4355 
4356   dig = k / DIGIT_BIT;
4357   bit = k % DIGIT_BIT;
4358 
4359   mp_zero(a);
4360   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4361     return res;
4362 
4363   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4364 
4365   return MP_OKAY;
4366 
4367 } /* end s_mp_2expt() */
4368 
4369 /* }}} */
4370 
4371 /* {{{ s_mp_reduce(x, m, mu) */
4372 
4373 /*
4374   Compute Barrett reduction, x (mod m), given a precomputed value for
4375   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4376   faster than straight division, when many reductions by the same
4377   value of m are required (such as in modular exponentiation).  This
4378   can nearly halve the time required to do modular exponentiation,
4379   as compared to using the full integer divide to reduce.
4380 
4381   This algorithm was derived from the _Handbook of Applied
4382   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4383   pp. 603-604.
4384  */
4385 
4386 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4387 {
4388   mp_int   q;
4389   mp_err   res;
4390 
4391   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4392     return res;
4393 
4394   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4395   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4396   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4397 
4398   /* x = x mod b^(k+1), quick (no division) */
4399   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4400 
4401   /* q = q * m mod b^(k+1), quick (no division) */
4402   s_mp_mul(&q, m);
4403   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4404 
4405   /* x = x - q */
4406   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4407     goto CLEANUP;
4408 
4409   /* If x < 0, add b^(k+1) to it */
4410   if(mp_cmp_z(x) < 0) {
4411     mp_set(&q, 1);
4412     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4413       goto CLEANUP;
4414     if((res = mp_add(x, &q, x)) != MP_OKAY)
4415       goto CLEANUP;
4416   }
4417 
4418   /* Back off if it's too big */
4419   while(mp_cmp(x, m) >= 0) {
4420     if((res = s_mp_sub(x, m)) != MP_OKAY)
4421       break;
4422   }
4423 
4424  CLEANUP:
4425   mp_clear(&q);
4426 
4427   return res;
4428 
4429 } /* end s_mp_reduce() */
4430 
4431 /* }}} */
4432 
4433 /* }}} */
4434 
4435 /* {{{ Primitive comparisons */
4436 
4437 /* {{{ s_mp_cmp(a, b) */
4438 
4439 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4440 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4441 {
4442   mp_size used_a = MP_USED(a);
4443   {
4444     mp_size used_b = MP_USED(b);
4445 
4446     if (used_a > used_b)
4447       goto IS_GT;
4448     if (used_a < used_b)
4449       goto IS_LT;
4450   }
4451   {
4452     mp_digit *pa, *pb;
4453     mp_digit da = 0, db = 0;
4454 
4455 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4456 
4457     pa = MP_DIGITS(a) + used_a;
4458     pb = MP_DIGITS(b) + used_a;
4459     while (used_a >= 4) {
4460       pa     -= 4;
4461       pb     -= 4;
4462       used_a -= 4;
4463       CMP_AB(3);
4464       CMP_AB(2);
4465       CMP_AB(1);
4466       CMP_AB(0);
4467     }
4468     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4469       /* do nothing */;
4470 done:
4471     if (da > db)
4472       goto IS_GT;
4473     if (da < db)
4474       goto IS_LT;
4475   }
4476   return MP_EQ;
4477 IS_LT:
4478   return MP_LT;
4479 IS_GT:
4480   return MP_GT;
4481 } /* end s_mp_cmp() */
4482 
4483 /* }}} */
4484 
4485 /* {{{ s_mp_cmp_d(a, d) */
4486 
4487 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4488 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4489 {
4490   if(USED(a) > 1)
4491     return MP_GT;
4492 
4493   if(DIGIT(a, 0) < d)
4494     return MP_LT;
4495   else if(DIGIT(a, 0) > d)
4496     return MP_GT;
4497   else
4498     return MP_EQ;
4499 
4500 } /* end s_mp_cmp_d() */
4501 
4502 /* }}} */
4503 
4504 /* {{{ s_mp_ispow2(v) */
4505 
4506 /*
4507   Returns -1 if the value is not a power of two; otherwise, it returns
4508   k such that v = 2^k, i.e. lg(v).
4509  */
4510 int      s_mp_ispow2(const mp_int *v)
4511 {
4512   mp_digit d;
4513   int      extra = 0, ix;
4514 
4515   ix = MP_USED(v) - 1;
4516   d = MP_DIGIT(v, ix); /* most significant digit of v */
4517 
4518   extra = s_mp_ispow2d(d);
4519   if (extra < 0 || ix == 0)
4520     return extra;
4521 
4522   while (--ix >= 0) {
4523     if (DIGIT(v, ix) != 0)
4524       return -1; /* not a power of two */
4525     extra += MP_DIGIT_BIT;
4526   }
4527 
4528   return extra;
4529 
4530 } /* end s_mp_ispow2() */
4531 
4532 /* }}} */
4533 
4534 /* {{{ s_mp_ispow2d(d) */
4535 
4536 int      s_mp_ispow2d(mp_digit d)
4537 {
4538   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4539     int pow = 0;
4540 #if defined (MP_USE_UINT_DIGIT)
4541     if (d & 0xffff0000U)
4542       pow += 16;
4543     if (d & 0xff00ff00U)
4544       pow += 8;
4545     if (d & 0xf0f0f0f0U)
4546       pow += 4;
4547     if (d & 0xccccccccU)
4548       pow += 2;
4549     if (d & 0xaaaaaaaaU)
4550       pow += 1;
4551 #elif defined(MP_USE_LONG_LONG_DIGIT)
4552     if (d & 0xffffffff00000000ULL)
4553       pow += 32;
4554     if (d & 0xffff0000ffff0000ULL)
4555       pow += 16;
4556     if (d & 0xff00ff00ff00ff00ULL)
4557       pow += 8;
4558     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4559       pow += 4;
4560     if (d & 0xccccccccccccccccULL)
4561       pow += 2;
4562     if (d & 0xaaaaaaaaaaaaaaaaULL)
4563       pow += 1;
4564 #elif defined(MP_USE_LONG_DIGIT)
4565     if (d & 0xffffffff00000000UL)
4566       pow += 32;
4567     if (d & 0xffff0000ffff0000UL)
4568       pow += 16;
4569     if (d & 0xff00ff00ff00ff00UL)
4570       pow += 8;
4571     if (d & 0xf0f0f0f0f0f0f0f0UL)
4572       pow += 4;
4573     if (d & 0xccccccccccccccccUL)
4574       pow += 2;
4575     if (d & 0xaaaaaaaaaaaaaaaaUL)
4576       pow += 1;
4577 #else
4578 #error "unknown type for mp_digit"
4579 #endif
4580     return pow;
4581   }
4582   return -1;
4583 
4584 } /* end s_mp_ispow2d() */
4585 
4586 /* }}} */
4587 
4588 /* }}} */
4589 
4590 /* {{{ Primitive I/O helpers */
4591 
4592 /* {{{ s_mp_tovalue(ch, r) */
4593 
4594 /*
4595   Convert the given character to its digit value, in the given radix.
4596   If the given character is not understood in the given radix, -1 is
4597   returned.  Otherwise the digit's numeric value is returned.
4598 
4599   The results will be odd if you use a radix < 2 or > 62, you are
4600   expected to know what you're up to.
4601  */
4602 int      s_mp_tovalue(char ch, int r)
4603 {
4604   int    val, xch;
4605 
4606   if(r > 36)
4607     xch = ch;
4608   else
4609     xch = toupper(ch);
4610 
4611   if(isdigit(xch))
4612     val = xch - '0';
4613   else if(isupper(xch))
4614     val = xch - 'A' + 10;
4615   else if(islower(xch))
4616     val = xch - 'a' + 36;
4617   else if(xch == '+')
4618     val = 62;
4619   else if(xch == '/')
4620     val = 63;
4621   else
4622     return -1;
4623 
4624   if(val < 0 || val >= r)
4625     return -1;
4626 
4627   return val;
4628 
4629 } /* end s_mp_tovalue() */
4630 
4631 /* }}} */
4632 
4633 /* {{{ s_mp_todigit(val, r, low) */
4634 
4635 /*
4636   Convert val to a radix-r digit, if possible.  If val is out of range
4637   for r, returns zero.  Otherwise, returns an ASCII character denoting
4638   the value in the given radix.
4639 
4640   The results may be odd if you use a radix < 2 or > 64, you are
4641   expected to know what you're doing.
4642  */
4643 
4644 char     s_mp_todigit(mp_digit val, int r, int low)
4645 {
4646   char   ch;
4647 
4648   if(val >= (unsigned int)r)
4649     return 0;
4650 
4651   ch = s_dmap_1[val];
4652 
4653   if(r <= 36 && low)
4654     ch = tolower(ch);
4655 
4656   return ch;
4657 
4658 } /* end s_mp_todigit() */
4659 
4660 /* }}} */
4661 
4662 /* {{{ s_mp_outlen(bits, radix) */
4663 
4664 /*
4665    Return an estimate for how long a string is needed to hold a radix
4666    r representation of a number with 'bits' significant bits, plus an
4667    extra for a zero terminator (assuming C style strings here)
4668  */
4669 int      s_mp_outlen(int bits, int r)
4670 {
4671   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4672 
4673 } /* end s_mp_outlen() */
4674 
4675 /* }}} */
4676 
4677 /* }}} */
4678 
4679 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4680 /* mp_read_unsigned_octets(mp, str, len)
4681    Read in a raw value (base 256) into the given mp_int
4682    No sign bit, number is positive.  Leading zeros ignored.
4683  */
4684 
4685 mp_err
4686 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4687 {
4688   int            count;
4689   mp_err         res;
4690   mp_digit       d;
4691 
4692   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4693 
4694   mp_zero(mp);
4695 
4696   count = len % sizeof(mp_digit);
4697   if (count) {
4698     for (d = 0; count-- > 0; --len) {
4699       d = (d << 8) | *str++;
4700     }
4701     MP_DIGIT(mp, 0) = d;
4702   }
4703 
4704   /* Read the rest of the digits */
4705   for(; len > 0; len -= sizeof(mp_digit)) {
4706     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4707       d = (d << 8) | *str++;
4708     }
4709     if (MP_EQ == mp_cmp_z(mp)) {
4710       if (!d)
4711         continue;
4712     } else {
4713       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4714         return res;
4715     }
4716     MP_DIGIT(mp, 0) = d;
4717   }
4718   return MP_OKAY;
4719 } /* end mp_read_unsigned_octets() */
4720 /* }}} */
4721 
4722 /* {{{ mp_unsigned_octet_size(mp) */
4723 int
4724 mp_unsigned_octet_size(const mp_int *mp)
4725 {
4726   int  bytes;
4727   int  ix;
4728   mp_digit  d = 0;
4729 
4730   ARGCHK(mp != NULL, MP_BADARG);
4731   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4732 
4733   bytes = (USED(mp) * sizeof(mp_digit));
4734 
4735   /* subtract leading zeros. */
4736   /* Iterate over each digit... */
4737   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4738     d = DIGIT(mp, ix);
4739     if (d)
4740         break;
4741     bytes -= sizeof(d);
4742   }
4743   if (!bytes)
4744     return 1;
4745 
4746   /* Have MSD, check digit bytes, high order first */
4747   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4748     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4749     if (x)
4750         break;
4751     --bytes;
4752   }
4753   return bytes;
4754 } /* end mp_unsigned_octet_size() */
4755 /* }}} */
4756 
4757 /* {{{ mp_to_unsigned_octets(mp, str) */
4758 /* output a buffer of big endian octets no longer than specified. */
4759 mp_err
4760 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4761 {
4762   int  ix, pos = 0;
4763   unsigned int  bytes;
4764 
4765   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4766 
4767   bytes = mp_unsigned_octet_size(mp);
4768   ARGCHK(bytes <= maxlen, MP_BADARG);
4769 
4770   /* Iterate over each digit... */
4771   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4772     mp_digit  d = DIGIT(mp, ix);
4773     int       jx;
4774 
4775     /* Unpack digit bytes, high order first */
4776     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4777       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4778       if (!pos && !x)   /* suppress leading zeros */
4779         continue;
4780       str[pos++] = x;
4781     }
4782   }
4783   if (!pos)
4784     str[pos++] = 0;
4785   return pos;
4786 } /* end mp_to_unsigned_octets() */
4787 /* }}} */
4788 
4789 /* {{{ mp_to_signed_octets(mp, str) */
4790 /* output a buffer of big endian octets no longer than specified. */
4791 mp_err
4792 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4793 {
4794   int  ix, pos = 0;
4795   unsigned int  bytes;
4796 
4797   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4798 
4799   bytes = mp_unsigned_octet_size(mp);
4800   ARGCHK(bytes <= maxlen, MP_BADARG);
4801 
4802   /* Iterate over each digit... */
4803   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4804     mp_digit  d = DIGIT(mp, ix);
4805     int       jx;
4806 
4807     /* Unpack digit bytes, high order first */
4808     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4809       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4810       if (!pos) {
4811         if (!x)         /* suppress leading zeros */
4812           continue;
4813         if (x & 0x80) { /* add one leading zero to make output positive.  */
4814           ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4815           if (bytes + 1 > maxlen)
4816             return MP_BADARG;
4817           str[pos++] = 0;
4818         }
4819       }
4820       str[pos++] = x;
4821     }
4822   }
4823   if (!pos)
4824     str[pos++] = 0;
4825   return pos;
4826 } /* end mp_to_signed_octets() */
4827 /* }}} */
4828 
4829 /* {{{ mp_to_fixlen_octets(mp, str) */
4830 /* output a buffer of big endian octets exactly as long as requested. */
4831 mp_err
4832 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4833 {
4834   int  ix, pos = 0;
4835   unsigned int  bytes;
4836 
4837   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4838 
4839   bytes = mp_unsigned_octet_size(mp);
4840   ARGCHK(bytes <= length, MP_BADARG);
4841 
4842   /* place any needed leading zeros */
4843   for (;length > bytes; --length) {
4844         *str++ = 0;
4845   }
4846 
4847   /* Iterate over each digit... */
4848   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4849     mp_digit  d = DIGIT(mp, ix);
4850     int       jx;
4851 
4852     /* Unpack digit bytes, high order first */
4853     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4854       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4855       if (!pos && !x)   /* suppress leading zeros */
4856         continue;
4857       str[pos++] = x;
4858     }
4859   }
4860   if (!pos)
4861     str[pos++] = 0;
4862   return MP_OKAY;
4863 } /* end mp_to_fixlen_octets() */
4864 /* }}} */
4865 
4866 
4867 /*------------------------------------------------------------------------*/
4868 /* HERE THERE BE DRAGONS                                                  */