1 /*
   2  * Copyright (c) 2015, Intel Corporation.
   3  * Intel Math Library (LIBM) Source Code
   4  *
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 /******************************************************************************/
  28 //                     ALGORITHM DESCRIPTION
  29 //                     ---------------------
  30 //
  31 // Description:
  32 //  Let K = 64 (table size).
  33 //        x    x/log(2)     n
  34 //       e  = 2          = 2 * T[j] * (1 + P(y))
  35 //  where
  36 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
  37 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
  38 //                  j/K
  39 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
  40 //
  41 //       P(y) is a minimax polynomial approximation of exp(y)-1
  42 //       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
  43 //
  44 //  To avoid problems with arithmetic overflow and underflow,
  45 //            n                        n1  n2
  46 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
  47 //  where BIAS is a value of exponent bias.
  48 //
  49 // Special cases:
  50 //  exp(NaN) = NaN
  51 //  exp(+INF) = +INF
  52 //  exp(-INF) = 0
  53 //  exp(x) = 1 for subnormals
  54 //  for finite argument, only exp(0)=1 is exact
  55 //  For IEEE double
  56 //    if x >  709.782712893383973096 then exp(x) overflow
  57 //    if x < -745.133219101941108420 then exp(x) underflow
  58 //
  59 /******************************************************************************/
  60 
  61 
  62 #include "precompiled.hpp"
  63 #include "asm/assembler.hpp"
  64 #include "asm/assembler.inline.hpp"
  65 
  66 
  67 #ifdef _LP64
  68 
  69 juint _cv[] =
  70 {
         // Coefficient vector for the 64-bit fast_exp. Every double is stored as two
         // juints, low word first. fast_exp loads it in 16-byte slices at these
         // byte offsets:
         //    0: 0x40571547652b82fe (twice)  ~ K/log(2) with K = 64
         //   16: 0x3f862e42fefa0000 (twice)  ~ log(2)/K, high part (low mantissa bits zero)
         //   32: 0x3d1cf79abc9e3b3a (twice)  low-order part of log(2)/K
         //   48: 0x3fdffffffffffffe (twice)  ~ 1/2
         //   64: 0x3f56c15ce3289860 (~1/720), 0x3fa55555555b9e25 (~1/24)
         //   80: 0x3f811115c090cf0f (~1/120), 0x3fc5555555548ba1 (~1/6)
  71     0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
  72     0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
  73     0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
  74     0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
  75     0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 
  76 };
  77 
     // 0x4338000000000000 = 1.5 * 2^52 in both lanes. fast_exp adds and then
     // subtracts it so that x*K/log(2) is rounded to an integer m whose bits sit
     // in the low mantissa bits of the intermediate sum.
  78 juint _shifter[] =
  79 {
  80     0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  81 };
  82 
     // Per-quadword mask 0x00000000ffffffc0: keeps bits 6..31 of the shifted value,
     // i.e. m with its low six bits (the table index j) cleared.
  83 juint _mmask[] =
  84 {
  85     0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  86 };
  87 
     // 0xffc0 = 1023 << 6 per quadword: the double exponent bias, pre-shifted so
     // that after fast_exp's psllq by 46 it lands in the exponent field.
  88 juint _bias[] =
  89 {
  90     0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  91 };
  92 
  93 juint _Tbl_addr[] =
  94 {
         // Table of 2^(j/64) for j = 0..63: 64 entries of 16 bytes (four juints) each,
         // indexed by fast_exp with byte offset (m & 63) * 16.
         // Within an entry (low word first):
         //   juints 0-1: a small double that fast_exp adds to the reduced argument
         //               (the T_lo[j] correction of the algorithm description);
         //   juints 2-3: the mantissa bits of T_hi[j] with a zero exponent field;
         //               fast_exp ORs the exponent in separately.
         // Entry 0 is all zeros (2^0 = 1 has no fraction bits and no correction).
  95     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
  96     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
  97     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
  98     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
  99     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 100     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 101     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 102     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 103     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 104     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 105     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 106     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 107     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 108     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 109     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 110     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 111     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 112     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 113     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 114     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 115     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 116     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 117     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 118     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 119     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 120     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 121     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 122     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 123     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 124     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 125     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 126     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 127     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 128     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 129     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 130     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 131     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 132     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 133     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 134     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 135     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 136     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 137     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 138     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 139     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 140     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 141     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 142     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 143     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 144     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 145     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 146     0x000fa7c1UL
 147 };
 148 
 149 juint _ALLONES[] =
 150 {
         // Bit mask source for the slow (overflow/underflow) path of fast_exp, which
         // shifts it left to build a mask of the high-order result bits.
         // fast_exp fetches this array with a 16-byte movdqu, so it must be a full
         // 16 bytes long; with only three words the load would read 4 bytes past
         // the end of the object. The fourth word is zero padding: only the low
         // quadword takes part in the scalar arithmetic that follows the load.
 151     0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0x00000000UL
 152 };
 153 
 154 juint _ebias[] =
 155 {
         // 0x3ff0000000000000 (the bit pattern of 1.0, i.e. the exponent bias in the
         // exponent field) in both lanes; added to a raw exponent in fast_exp's slow
         // path to form a power-of-two scale factor.
 156     0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 157 };
 158 
     // 0x7fefffffffffffff: largest finite double. fast_exp squares it to produce an
     // overflowing result for large positive arguments.
 159 juint _XMAX[] =
 160 {
 161     0xffffffffUL, 0x7fefffffUL
 162 };
 163 
     // 0x0010000000000000: smallest positive normal double. fast_exp squares it to
     // produce an underflowing result for large negative arguments.
 164 juint _XMIN[] =
 165 {
 166     0x00000000UL, 0x00100000UL
 167 };
 168 
     // 0x7ff0000000000000: +infinity, returned for exp(+INF).
 169 juint _INF[] =
 170 {
 171     0x00000000UL, 0x7ff00000UL
 172 };
 173 
     // +0.0, returned for exp(-INF).
 174 juint _ZERO[] =
 175 {
 176     0x00000000UL, 0x00000000UL
 177 };
 178 
     // 0x3ff0000000000000: 1.0, added to x when |x| is tiny (exp(x) ~ 1 + x).
 179 juint _ONE_val[] =
 180 {
 181     0x00000000UL, 0x3ff00000UL
 182 };
 183 
 184 
 185 // Registers:
 186 // input: xmm0
 187 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 188 //          rax, rdx, rcx, tmp - r11
 189 
 190 // Code generated by Intel C compiler for LIBM library
 191 
 192 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
       // Emits the x86_64 instruction sequence that computes exp(x) for the double
       // in the low lane of xmm0 and leaves the result in xmm0, following the
       // table-driven scheme in the ALGORITHM DESCRIPTION at the top of this file.
       //
       // xmm1..xmm7, eax, ecx, edx and tmp are scratch. The emitted code reserves
       // 24 bytes of stack and releases them at B1_5:
       //   [rsp+8]  copy of the input x (re-read by the special-case paths)
       //   [rsp+16] result spill slot (written/re-read at L_2TAG_PACKET_6_0_2/B1_3)
       //   [rsp+0]  status code 14 or 15 stored on the overflow/underflow paths
       //
       // NOTE(review): the 14/15 codes are stored but never read in this function;
       // presumably a leftover of the LIBM error-reporting hook (14 on the
       // overflow paths, 15 on the underflow paths) - confirm before relying on it.
       // NOTE(review): the constant tables are C++ globals addressed through
       // InternalAddress; confirm that this is the intended relocation kind.
 193   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 194   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 195   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 196   Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
 197   
 198   assert_different_registers(tmp, eax, ecx, edx);
       // Only C++ declarations sit between this jump and bind(start); no
       // instructions are emitted in between.
 199   jmp(start);
 200   address cv = (address)_cv;
 201   address Shifter = (address)_shifter;
 202   address mmask = (address)_mmask;
 203   address bias = (address)_bias;
 204   address Tbl_addr = (address)_Tbl_addr;
 205   address ALLONES = (address)_ALLONES;
 206   address ebias = (address)_ebias;
 207   address XMAX = (address)_XMAX;
 208   address XMIN = (address)_XMIN;
 209   address INF = (address)_INF;
 210   address ZERO = (address)_ZERO;
 211   address ONE_val = (address)_ONE_val;
 212   
 213   bind(start);
       // Prologue: reserve scratch stack, spill x, duplicate x into both lanes and
       // preload the reduction constants.
 214   subq(rsp, 24);
 215   movsd(Address(rsp, 8), xmm0);
 216   unpcklpd(xmm0, xmm0);
 217   movdqu(xmm1, InternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 218   movdqu(xmm6, InternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 219   movdqu(xmm2, InternalAddress(16+cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 220   movdqu(xmm3, InternalAddress(32+cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
       // Range check on the top 16 bits of x with the sign cleared
       // (16527 = 0x408f, 15504 = 0x3c90). The OR of the two differences has its
       // sign bit set when the value is above 0x408f or below 0x3c90, i.e. when
       // |x| >= 1024 or |x| < 2^-54; the unsigned compare against INT_MIN tests
       // exactly that sign bit and sends such inputs to the special-case code.
 221   pextrw(eax, xmm0, 3);
 222   andl(eax, 32767);
 223   movl(edx, 16527);
 224   subl(edx, eax);
 225   subl(eax, 15504);
 226   orl(edx, eax);
 227   cmpl(edx, INT_MIN);
 228   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
       // Argument reduction: adding and subtracting the shifter rounds
       // x*K/log(2) to an integer m (xmm7 keeps the shifted form whose low bits
       // hold m); y = x - m*log(2)/K is formed with the hi and lo parts of
       // log(2)/K in two subtractions.
 229   mulpd(xmm1, xmm0);
 230   addpd(xmm1, xmm6);
 231   movapd(xmm7, xmm1);
 232   subpd(xmm1, xmm6);
 233   mulpd(xmm2, xmm1);
 234   movdqu(xmm4, InternalAddress(64+cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 235   mulpd(xmm3, xmm1);
 236   movdqu(xmm5, InternalAddress(80+cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 237   subpd(xmm0, xmm2);
       // eax = m; ecx = (m & 63) * 16 = byte offset of table entry j;
       // eax = edx = m >> 6 = n.
 238   movdl(eax, xmm7);
 239   movl(ecx, eax);
 240   andl(ecx, 63);
 241   shll(ecx, 4);
 242   sarl(eax, 6);
 243   movl(edx, eax);
       // xmm7 = ((m & ~63) + (1023 << 6)) << 46, which places n + 1023 in the
       // exponent field of a double.
 244   movdqu(xmm6, InternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 245   pand(xmm7, xmm6);
 246   movdqu(xmm6, InternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 247   paddq(xmm7, xmm6);
 248   psllq(xmm7, 46);
 249   subpd(xmm0, xmm3);
       // xmm2 = 16-byte table entry j (low quadword: correction term, high
       // quadword: mantissa bits of 2^(j/K)).
 250   lea(tmp, InternalAddress(Tbl_addr));
 251   movdqu(xmm2, Address(ecx,tmp));
       // Polynomial evaluation in y, interleaved with folding in the table
       // correction (addsd xmm1, xmm2) and merging the table mantissa with the
       // exponent (unpckhpd + por) so that xmm2 becomes 2^n * T_hi[j].
 252   mulpd(xmm4, xmm0);
 253   movapd(xmm6, xmm0);
 254   movapd(xmm1, xmm0);
 255   mulpd(xmm6, xmm6);
 256   mulpd(xmm0, xmm6);
 257   addpd(xmm5, xmm4);
 258   mulsd(xmm0, xmm6);
 259   mulpd(xmm6, InternalAddress(48+cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 260   addsd(xmm1, xmm2);
 261   unpckhpd(xmm2, xmm2);
 262   mulpd(xmm0, xmm5);
 263   addsd(xmm1, xmm0);
 264   por(xmm2, xmm7);
 265   unpckhpd(xmm0, xmm0);
 266   addsd(xmm0, xmm1);
 267   addsd(xmm0, xmm6);
       // The fast path is taken only for -894 <= n <= 1022 (single unsigned
       // compare of n + 894 against 1916).
 268   addl(edx, 894);
 269   cmpl(edx, 1916);
 270   jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
       // result = p * T + T, with p in xmm0 and the scaled table value in xmm2.
 271   mulsd(xmm0, xmm2);
 272   addsd(xmm0, xmm2);
 273   jmp (B1_5);
 274   
       // Slow path: n is outside the fast-path range, so the result may overflow
       // or become subnormal. The power of two is applied in two steps: n >> 1 is
       // subtracted from the exponent of xmm2 here and re-applied later by
       // multiplying with xmm3 (raw exponent plus ebias).
       // xmm4 = ALLONES shifted left by edx = -1022 - n; it is used below to
       // split xmm2 into high-order bits and a remainder.
       // Only the low quadword of xmm4 feeds the scalar result.
 275   bind(L_2TAG_PACKET_1_0_2);
 276   xorpd(xmm3, xmm3);
 277   movdqu(xmm4, InternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0x00000000UL
 278   movl(edx, -1022);
 279   subl(edx, eax);
 280   movdl(xmm5, edx);
 281   psllq(xmm4, xmm5);
 282   movl(ecx, eax);
 283   sarl(eax, 1);
 284   pinsrw(xmm3, eax, 3);
 285   movdqu(xmm6, InternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 286   psllq(xmm3, 4);
 287   psubd(xmm2, xmm3);
 288   mulsd(xmm0, xmm2);
       // -1022 - n > 52 (n < -1074): handled separately at L_2TAG_PACKET_2_0_2.
 289   cmpl(edx, 52);
 290   jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
 291   pand(xmm4, xmm2);
 292   paddd(xmm3, xmm6);
 293   subsd(xmm2, xmm4);
 294   addsd(xmm0, xmm2);
       // n >= 1023: possible overflow, checked at L_2TAG_PACKET_3_0_2.
 295   cmpl(ecx, 1023);
 296   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
 297   pextrw(ecx, xmm0, 3);
 298   andl(ecx, 32768);
 299   orl(edx, ecx);
 300   cmpl(edx, 0);
 301   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 302   movapd(xmm6, xmm0);
 303   addsd(xmm0, xmm4);
 304   mulsd(xmm0, xmm3);
       // A zero exponent field in the product means the result is subnormal or
       // zero; it is then recomputed at L_2TAG_PACKET_5_0_2.
 305   pextrw(ecx, xmm0, 3);
 306   andl(ecx, 32752);
 307   cmpl(ecx, 0);
 308   jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
 309   jmp(B1_5);
 310   
       // Subnormal result: rebuilds the value with integer operations on the bit
       // patterns of the two scaled parts (xmm6, xmm4) and stores status 15.
       // NOTE(review): looks like a hand-made rounding fix-up for the subnormal
       // range - confirm against the LIBM original before changing anything.
 311   bind(L_2TAG_PACKET_5_0_2);
 312   mulsd(xmm6, xmm3);
 313   mulsd(xmm4, xmm3);
 314   movdqu(xmm0, xmm6);
 315   pxor(xmm6, xmm4);
 316   psrad(xmm6, 31);
 317   pshufd(xmm6, xmm6, 85);
 318   psllq(xmm0, 1);
 319   psrlq(xmm0, 1);
 320   pxor(xmm0, xmm6);
 321   psrlq(xmm6, 63);
 322   paddq(xmm0, xmm6);
 323   paddq(xmm0, xmm4);
 324   movl(Address(rsp,0), 15);
 325   jmp(L_2TAG_PACKET_6_0_2);
 326   
 327   bind(L_2TAG_PACKET_4_0_2);
 328   addsd(xmm0, xmm4);
 329   mulsd(xmm0, xmm3);
 330   jmp(B1_5);
 331   
       // n >= 1023: finish the scaling and test whether the exponent field is all
       // ones (32752 = 0x7ff0), i.e. the result overflowed.
 332   bind(L_2TAG_PACKET_3_0_2);
 333   addsd(xmm0, xmm4);
 334   mulsd(xmm0, xmm3);
 335   pextrw(ecx, xmm0, 3);
 336   andl(ecx, 32752);
 337   cmpl(ecx, 32752);
 338   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 339   jmp(B1_5);
 340   
       // n < -1074: finish the scaling and store status 15.
 341   bind(L_2TAG_PACKET_2_0_2);
 342   paddd(xmm3, xmm6);
 343   addpd(xmm0, xmm2);
 344   mulsd(xmm0, xmm3);
 345   movl(Address(rsp,0), 15);
 346   jmp(L_2TAG_PACKET_6_0_2);
 347   
       // |x| >= 1024 (eax = high word of x with the sign cleared).
       // 2146435072 = 0x7ff00000: Inf or NaN go to L_2TAG_PACKET_9_0_2.
       // Otherwise the signed high word is re-read; unsigned >= 0x80000000 means
       // x is negative (underflow), else the result overflows: XMAX * XMAX.
 348   bind(L_2TAG_PACKET_8_0_2);
 349   cmpl(eax, 2146435072);
 350   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
 351   movl(eax, Address(rsp,12)); 
 352   cmpl(eax, INT_MIN);
 353   jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
 354   movsd(xmm0, InternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
 355   mulsd(xmm0, xmm0);
 356   
 357   bind(L_2TAG_PACKET_7_0_2);
 358   movl(Address(rsp,0), 14);
 359   jmp(L_2TAG_PACKET_6_0_2);
 360   
       // Large negative x: XMIN * XMIN underflows to zero.
 361   bind(L_2TAG_PACKET_10_0_2);
 362   movsd(xmm0, InternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
 363   mulsd(xmm0, xmm0);
 364   movl(Address(rsp,0), 15);
 365   jmp(L_2TAG_PACKET_6_0_2);
 366   
       // Exponent field all ones. A non-zero mantissa (high word above 0x7ff00000
       // or non-zero low word) is a NaN; otherwise +INF returns +INF and -INF
       // returns zero.
 367   bind(L_2TAG_PACKET_9_0_2);
 368   movl(edx, Address(rsp,8));
 369   cmpl(eax, 2146435072);
 370   jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
 371   cmpl(edx, 0);
 372   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 373   movl(eax, Address(rsp,12));
 374   cmpl(eax, 2146435072);
 375   jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
 376   movsd(xmm0, InternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
 377   jmp(B1_5);
 378   
 379   bind(L_2TAG_PACKET_12_0_2);
 380   movsd(xmm0, InternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
 381   jmp(B1_5);
 382   
       // NaN input: return x + x.
 383   bind(L_2TAG_PACKET_11_0_2);
 384   movsd(xmm0, Address(rsp, 8));
 385   addsd(xmm0, xmm0);
 386   jmp(B1_5);
 387   
       // Out-of-range inputs from the initial check. 1083179008 = 0x40900000 is
       // the high word of 1024.0: |x| >= 1024 continues at L_2TAG_PACKET_8_0_2,
       // otherwise |x| < 2^-54 and the result is x + 1.0.
       // The store below writes xmm0 back to the slot that already holds x.
 388   bind(L_2TAG_PACKET_0_0_2);
 389   movl(eax, Address(rsp, 12));
 390   andl(eax, 2147483647);
 391   cmpl(eax, 1083179008);
 392   jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
 393   movsd(Address(rsp, 8), xmm0);
 394   addsd(xmm0, InternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
 395   jmp(B1_5);
 396   
       // Common exit for the paths that stored a status code: the result is
       // spilled to [rsp+16] and immediately reloaded.
 397   bind(L_2TAG_PACKET_6_0_2);
 398   movq(Address(rsp, 16), xmm0);
 399   
 400   bind(B1_3);
 401   movq(xmm0, Address(rsp, 16));
 402   
       // Epilogue: release the 24 bytes reserved at start; result is in xmm0.
 403   bind(B1_5);
 404   addq(rsp, 24);
 405 }
 406 #endif
 407 
 408 #ifndef _LP64
 409 
 410 juint _static_const_table[] = 
 411 {
         // All constants of the 32-bit fast_exp in one table, addressed through a
         // single base register plus byte offset (doubles are stored low word first):
         //      0: 0xfff0000000000000 x2   sign+exponent mask
         //     16: 0x00000000ffffffc0 x2   mask clearing the table-index bits of m
         //     32: 0x000000000000ffc0 x2   exponent bias, pre-shifted (1023 << 6)
         //     48: 0x4338000000000000 x2   shifter, 1.5 * 2^52
         //     64: ~ K/log(2) x2, K = 64
         //     80: ~ log(2)/K, high part x2
         //     96: low-order part of log(2)/K x2
         //    112: ~ 1/2 x2
         //    128: polynomial coefficients (~1/720, ~1/24)
         //    144: polynomial coefficients (~1/120, ~1/6)
         //    160: 64 entries x 16 bytes, the 2^(j/64) table
         //   1184: 1.0    1192: +INF    1200: 0.0
         //   1208: largest finite double    1216: smallest normal double
 412     0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
 413     0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
 414     0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
 415     0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
 416     0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
 417     0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
 418     0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
 419     0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
 420     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
 421     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
 422     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
 423     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
 424     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 425     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 426     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 427     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 428     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 429     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 430     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 431     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 432     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 433     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 434     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 435     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 436     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 437     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 438     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 439     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 440     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 441     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 442     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 443     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 444     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 445     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 446     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 447     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 448     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 449     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 450     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 451     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 452     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 453     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 454     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 455     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 456     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 457     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 458     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 459     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 460     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 461     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 462     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 463     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 464     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 465     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 466     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 467     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 468     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 469     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 470     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 471     0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
 472     0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
 473     0x00100000UL
 474 };
 475 
 476 // Registers:
 477 // input: (rbp + 8)
 478 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 479 //          rax, rdx, rcx, rbx (tmp)
 480 
 481 // Code generated by Intel C compiler for LIBM library
 482 
 483 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
       // Emits the 32-bit x86 instruction sequence that computes exp(x), following
       // the table-driven scheme in the ALGORITHM DESCRIPTION at the top of this
       // file. The input double is read from the stack 8 bytes above the stack
       // pointer on entry ([rsp+128] after the 120-byte adjustment below) and the
       // result is left on the x87 stack by an fld_d on every exit path.
       //
       // xmm0..xmm7, eax, ecx and edx are scratch. tmp holds the base address of
       // _static_const_table; its previous value is saved at [rsp+64] and
       // restored at the end.
       //
       // NOTE(review): the 120 bytes reserved by subl(rsp, 120) are not released
       // in this function - presumably the code emitted by the caller does it;
       // confirm.
       // NOTE(review): edx = 14 / 15 on the overflow / underflow paths is set but
       // not read here; presumably a leftover of LIBM error reporting - confirm.
 484   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 485   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 486   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 487   Label start;
 488   
 489   assert_different_registers(tmp, eax, ecx, edx);
       // Only a C++ declaration sits between this jump and bind(start); no
       // instructions are emitted in between.
 490   jmp(start);
 491   address static_const_table = (address)_static_const_table;
 492   
 493   bind(start);
       // Prologue: reserve scratch stack, save tmp, point tmp at the constant
       // table, load x and duplicate it into both lanes.
 494   subl(rsp, 120);
 495   movl(Address(rsp, 64), tmp);
 496   lea(tmp, InternalAddress(static_const_table));
 497   movdqu(xmm0, Address(rsp, 128));
 498   unpcklpd(xmm0, xmm0);
 499   movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 500   movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 501   movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 502   movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
       // Range check on the top 16 bits of x with the sign cleared
       // (16527 = 0x408f, 15504 = 0x3c90). The OR of the two differences has its
       // sign bit set when the value is above 0x408f or below 0x3c90, i.e. when
       // |x| >= 1024 or |x| < 2^-54; the unsigned compare against INT_MIN tests
       // exactly that sign bit and sends such inputs to the special-case code.
 503   pextrw(eax, xmm0, 3);
 504   andl(eax, 32767);
 505   movl(edx, 16527);
 506   subl(edx, eax);
 507   subl(eax, 15504);
 508   orl(edx, eax);
 509   cmpl(edx, INT_MIN);
 510   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
       // Argument reduction: adding and subtracting the shifter rounds
       // x*K/log(2) to an integer m (xmm7 keeps the shifted form whose low bits
       // hold m); y = x - m*log(2)/K is formed with the hi and lo parts of
       // log(2)/K in two subtractions.
 511   mulpd(xmm1, xmm0);
 512   addpd(xmm1, xmm6);
 513   movapd(xmm7, xmm1);
 514   subpd(xmm1, xmm6);
 515   mulpd(xmm2, xmm1);
 516   movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 517   mulpd(xmm3, xmm1);
 518   movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 519   subpd(xmm0, xmm2);
       // eax = m; ecx = (m & 63) * 16 = byte offset of table entry j;
       // eax = edx = m >> 6 = n.
 520   movdl(eax, xmm7);
 521   movl(ecx, eax);
 522   andl(ecx, 63);
 523   shll(ecx, 4);
 524   sarl(eax, 6);
 525   movl(edx, eax);
       // xmm7 = ((m & ~63) + (1023 << 6)) << 46, which places n + 1023 in the
       // exponent field of a double.
 526   movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 527   pand(xmm7, xmm6);
 528   movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 529   paddq(xmm7, xmm6);
 530   psllq(xmm7, 46);
 531   subpd(xmm0, xmm3);
       // xmm2 = 16-byte entry j of the 2^(j/64) table, which starts at offset 160.
 532   movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
       // Polynomial evaluation in y, interleaved with folding in the table
       // correction (addsd xmm1, xmm2) and merging the table mantissa with the
       // exponent (unpckhpd + por) so that xmm2 becomes 2^n * T_hi[j].
 533   mulpd(xmm4, xmm0);
 534   movapd(xmm6, xmm0);
 535   movapd(xmm1, xmm0);
 536   mulpd(xmm6, xmm6);
 537   mulpd(xmm0, xmm6);
 538   addpd(xmm5, xmm4);
 539   mulsd(xmm0, xmm6);
 540   mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 541   addsd(xmm1, xmm2);
 542   unpckhpd(xmm2, xmm2);
 543   mulpd(xmm0, xmm5);
 544   addsd(xmm1, xmm0);
 545   por(xmm2, xmm7);
 546   unpckhpd(xmm0, xmm0);
 547   addsd(xmm0, xmm1);
 548   addsd(xmm0, xmm6);
       // The fast path is taken only for -894 <= n <= 1022 (single unsigned
       // compare of n + 894 against 1916).
 549   addl(edx, 894);
 550   cmpl(edx, 1916);
 551   jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
       // result = p * T + T, with p in xmm0 and the scaled table value in xmm2.
 552   mulsd(xmm0, xmm2);
 553   addsd(xmm0, xmm2);
 554   jmp(L_2TAG_PACKET_2_0_2);
 555   
       // Slow path: n is outside the fast-path range, so the result may overflow
       // or underflow. The final steps are done on the x87 unit with bits 8 and 9
       // (768 = 0x300) of the control word set; the original control word is
       // kept at [rsp+24] and reloaded afterwards.
       // NOTE(review): bits 8-9 are presumably the x87 precision-control field
       // (extended precision) - confirm against the Intel SDM.
 556   bind(L_2TAG_PACKET_1_0_2);
 557   fnstcw(Address(rsp, 24));
 558   movzwl(edx, Address(rsp, 24));
 559   orl(edx, 768);
 560   movw(Address(rsp, 28), edx);
 561   fldcw(Address(rsp, 28));
       // Split n: eax = n >> 1, edx = n - (n >> 1).
       // xmm6 = table mantissa with its exponent field replaced by
       //        (n >> 1) + 1023;
       // xmm4 = the power of two with exponent edx + 1023.
 562   movl(edx, eax);
 563   sarl(eax, 1);
 564   subl(edx, eax);
 565   movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
 566   pandn(xmm6, xmm2);
 567   addl(eax, 1023);
 568   movdl(xmm3, eax);
 569   psllq(xmm3, 52);
 570   por(xmm6, xmm3);
 571   addl(edx, 1023);
 572   movdl(xmm4, edx);
 573   psllq(xmm4, 52);
       // Combine p (xmm0), the half-scaled table value (xmm6) and the second
       // power of two (xmm4) on the x87 stack, then store the product back as a
       // double and restore the saved control word.
 574   movsd(Address(rsp, 8), xmm0);
 575   fld_d(Address(rsp, 8));
 576   movsd(Address(rsp, 16), xmm6);
 577   fld_d(Address(rsp, 16));
 578   fmula(1);
 579   faddp(1);
 580   movsd(Address(rsp, 8), xmm4);
 581   fld_d(Address(rsp, 8));
 582   fmulp(1);
 583   fstp_d(Address(rsp, 8));
 584   movsd(xmm0,Address(rsp, 8));
 585   fldcw(Address(rsp, 24));
       // Classify the result by its exponent field (32752 = 0x7ff0): all ones
       // means overflow, zero means a subnormal or zero result.
 586   pextrw(ecx, xmm0, 3);
 587   andl(ecx, 32752);
 588   cmpl(ecx, 32752);
 589   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
 590   cmpl(ecx, 0);
 591   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 592   jmp(L_2TAG_PACKET_2_0_2);
       // NOTE(review): the instructions from here to L_2TAG_PACKET_3_0_2 follow an
       // unconditional jump and have no label, so they are emitted but can never
       // execute. Kept as-is.
 593   cmpl(ecx, INT_MIN);
 594   jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
 595   cmpl(ecx, -1064950997);
 596   jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
 597   jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
 598   movl(edx, Address(rsp, 128));
 599   cmpl(edx ,-17155601);
 600   jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
 601   jmp(L_2TAG_PACKET_4_0_2);
 602   
       // Overflow: status 14.
 603   bind(L_2TAG_PACKET_3_0_2);
 604   movl(edx, 14);
 605   jmp(L_2TAG_PACKET_5_0_2);
 606   
       // Underflow: status 15.
 607   bind(L_2TAG_PACKET_4_0_2);
 608   movl(edx, 15);
 609   
       // Common exit of the overflow/underflow paths: push the result on the x87
       // stack via [rsp+0]; xmm0 is reloaded with the original argument.
 610   bind(L_2TAG_PACKET_5_0_2);
 611   movsd(Address(rsp, 0), xmm0);
 612   movsd(xmm0, Address(rsp, 128));
 613   fld_d(Address(rsp, 0));
 614   jmp(L_2TAG_PACKET_6_0_2);
 615   
       // |x| >= 1024 (eax = high word of x with the sign cleared, so the signed
       // compare below behaves like an unsigned one).
       // 2146435072 = 0x7ff00000: Inf or NaN go to L_2TAG_PACKET_8_0_2.
 616   bind(L_2TAG_PACKET_7_0_2);
 617   cmpl(eax, 2146435072);
 618   jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
       // Re-read the signed high word of x. The test must be unsigned:
       // eax >= 0x80000000 means the sign bit is set (x is negative, result
       // underflows). A signed ">= INT_MIN" is true for every value and would
       // send large positive x to the underflow path as well, leaving the
       // overflow code below unreachable.
 619   movl(eax, Address(rsp, 132));
 620   cmpl(eax, INT_MIN);
 621   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
       // Large positive x: XMAX * XMAX overflows.
 622   movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
 623   mulsd(xmm0, xmm0);
 624   movl(edx, 14);
 625   jmp(L_2TAG_PACKET_5_0_2);
 626   
       // Large negative x: XMIN * XMIN (table offset 1216) underflows to zero.
 627   bind(L_2TAG_PACKET_9_0_2);
 628   movsd(xmm0, Address(tmp, 1216));
 629   mulsd(xmm0, xmm0);
 630   movl(edx, 15);
 631   jmp(L_2TAG_PACKET_5_0_2);
 632   
       // Exponent field all ones. A non-zero mantissa (high word above 0x7ff00000
       // or non-zero low word) is a NaN; otherwise +INF returns +INF and -INF
       // returns zero.
 633   bind(L_2TAG_PACKET_8_0_2);
 634   movl(edx, Address(rsp, 128));
 635   cmpl(eax, 2146435072);
 636   jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
 637   cmpl(edx, 0);
 638   jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
 639   movl(eax, Address(rsp, 132));
 640   cmpl(eax, 2146435072);
 641   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 642   movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
 643   jmp(L_2TAG_PACKET_2_0_2);
 644   
 645   bind(L_2TAG_PACKET_11_0_2);
 646   movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
 647   jmp(L_2TAG_PACKET_2_0_2);
 648   
       // NaN input: return x + x.
 649   bind(L_2TAG_PACKET_10_0_2);
 650   movsd(xmm0, Address(rsp, 128));
 651   addsd(xmm0, xmm0);
 652   jmp(L_2TAG_PACKET_2_0_2);
 653   
       // Out-of-range inputs from the initial check. 1083179008 = 0x40900000 is
       // the high word of 1024.0: |x| >= 1024 continues at L_2TAG_PACKET_7_0_2,
       // otherwise |x| < 2^-54 and the result is x + 1.0.
 654   bind(L_2TAG_PACKET_0_0_2);
 655   movl(eax, Address(rsp, 132));
 656   andl(eax, 2147483647);
 657   cmpl(eax, 1083179008);
 658   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 659   movsd(xmm0, Address(rsp, 128));
 660   addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
 661   jmp(L_2TAG_PACKET_2_0_2);
 662   
       // Normal exit: push the result from xmm0 onto the x87 stack via [rsp+48].
 663   bind(L_2TAG_PACKET_2_0_2);
 664   movsd(Address(rsp, 48), xmm0);
 665   fld_d(Address(rsp, 48));
 666   
       // Restore the caller's value of tmp.
 667   bind(L_2TAG_PACKET_6_0_2);
 668   movl(tmp, Address(rsp, 64));
 669 }
 670 
 671 #endif