1 /*
   2 * Copyright (c) 2016, Intel Corporation.
   3 * Intel Math Library (LIBM) Source Code
   4 *
   5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 *
   7 * This code is free software; you can redistribute it and/or modify it
   8 * under the terms of the GNU General Public License version 2 only, as
   9 * published by the Free Software Foundation.
  10 *
  11 * This code is distributed in the hope that it will be useful, but WITHOUT
  12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 * version 2 for more details (a copy is included in the LICENSE file that
  15 * accompanied this code).
  16 *
  17 * You should have received a copy of the GNU General Public License version
  18 * 2 along with this work; if not, write to the Free Software Foundation,
  19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 *
  21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 * or visit www.oracle.com if you need additional information or have any
  23 * questions.
  24 *
  25 */
  26 
  27 #include "precompiled.hpp"
  28 #include "asm/assembler.hpp"
  29 #include "asm/assembler.inline.hpp"
  30 #include "runtime/stubRoutines.hpp"
  31 #include "macroAssembler_x86.hpp"
  32 
  33 #ifdef _MSC_VER
     // ALIGNED_(x): compiler-specific alignment specifier placed in front of the
     // constant tables in this file. MSVC uses __declspec(align(x)); every other
     // compiler is given the GCC-style __attribute__((aligned(x))).
  34 #define ALIGNED_(x) __declspec(align(x))
  35 #else
  36 #define ALIGNED_(x) __attribute__ ((aligned(x)))
  37 #endif
  38 
  39 /******************************************************************************/
  40 //                     ALGORITHM DESCRIPTION - EXP()
  41 //                     ---------------------
  42 //
  43 // Description:
  44 //  Let K = 64 (table size).
  45 //        x    x/log(2)     n
  46 //       e  = 2          = 2 * T[j] * (1 + P(y))
  47 //  where
  48 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
  49 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
  50 //                  j/K
  51 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
  52 //
  53 //       P(y) is a minimax polynomial approximation of exp(y)-1
  54 //       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
  55 //
  56 //  To avoid problems with arithmetic overflow and underflow,
  57 //            n                        n1  n2
  58 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
  59 //  where BIAS is a value of exponent bias.
  60 //
  61 // Special cases:
  62 //  exp(NaN) = NaN
  63 //  exp(+INF) = +INF
  64 //  exp(-INF) = 0
  65 //  exp(x) = 1 for subnormals
  66 //  for finite argument, only exp(0)=1 is exact
  67 //  For IEEE double
  68 //    if x >  709.782712893383973096 then exp(x) overflow
  69 //    if x < -745.133219101941108420 then exp(x) underflow
  70 //
  71 /******************************************************************************/
  72 
  73 #ifdef _LP64
  74 // The 64 bit code is at most SSE2 compliant
  75 ALIGNED_(16) juint _cv[] =
  76 {
         // Double-precision constants stored as (low dword, high dword) pairs.
         // The 64-bit fast_exp loads them 16 bytes at a time by byte offset:
         //   +0  : 0x40571547652b82fe in both lanes = K/log(2) with K = 64
         //   +16 : 0x3f862e42fefa0000 in both lanes = leading part of log(2)/K
         //                                            (low mantissa bits are zero)
         //   +32 : 0x3d1cf79abc9e3b3a in both lanes = low-order remainder of log(2)/K
         //   +48 : 0x3fdffffffffffffe in both lanes ~ 0.5
         //   +64 : { ~1/720, ~1/24 }  polynomial coefficients (lanes differ)
         //   +80 : { ~1/120, ~1/6  }  polynomial coefficients (lanes differ)
  77     0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
  78     0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
  79     0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
  80     0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
  81     0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  82 };
  83 
  84 ALIGNED_(16) juint _shifter[] =
  85 {
         // 0x4338000000000000 = 1.5 * 2^52 in both lanes. fast_exp adds it to
         // x*K/log(2) and subtracts it again: the sum leaves the rounded integer in
         // the low mantissa bits, the difference yields that integer as a double.
  86     0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  87 };
  88 
  89 ALIGNED_(16) juint _mmask[] =
  90 {
         // Per 64-bit lane: keeps bits 6..31 and clears everything else. fast_exp
         // ANDs it with the shifted integer m, dropping the low 6 bits (the table
         // index j) and the upper dword.
  91     0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  92 };
  93 
  94 ALIGNED_(16) juint _bias[] =
  95 {
         // 0xffc0 = 1023 << 6 per 64-bit lane. It is added to the masked integer
         // before the left shift by 46, so the exponent bias 1023 ends up at bit 52.
  96     0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  97 };
  98 
  99 ALIGNED_(16) juint _Tbl_addr[] =
 100 {
         // 64 entries of 16 bytes (256 dwords), indexed by (m & 63) << 4 in fast_exp.
         // Each entry is two 64-bit values in (low dword, high dword) order:
         //   low  qword: a small double that fast_exp adds into the polynomial sum
         //               (presumably T_lo[j] of the algorithm description)
         //   high qword: mantissa bits with a zero exponent field; fast_exp ORs the
         //               computed exponent into it (presumably T_hi[j] = 2^(j/64))
         // Entry 0 is all zero.
 101     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
 102     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
 103     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
 104     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
 105     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 106     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 107     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 108     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 109     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 110     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 111     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 112     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 113     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 114     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 115     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 116     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 117     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 118     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 119     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 120     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 121     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 122     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 123     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 124     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 125     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 126     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 127     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 128     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 129     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 130     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 131     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 132     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 133     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 134     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 135     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 136     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 137     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 138     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 139     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 140     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 141     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 142     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 143     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 144     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 145     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 146     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 147     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 148     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 149     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 150     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 151     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 152     0x000fa7c1UL
 153 };
 154 
 155 ALIGNED_(16) juint _ALLONES[] =
 156 {
         // All 128 bits set. The slow path of fast_exp shifts this left by a
         // run-time count and ANDs it with a value to clear that many low bits.
 157     0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
 158 };
 159 
 160 ALIGNED_(16) juint _ebias[] =
 161 {
         // 0x3ff0000000000000 (the bit pattern of 1.0, i.e. exponent bias 1023 at
         // bit 52) in both lanes. The slow path adds it with paddd to an exponent
         // offset to form a power-of-two scale factor.
 162     0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 163 };
 164 
 165 ALIGNED_(4) juint _XMAX[] =
 166 {
         // 0x7fefffffffffffff: the largest finite double. fast_exp squares it to
         // produce the overflow result.
 167     0xffffffffUL, 0x7fefffffUL
 168 };
 169 
 170 ALIGNED_(4) juint _XMIN[] =
 171 {
         // 0x0010000000000000: the smallest positive normal double. fast_exp
         // squares it to produce the underflow result.
 172     0x00000000UL, 0x00100000UL
 173 };
 174 
 175 ALIGNED_(4) juint _INF[] =
 176 {
         // 0x7ff0000000000000: +infinity, returned for exp(+INF).
 177     0x00000000UL, 0x7ff00000UL
 178 };
 179 
 180 ALIGNED_(4) juint _ZERO[] =
 181 {
         // +0.0, returned for exp(-INF).
 182     0x00000000UL, 0x00000000UL
 183 };
 184 
 185 ALIGNED_(4) juint _ONE_val[] =
 186 {
         // 0x3ff0000000000000: 1.0. For very small |x| fast_exp returns x + 1.0.
 187     0x00000000UL, 0x3ff00000UL
 188 };
 189 
 190 
 191 // Registers:
 192 // input: xmm0
 193 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 194 //          rax, rdx, rcx, tmp - r11
 195 
 196 // Code generated by Intel C compiler for LIBM library
 197 
 198 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
       // Emits the 64-bit SSE2 instruction sequence computing exp(x).
       //   xmm0           : holds x on entry and the result when B1_5 is reached
       //   xmm1..xmm7     : scratch
       //   eax, ecx, edx  : integer scratch; tmp holds the table base address
       // Note that the parameters are named after registers but are whatever
       // registers the caller passes in.
       //
       // Stack frame (24 bytes, reserved at 'start', released at B1_5):
       //   [rsp+0]   status code 14 or 15 written by the overflow/underflow paths;
       //             nothing in this function reads it back
       //   [rsp+8]   copy of x; [rsp+12] is its high dword (sign, exponent, top mantissa)
       //   [rsp+16]  temporary for the result on the special-case paths
 199   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 200   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 201   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 202   Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
 203 
 204   assert_different_registers(tmp, eax, ecx, edx);
       // The jump targets the label bound right after the declarations below; those
       // declarations are C++ locals and emit no code.
 205   jmp(start);
 206   address cv = (address)_cv;
 207   address Shifter = (address)_shifter;
 208   address mmask = (address)_mmask;
 209   address bias = (address)_bias;
 210   address Tbl_addr = (address)_Tbl_addr;
 211   address ALLONES = (address)_ALLONES;
 212   address ebias = (address)_ebias;
 213   address XMAX = (address)_XMAX;
 214   address XMIN = (address)_XMIN;
 215   address INF = (address)_INF;
 216   address ZERO = (address)_ZERO;
 217   address ONE_val = (address)_ONE_val;
 218 
 219   bind(start);
 220   subq(rsp, 24);
 221   movsd(Address(rsp, 8), xmm0);            // keep x for the special-case paths
 222   unpcklpd(xmm0, xmm0);                    // x in both lanes
 223   movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 224   movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 225   movdqu(xmm2, ExternalAddress(16 + cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 226   movdqu(xmm3, ExternalAddress(32 + cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
       // Range check on the top 16 bits of x with the sign removed. Both
       // (16527 - eax) and (eax - 15504) must be non-negative, i.e.
       // 0x3c90 <= eax <= 0x408f; otherwise the OR has its top bit set and the
       // unsigned compare against INT_MIN sends control to the special cases.
 227   pextrw(eax, xmm0, 3);
 228   andl(eax, 32767);
 229   movl(edx, 16527);
 230   subl(edx, eax);
 231   subl(eax, 15504);
 232   orl(edx, eax);
 233   cmpl(edx, INT_MIN);
 234   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
       // Main path. xmm1 = x*K/log(2); adding and subtracting the shifter rounds it
       // to an integer m (xmm7 keeps the sum, whose low bits hold m).
 235   mulpd(xmm1, xmm0);
 236   addpd(xmm1, xmm6);
 237   movapd(xmm7, xmm1);
 238   subpd(xmm1, xmm6);
       // Reduced argument y = x - m*hi(log(2)/K) - m*lo(log(2)/K), in xmm0.
 239   mulpd(xmm2, xmm1);
 240   movdqu(xmm4, ExternalAddress(64 + cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 241   mulpd(xmm3, xmm1);
 242   movdqu(xmm5, ExternalAddress(80 + cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 243   subpd(xmm0, xmm2);
       // eax = m; ecx = (m & 63) * 16 = byte offset of table entry j;
       // eax = edx = m >> 6 = n (arithmetic shift).
 244   movdl(eax, xmm7);
 245   movl(ecx, eax);
 246   andl(ecx, 63);
 247   shll(ecx, 4);
 248   sarl(eax, 6);
 249   movl(edx, eax);
       // xmm7 = ((m & ~63) + (1023 << 6)) << 46, which places n + 1023 in the
       // exponent field (bit 52 upwards).
 250   movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 251   pand(xmm7, xmm6);
 252   movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 253   paddq(xmm7, xmm6);
 254   psllq(xmm7, 46);
 255   subpd(xmm0, xmm3);
 256   lea(tmp, ExternalAddress(Tbl_addr));
 257   movdqu(xmm2, Address(ecx, tmp));         // table entry j (both 64-bit halves)
       // Polynomial in y, evaluated with the two lanes working on different
       // coefficient pairs; the low half of the table entry is added into the sum.
 258   mulpd(xmm4, xmm0);
 259   movapd(xmm6, xmm0);
 260   movapd(xmm1, xmm0);
 261   mulpd(xmm6, xmm6);
 262   mulpd(xmm0, xmm6);
 263   addpd(xmm5, xmm4);
 264   mulsd(xmm0, xmm6);
 265   mulpd(xmm6, ExternalAddress(48 + cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 266   addsd(xmm1, xmm2);
 267   unpckhpd(xmm2, xmm2);                    // high half of the table entry into the low lane
 268   mulpd(xmm0, xmm5);
 269   addsd(xmm1, xmm0);
 270   por(xmm2, xmm7);                         // merge exponent bits with the table mantissa
 271   unpckhpd(xmm0, xmm0);
 272   addsd(xmm0, xmm1);
 273   addsd(xmm0, xmm6);
       // Fast exit when 0 <= n + 894 <= 1916 (unsigned compare), i.e.
       // -894 <= n <= 1022: result = xmm0 * xmm2 + xmm2.
 274   addl(edx, 894);
 275   cmpl(edx, 1916);
 276   jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
 277   mulsd(xmm0, xmm2);
 278   addsd(xmm0, xmm2);
 279   jmp(B1_5);
 280 
       // n is outside the fast range: the result may overflow or be subnormal.
       // The scale is applied in two steps: (n >> 1) is taken out of the exponent
       // of xmm2 and re-applied at the end through xmm3.
 281   bind(L_2TAG_PACKET_1_0_2);
 282   xorpd(xmm3, xmm3);
 283   movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
 284   movl(edx, -1022);
 285   subl(edx, eax);                          // edx = -1022 - n
 286   movdl(xmm5, edx);
 287   psllq(xmm4, xmm5);                       // mask with the low (-1022 - n) bits cleared
 288   movl(ecx, eax);                          // ecx = n
 289   sarl(eax, 1);
 290   pinsrw(xmm3, eax, 3);
 291   movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 292   psllq(xmm3, 4);                          // (n >> 1) now sits at the exponent field
 293   psubd(xmm2, xmm3);
 294   mulsd(xmm0, xmm2);
 295   cmpl(edx, 52);
 296   jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
 297   pand(xmm4, xmm2);                        // xmm4 = xmm2 with low bits cleared
 298   paddd(xmm3, xmm6);                       // xmm3 = exponent offset + bias: a power-of-two factor
 299   subsd(xmm2, xmm4);                       // xmm2 = the bits that were cleared
 300   addsd(xmm0, xmm2);
 301   cmpl(ecx, 1023);
 302   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
 303   pextrw(ecx, xmm0, 3);
 304   andl(ecx, 32768);
 305   orl(edx, ecx);
 306   cmpl(edx, 0);
 307   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 308   movapd(xmm6, xmm0);
 309   addsd(xmm0, xmm4);
 310   mulsd(xmm0, xmm3);
       // Exponent field of the result is zero: take the integer fix-up path below.
 311   pextrw(ecx, xmm0, 3);
 312   andl(ecx, 32752);
 313   cmpl(ecx, 0);
 314   jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
 315   jmp(B1_5);
 316 
       // Result has a zero exponent field. The value is rebuilt with integer
       // operations on the bit patterns and status 15 is recorded.
       // NOTE(review): the exact rounding intent of this sequence is not evident
       // from the code alone - confirm against the LIBM reference.
 317   bind(L_2TAG_PACKET_5_0_2);
 318   mulsd(xmm6, xmm3);
 319   mulsd(xmm4, xmm3);
 320   movdqu(xmm0, xmm6);
 321   pxor(xmm6, xmm4);
 322   psrad(xmm6, 31);
 323   pshufd(xmm6, xmm6, 85);
 324   psllq(xmm0, 1);
 325   psrlq(xmm0, 1);                          // clear the sign bit
 326   pxor(xmm0, xmm6);
 327   psrlq(xmm6, 63);
 328   paddq(xmm0, xmm6);
 329   paddq(xmm0, xmm4);
 330   movl(Address(rsp, 0), 15);
 331   jmp(L_2TAG_PACKET_6_0_2);
 332 
 333   bind(L_2TAG_PACKET_4_0_2);
 334   addsd(xmm0, xmm4);
 335   mulsd(xmm0, xmm3);
 336   jmp(B1_5);
 337 
       // n >= 1023: finish the computation and check whether the exponent field
       // is all ones (result overflowed).
 338   bind(L_2TAG_PACKET_3_0_2);
 339   addsd(xmm0, xmm4);
 340   mulsd(xmm0, xmm3);
 341   pextrw(ecx, xmm0, 3);
 342   andl(ecx, 32752);
 343   cmpl(ecx, 32752);
 344   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 345   jmp(B1_5);
 346 
       // (-1022 - n) > 52: no masking is needed; status 15 is recorded.
 347   bind(L_2TAG_PACKET_2_0_2);
 348   paddd(xmm3, xmm6);
 349   addpd(xmm0, xmm2);
 350   mulsd(xmm0, xmm3);
 351   movl(Address(rsp, 0), 15);
 352   jmp(L_2TAG_PACKET_6_0_2);
 353 
       // |x| >= 1024 (eax = high dword of |x|). An exponent field of all ones
       // means INF or NaN; otherwise the sign of x selects underflow or overflow.
 354   bind(L_2TAG_PACKET_8_0_2);
 355   cmpl(eax, 2146435072);
 356   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
 357   movl(eax, Address(rsp, 12));
       // Unsigned compare: true exactly when the sign bit of x is set.
 358   cmpl(eax, INT_MIN);
 359   jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
 360   movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
 361   mulsd(xmm0, xmm0);                       // XMAX * XMAX overflows
 362 
 363   bind(L_2TAG_PACKET_7_0_2);
 364   movl(Address(rsp, 0), 14);
 365   jmp(L_2TAG_PACKET_6_0_2);
 366 
 367   bind(L_2TAG_PACKET_10_0_2);
 368   movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
 369   mulsd(xmm0, xmm0);                       // XMIN * XMIN underflows
 370   movl(Address(rsp, 0), 15);
 371   jmp(L_2TAG_PACKET_6_0_2);
 372 
       // INF or NaN. Any non-zero mantissa bit (high dword above 0x7ff00000, or a
       // non-zero low dword) means NaN.
 373   bind(L_2TAG_PACKET_9_0_2);
 374   movl(edx, Address(rsp, 8));
 375   cmpl(eax, 2146435072);
 376   jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
 377   cmpl(edx, 0);
 378   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 379   movl(eax, Address(rsp, 12));
 380   cmpl(eax, 2146435072);                   // high dword equal to 0x7ff00000 only for +INF
 381   jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
 382   movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
 383   jmp(B1_5);
 384 
       // x is -INF: the result is +0.
 385   bind(L_2TAG_PACKET_12_0_2);
 386   movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
 387   jmp(B1_5);
 388 
       // x is NaN: return x + x.
 389   bind(L_2TAG_PACKET_11_0_2);
 390   movsd(xmm0, Address(rsp, 8));
 391   addsd(xmm0, xmm0);
 392   jmp(B1_5);
 393 
       // Outside the main range. 1083179008 = 0x40900000 is the high dword of
       // 1024.0: larger magnitudes go to the large/INF/NaN handling, the rest are
       // tiny and yield x + 1.0.
 394   bind(L_2TAG_PACKET_0_0_2);
 395   movl(eax, Address(rsp, 12));
 396   andl(eax, 2147483647);
 397   cmpl(eax, 1083179008);
 398   jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
 399   movsd(Address(rsp, 8), xmm0);
 400   addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
 401   jmp(B1_5);
 402 
       // Common tail of the paths that recorded a status code: the result makes a
       // round trip through [rsp+16].
 403   bind(L_2TAG_PACKET_6_0_2);
 404   movq(Address(rsp, 16), xmm0);
 405 
 406   bind(B1_3);
 407   movq(xmm0, Address(rsp, 16));
 408 
       // Exit: release the frame; the result is in xmm0.
 409   bind(B1_5);
 410   addq(rsp, 24);
 411 }
 412 #else
 413 // The 32 bit code is at most SSE2 compliant
 414 ALIGNED_(16) juint _static_const_table[] =
 415 {
         // All constants of the 32-bit fast_exp in one table, addressed by byte
         // offset from its start (values are (low dword, high dword) pairs):
         //   +0    : 0xfff0000000000000 x2  sign+exponent mask (used with pandn)
         //   +16   : 0x00000000ffffffc0 x2  mask clearing the low 6 bits of m
         //   +32   : 0x000000000000ffc0 x2  1023 << 6
         //   +48   : 0x4338000000000000 x2  shifter, 1.5 * 2^52
         //   +64   : 0x40571547652b82fe x2  K/log(2), K = 64
         //   +80   : 0x3f862e42fefa0000 x2  leading part of log(2)/K
         //   +96   : 0x3d1cf79abc9e3b3a x2  low-order remainder of log(2)/K
         //   +112  : 0x3fdffffffffffffe x2  ~0.5
         //   +128  : { ~1/720, ~1/24 }      polynomial coefficients
         //   +144  : { ~1/120, ~1/6  }      polynomial coefficients
         //   +160  : 64 table entries of 16 bytes, indexed by (m & 63) << 4
         //   +1184 : 1.0
         //   +1192 : +INF
         //   +1200 : 0.0
         //   +1208 : largest finite double
         //   +1216 : smallest positive normal double
 416     0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
 417     0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
 418     0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
 419     0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
 420     0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
 421     0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
 422     0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
 423     0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
 424     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
 425     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
 426     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
 427     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
 428     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 429     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 430     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 431     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 432     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 433     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 434     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 435     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 436     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 437     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 438     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 439     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 440     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 441     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 442     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 443     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 444     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 445     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 446     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 447     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 448     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 449     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 450     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 451     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 452     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 453     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 454     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 455     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 456     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 457     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 458     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 459     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 460     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 461     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 462     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 463     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 464     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 465     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 466     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 467     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 468     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 469     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 470     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 471     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 472     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 473     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 474     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 475     0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
 476     0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
 477     0x00100000UL
 478 };
 479 
 480 //registers,
 481 // input: (rbp + 8)
 482 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 483 //          rax, rdx, rcx, rbx (tmp)
 484 
 485 // Code generated by Intel C compiler for LIBM library
 486 
 487 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
       // Emits the 32-bit instruction sequence computing exp(x) (SSE2 plus x87 for
       // the extended-range path and for returning the result).
       //   x                 : read from [rsp+128] after the frame is reserved
       //                       ([rsp+132] is its high dword)
       //   result            : left on the x87 register stack by fld_d
       //   xmm0..xmm7        : scratch
       //   eax, ecx, edx     : integer scratch
       //   tmp               : saved to [rsp+64], used as the base address of
       //                       _static_const_table, restored at the end
       // Frame (120 bytes): [rsp+0], [rsp+8], [rsp+16], [rsp+48] are temporaries,
       // [rsp+24] holds the saved x87 control word and [rsp+28] the modified one.
       // This sequence does not restore rsp itself.
       // NOTE(review): presumably the enclosing stub tears the frame down
       // (e.g. with leave) - confirm against the caller.
 488   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 489   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 490   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 491   Label start;
 492 
 493   assert_different_registers(tmp, eax, ecx, edx);
       // The jump targets the label bound right after the declaration below, which
       // is a C++ local and emits no code.
 494   jmp(start);
 495   address static_const_table = (address)_static_const_table;
 496 
 497   bind(start);
 498   subl(rsp, 120);
 499   movl(Address(rsp, 64), tmp);
 500   lea(tmp, ExternalAddress(static_const_table));
 501   movdqu(xmm0, Address(rsp, 128));
 502   unpcklpd(xmm0, xmm0);                    // x in both lanes
 503   movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 504   movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 505   movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 506   movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
       // Range check on the top 16 bits of x with the sign removed. Both
       // (16527 - eax) and (eax - 15504) must be non-negative, i.e.
       // 0x3c90 <= eax <= 0x408f; otherwise the OR has its top bit set and the
       // unsigned compare against INT_MIN sends control to the special cases.
 507   pextrw(eax, xmm0, 3);
 508   andl(eax, 32767);
 509   movl(edx, 16527);
 510   subl(edx, eax);
 511   subl(eax, 15504);
 512   orl(edx, eax);
 513   cmpl(edx, INT_MIN);
 514   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
       // Main path. xmm1 = x*K/log(2); adding and subtracting the shifter rounds it
       // to an integer m (xmm7 keeps the sum, whose low bits hold m).
 515   mulpd(xmm1, xmm0);
 516   addpd(xmm1, xmm6);
 517   movapd(xmm7, xmm1);
 518   subpd(xmm1, xmm6);
       // Reduced argument y = x - m*hi(log(2)/K) - m*lo(log(2)/K), in xmm0.
 519   mulpd(xmm2, xmm1);
 520   movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 521   mulpd(xmm3, xmm1);
 522   movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 523   subpd(xmm0, xmm2);
       // eax = m; ecx = (m & 63) * 16 = byte offset of table entry j;
       // eax = edx = m >> 6 = n (arithmetic shift).
 524   movdl(eax, xmm7);
 525   movl(ecx, eax);
 526   andl(ecx, 63);
 527   shll(ecx, 4);
 528   sarl(eax, 6);
 529   movl(edx, eax);
       // xmm7 = ((m & ~63) + (1023 << 6)) << 46, which places n + 1023 in the
       // exponent field (bit 52 upwards).
 530   movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 531   pand(xmm7, xmm6);
 532   movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 533   paddq(xmm7, xmm6);
 534   psllq(xmm7, 46);
 535   subpd(xmm0, xmm3);
 536   movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));  // table entry j
       // Polynomial in y, evaluated with the two lanes working on different
       // coefficient pairs; the low half of the table entry is added into the sum.
 537   mulpd(xmm4, xmm0);
 538   movapd(xmm6, xmm0);
 539   movapd(xmm1, xmm0);
 540   mulpd(xmm6, xmm6);
 541   mulpd(xmm0, xmm6);
 542   addpd(xmm5, xmm4);
 543   mulsd(xmm0, xmm6);
 544   mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 545   addsd(xmm1, xmm2);
 546   unpckhpd(xmm2, xmm2);                    // high half of the table entry into the low lane
 547   mulpd(xmm0, xmm5);
 548   addsd(xmm1, xmm0);
 549   por(xmm2, xmm7);                         // merge exponent bits with the table mantissa
 550   unpckhpd(xmm0, xmm0);
 551   addsd(xmm0, xmm1);
 552   addsd(xmm0, xmm6);
       // Fast exit when 0 <= n + 894 <= 1916 (unsigned compare), i.e.
       // -894 <= n <= 1022: result = xmm0 * xmm2 + xmm2.
 553   addl(edx, 894);
 554   cmpl(edx, 1916);
 555   jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
 556   mulsd(xmm0, xmm2);
 557   addsd(xmm0, xmm2);
 558   jmp(L_2TAG_PACKET_2_0_2);
 559 
       // n is outside the fast range: finish on the x87 unit. The control word is
       // saved, both precision-control bits (0x300) are set, and it is restored
       // once the result has been stored back as a double.
 560   bind(L_2TAG_PACKET_1_0_2);
 561   fnstcw(Address(rsp, 24));
 562   movzwl(edx, Address(rsp, 24));
 563   orl(edx, 768);
 564   movw(Address(rsp, 28), edx);
 565   fldcw(Address(rsp, 28));
       // Split n into eax = n >> 1 and edx = n - (n >> 1); each half becomes its
       // own power-of-two factor so neither exponent leaves the double range.
 566   movl(edx, eax);
 567   sarl(eax, 1);
 568   subl(edx, eax);
 569   movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
 570   pandn(xmm6, xmm2);                       // mantissa bits of xmm2 only
 571   addl(eax, 1023);
 572   movdl(xmm3, eax);
 573   psllq(xmm3, 52);
 574   por(xmm6, xmm3);                         // xmm6 = mantissa scaled by 2^(n >> 1)
 575   addl(edx, 1023);
 576   movdl(xmm4, edx);
 577   psllq(xmm4, 52);                         // xmm4 = 2^(n - (n >> 1))
       // x87: (p * t + t) * 2^(n - (n >> 1)), with p from xmm0 and t from xmm6.
 578   movsd(Address(rsp, 8), xmm0);
 579   fld_d(Address(rsp, 8));
 580   movsd(Address(rsp, 16), xmm6);
 581   fld_d(Address(rsp, 16));
 582   fmula(1);
 583   faddp(1);
 584   movsd(Address(rsp, 8), xmm4);
 585   fld_d(Address(rsp, 8));
 586   fmulp(1);
 587   fstp_d(Address(rsp, 8));
 588   movsd(xmm0, Address(rsp, 8));
 589   fldcw(Address(rsp, 24));
       // Classify the result by its exponent field: all ones -> overflow (14),
       // zero -> underflow (15), anything else is returned normally. ecx is
       // masked to 0..32752, so the signed compare is safe here.
 590   pextrw(ecx, xmm0, 3);
 591   andl(ecx, 32752);
 592   cmpl(ecx, 32752);
 593   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
 594   cmpl(ecx, 0);
 595   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 596   jmp(L_2TAG_PACKET_2_0_2);
 606 
 607   bind(L_2TAG_PACKET_3_0_2);
 608   movl(edx, 14);
 609   jmp(L_2TAG_PACKET_5_0_2);
 610 
 611   bind(L_2TAG_PACKET_4_0_2);
 612   movl(edx, 15);
 613 
       // Common tail of the overflow/underflow paths: edx holds the status code
       // (nothing in this function consumes it), xmm0 is reloaded with x and the
       // result is pushed on the x87 stack.
 614   bind(L_2TAG_PACKET_5_0_2);
 615   movsd(Address(rsp, 0), xmm0);
 616   movsd(xmm0, Address(rsp, 128));
 617   fld_d(Address(rsp, 0));
 618   jmp(L_2TAG_PACKET_6_0_2);
 619 
       // |x| >= 1024 (eax = high dword of |x|, hence non-negative, so the signed
       // compare below is safe). An exponent field of all ones means INF or NaN.
 620   bind(L_2TAG_PACKET_7_0_2);
 621   cmpl(eax, 2146435072);
 622   jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
 623   movl(eax, Address(rsp, 132));
       // Sign test on the raw high dword of x. This must be an UNSIGNED compare:
       // eax >= 0x80000000 exactly when the sign bit is set (x negative, result
       // underflows). A signed compare against INT_MIN is true for every value
       // and would send large positive x down the underflow path as well.
 624   cmpl(eax, INT_MIN);
 625   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
 626   movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
 627   mulsd(xmm0, xmm0);                       // XMAX * XMAX overflows
 628   movl(edx, 14);
 629   jmp(L_2TAG_PACKET_5_0_2);
 630 
 631   bind(L_2TAG_PACKET_9_0_2);
 632   movsd(xmm0, Address(tmp, 1216));         // 0x00000000UL, 0x00100000UL
 633   mulsd(xmm0, xmm0);                       // XMIN * XMIN underflows
 634   movl(edx, 15);
 635   jmp(L_2TAG_PACKET_5_0_2);
 636 
       // INF or NaN. Any non-zero mantissa bit (high dword above 0x7ff00000, or a
       // non-zero low dword) means NaN.
 637   bind(L_2TAG_PACKET_8_0_2);
 638   movl(edx, Address(rsp, 128));
 639   cmpl(eax, 2146435072);
 640   jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
 641   cmpl(edx, 0);
 642   jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
 643   movl(eax, Address(rsp, 132));
 644   cmpl(eax, 2146435072);                   // high dword equal to 0x7ff00000 only for +INF
 645   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 646   movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
 647   jmp(L_2TAG_PACKET_2_0_2);
 648 
       // x is -INF: the result is +0.
 649   bind(L_2TAG_PACKET_11_0_2);
 650   movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
 651   jmp(L_2TAG_PACKET_2_0_2);
 652 
       // x is NaN: return x + x.
 653   bind(L_2TAG_PACKET_10_0_2);
 654   movsd(xmm0, Address(rsp, 128));
 655   addsd(xmm0, xmm0);
 656   jmp(L_2TAG_PACKET_2_0_2);
 657 
       // Outside the main range. 1083179008 = 0x40900000 is the high dword of
       // 1024.0: larger magnitudes go to the large/INF/NaN handling, the rest are
       // tiny and yield x + 1.0.
 658   bind(L_2TAG_PACKET_0_0_2);
 659   movl(eax, Address(rsp, 132));
 660   andl(eax, 2147483647);
 661   cmpl(eax, 1083179008);
 662   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 663   movsd(xmm0, Address(rsp, 128));
 664   addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
 665   jmp(L_2TAG_PACKET_2_0_2);
 666 
       // Normal exit: move the SSE result onto the x87 stack through memory.
 667   bind(L_2TAG_PACKET_2_0_2);
 668   movsd(Address(rsp, 48), xmm0);
 669   fld_d(Address(rsp, 48));
 670 
       // Restore the register that served as the table base.
 671   bind(L_2TAG_PACKET_6_0_2);
 672   movl(tmp, Address(rsp, 64));
 673 }
 674 #endif