1 /*
   2 * Copyright (c) 2016, Intel Corporation.
   3 * Intel Math Library (LIBM) Source Code
   4 *
   5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6 *
   7 * This code is free software; you can redistribute it and/or modify it
   8 * under the terms of the GNU General Public License version 2 only, as
   9 * published by the Free Software Foundation.
  10 *
  11 * This code is distributed in the hope that it will be useful, but WITHOUT
  12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 * version 2 for more details (a copy is included in the LICENSE file that
  15 * accompanied this code).
  16 *
  17 * You should have received a copy of the GNU General Public License version
  18 * 2 along with this work; if not, write to the Free Software Foundation,
  19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20 *
  21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22 * or visit www.oracle.com if you need additional information or have any
  23 * questions.
  24 *
  25 */
  26 
  27 #include "precompiled.hpp"
  28 #include "asm/assembler.hpp"
  29 #include "asm/assembler.inline.hpp"
  30 #include "macroAssembler_x86.hpp"
  31 #include "runtime/stubRoutines.hpp"
  32 #include "utilities/globalDefinitions.hpp"
  33 
  34 /******************************************************************************/
  35 //                     ALGORITHM DESCRIPTION - EXP()
  36 //                     ---------------------
  37 //
  38 // Description:
  39 //  Let K = 64 (table size).
  40 //        x    x/log(2)     n
  41 //       e  = 2          = 2 * T[j] * (1 + P(y))
  42 //  where
  43 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
  44 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
  45 //                  j/K
  46 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
  47 //
  48 //       P(y) is a minimax polynomial approximation of exp(y)-1
  49 //       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
  50 //
  51 //  To avoid problems with arithmetic overflow and underflow,
  52 //            n                        n1  n2
  53 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
  54 //  where BIAS is a value of exponent bias.
  55 //
  56 // Special cases:
  57 //  exp(NaN) = NaN
  58 //  exp(+INF) = +INF
  59 //  exp(-INF) = 0
  60 //  exp(x) = 1 for subnormals
  61 //  for finite argument, only exp(0)=1 is exact
  62 //  For IEEE double
  63 //    if x >  709.782712893383973096 then exp(x) overflow
  64 //    if x < -745.133219101941108420 then exp(x) underflow
  65 //
  66 /******************************************************************************/
  67 
  68 #ifdef _LP64
  69 // The 64 bit code is at most SSE2 compliant
  70 ATTRIBUTE_ALIGNED(16) juint _cv[] =
  71 {
  72     0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
  73     0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
  74     0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
  75     0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
  76     0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  77 };
  78 
  79 ATTRIBUTE_ALIGNED(16) juint _shifter[] =
  80 {
  81     0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  82 };
  83 
  84 ATTRIBUTE_ALIGNED(16) juint _mmask[] =
  85 {
  86     0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  87 };
  88 
  89 ATTRIBUTE_ALIGNED(16) juint _bias[] =
  90 {
  91     0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  92 };
  93 
  94 ATTRIBUTE_ALIGNED(16) juint _Tbl_addr[] =
  95 {
  96     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
  97     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
  98     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
  99     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
 100     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 101     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 102     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 103     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 104     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 105     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 106     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 107     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 108     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 109     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 110     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 111     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 112     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 113     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 114     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 115     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 116     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 117     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 118     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 119     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 120     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 121     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 122     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 123     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 124     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 125     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 126     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 127     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 128     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 129     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 130     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 131     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 132     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 133     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 134     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 135     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 136     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 137     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 138     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 139     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 140     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 141     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 142     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 143     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 144     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 145     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 146     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 147     0x000fa7c1UL
 148 };
 149 
 150 ATTRIBUTE_ALIGNED(16) juint _ALLONES[] =
 151 {
 152     0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
 153 };
 154 
 155 ATTRIBUTE_ALIGNED(16) juint _ebias[] =
 156 {
 157     0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 158 };
 159 
 160 ATTRIBUTE_ALIGNED(4) juint _XMAX[] =
 161 {
 162     0xffffffffUL, 0x7fefffffUL
 163 };
 164 
 165 ATTRIBUTE_ALIGNED(4) juint _XMIN[] =
 166 {
 167     0x00000000UL, 0x00100000UL
 168 };
 169 
 170 ATTRIBUTE_ALIGNED(4) juint _INF[] =
 171 {
 172     0x00000000UL, 0x7ff00000UL
 173 };
 174 
 175 ATTRIBUTE_ALIGNED(4) juint _ZERO[] =
 176 {
 177     0x00000000UL, 0x00000000UL
 178 };
 179 
 180 ATTRIBUTE_ALIGNED(4) juint _ONE_val[] =
 181 {
 182     0x00000000UL, 0x3ff00000UL
 183 };
 184 
 185 
 186 // Registers:
 187 // input: xmm0
 188 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 189 //          rax, rdx, rcx, tmp - r11
 190 
 191 // Code generated by Intel C compiler for LIBM library
 192 
 193 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
 194   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 195   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 196   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 197   Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
 198 
 199   assert_different_registers(tmp, eax, ecx, edx);
 200   jmp(start);
 201   address cv = (address)_cv;
 202   address Shifter = (address)_shifter;
 203   address mmask = (address)_mmask;
 204   address bias = (address)_bias;
 205   address Tbl_addr = (address)_Tbl_addr;
 206   address ALLONES = (address)_ALLONES;
 207   address ebias = (address)_ebias;
 208   address XMAX = (address)_XMAX;
 209   address XMIN = (address)_XMIN;
 210   address INF = (address)_INF;
 211   address ZERO = (address)_ZERO;
 212   address ONE_val = (address)_ONE_val;
 213 
 214   bind(start);
 215   subq(rsp, 24);
 216   movsd(Address(rsp, 8), xmm0);
 217   unpcklpd(xmm0, xmm0);
 218   movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 219   movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 220   movdqu(xmm2, ExternalAddress(16 + cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 221   movdqu(xmm3, ExternalAddress(32 + cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
 222   pextrw(eax, xmm0, 3);
 223   andl(eax, 32767);
 224   movl(edx, 16527);
 225   subl(edx, eax);
 226   subl(eax, 15504);
 227   orl(edx, eax);
 228   cmpl(edx, INT_MIN);
 229   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
 230   mulpd(xmm1, xmm0);
 231   addpd(xmm1, xmm6);
 232   movapd(xmm7, xmm1);
 233   subpd(xmm1, xmm6);
 234   mulpd(xmm2, xmm1);
 235   movdqu(xmm4, ExternalAddress(64 + cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 236   mulpd(xmm3, xmm1);
 237   movdqu(xmm5, ExternalAddress(80 + cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 238   subpd(xmm0, xmm2);
 239   movdl(eax, xmm7);
 240   movl(ecx, eax);
 241   andl(ecx, 63);
 242   shll(ecx, 4);
 243   sarl(eax, 6);
 244   movl(edx, eax);
 245   movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 246   pand(xmm7, xmm6);
 247   movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 248   paddq(xmm7, xmm6);
 249   psllq(xmm7, 46);
 250   subpd(xmm0, xmm3);
 251   lea(tmp, ExternalAddress(Tbl_addr));
 252   movdqu(xmm2, Address(ecx, tmp));
 253   mulpd(xmm4, xmm0);
 254   movapd(xmm6, xmm0);
 255   movapd(xmm1, xmm0);
 256   mulpd(xmm6, xmm6);
 257   mulpd(xmm0, xmm6);
 258   addpd(xmm5, xmm4);
 259   mulsd(xmm0, xmm6);
 260   mulpd(xmm6, ExternalAddress(48 + cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 261   addsd(xmm1, xmm2);
 262   unpckhpd(xmm2, xmm2);
 263   mulpd(xmm0, xmm5);
 264   addsd(xmm1, xmm0);
 265   por(xmm2, xmm7);
 266   unpckhpd(xmm0, xmm0);
 267   addsd(xmm0, xmm1);
 268   addsd(xmm0, xmm6);
 269   addl(edx, 894);
 270   cmpl(edx, 1916);
 271   jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
 272   mulsd(xmm0, xmm2);
 273   addsd(xmm0, xmm2);
 274   jmp(B1_5);
 275 
 276   bind(L_2TAG_PACKET_1_0_2);
 277   xorpd(xmm3, xmm3);
 278   movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
 279   movl(edx, -1022);
 280   subl(edx, eax);
 281   movdl(xmm5, edx);
 282   psllq(xmm4, xmm5);
 283   movl(ecx, eax);
 284   sarl(eax, 1);
 285   pinsrw(xmm3, eax, 3);
 286   movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 287   psllq(xmm3, 4);
 288   psubd(xmm2, xmm3);
 289   mulsd(xmm0, xmm2);
 290   cmpl(edx, 52);
 291   jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
 292   pand(xmm4, xmm2);
 293   paddd(xmm3, xmm6);
 294   subsd(xmm2, xmm4);
 295   addsd(xmm0, xmm2);
 296   cmpl(ecx, 1023);
 297   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
 298   pextrw(ecx, xmm0, 3);
 299   andl(ecx, 32768);
 300   orl(edx, ecx);
 301   cmpl(edx, 0);
 302   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 303   movapd(xmm6, xmm0);
 304   addsd(xmm0, xmm4);
 305   mulsd(xmm0, xmm3);
 306   pextrw(ecx, xmm0, 3);
 307   andl(ecx, 32752);
 308   cmpl(ecx, 0);
 309   jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
 310   jmp(B1_5);
 311 
 312   bind(L_2TAG_PACKET_5_0_2);
 313   mulsd(xmm6, xmm3);
 314   mulsd(xmm4, xmm3);
 315   movdqu(xmm0, xmm6);
 316   pxor(xmm6, xmm4);
 317   psrad(xmm6, 31);
 318   pshufd(xmm6, xmm6, 85);
 319   psllq(xmm0, 1);
 320   psrlq(xmm0, 1);
 321   pxor(xmm0, xmm6);
 322   psrlq(xmm6, 63);
 323   paddq(xmm0, xmm6);
 324   paddq(xmm0, xmm4);
 325   movl(Address(rsp, 0), 15);
 326   jmp(L_2TAG_PACKET_6_0_2);
 327 
 328   bind(L_2TAG_PACKET_4_0_2);
 329   addsd(xmm0, xmm4);
 330   mulsd(xmm0, xmm3);
 331   jmp(B1_5);
 332 
 333   bind(L_2TAG_PACKET_3_0_2);
 334   addsd(xmm0, xmm4);
 335   mulsd(xmm0, xmm3);
 336   pextrw(ecx, xmm0, 3);
 337   andl(ecx, 32752);
 338   cmpl(ecx, 32752);
 339   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 340   jmp(B1_5);
 341 
 342   bind(L_2TAG_PACKET_2_0_2);
 343   paddd(xmm3, xmm6);
 344   addpd(xmm0, xmm2);
 345   mulsd(xmm0, xmm3);
 346   movl(Address(rsp, 0), 15);
 347   jmp(L_2TAG_PACKET_6_0_2);
 348 
 349   bind(L_2TAG_PACKET_8_0_2);
 350   cmpl(eax, 2146435072);
 351   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
 352   movl(eax, Address(rsp, 12));
 353   cmpl(eax, INT_MIN);
 354   jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
 355   movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
 356   mulsd(xmm0, xmm0);
 357 
 358   bind(L_2TAG_PACKET_7_0_2);
 359   movl(Address(rsp, 0), 14);
 360   jmp(L_2TAG_PACKET_6_0_2);
 361 
 362   bind(L_2TAG_PACKET_10_0_2);
 363   movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
 364   mulsd(xmm0, xmm0);
 365   movl(Address(rsp, 0), 15);
 366   jmp(L_2TAG_PACKET_6_0_2);
 367 
 368   bind(L_2TAG_PACKET_9_0_2);
 369   movl(edx, Address(rsp, 8));
 370   cmpl(eax, 2146435072);
 371   jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
 372   cmpl(edx, 0);
 373   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 374   movl(eax, Address(rsp, 12));
 375   cmpl(eax, 2146435072);
 376   jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
 377   movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
 378   jmp(B1_5);
 379 
 380   bind(L_2TAG_PACKET_12_0_2);
 381   movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
 382   jmp(B1_5);
 383 
 384   bind(L_2TAG_PACKET_11_0_2);
 385   movsd(xmm0, Address(rsp, 8));
 386   addsd(xmm0, xmm0);
 387   jmp(B1_5);
 388 
 389   bind(L_2TAG_PACKET_0_0_2);
 390   movl(eax, Address(rsp, 12));
 391   andl(eax, 2147483647);
 392   cmpl(eax, 1083179008);
 393   jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
 394   movsd(Address(rsp, 8), xmm0);
 395   addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
 396   jmp(B1_5);
 397 
 398   bind(L_2TAG_PACKET_6_0_2);
 399   movq(Address(rsp, 16), xmm0);
 400 
 401   bind(B1_3);
 402   movq(xmm0, Address(rsp, 16));
 403 
 404   bind(B1_5);
 405   addq(rsp, 24);
 406 }
 407 #else
 408 // The 32 bit code is at most SSE2 compliant
 409 ATTRIBUTE_ALIGNED(16) juint _static_const_table[] =
 410 {
 411     0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
 412     0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
 413     0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
 414     0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
 415     0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
 416     0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
 417     0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
 418     0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
 419     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
 420     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
 421     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
 422     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
 423     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 424     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 425     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 426     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 427     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 428     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 429     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 430     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 431     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 432     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 433     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 434     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 435     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 436     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 437     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 438     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 439     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 440     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 441     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 442     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 443     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 444     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 445     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 446     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 447     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 448     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 449     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 450     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 451     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 452     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 453     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 454     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 455     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 456     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 457     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 458     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 459     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 460     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 461     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 462     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 463     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 464     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 465     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 466     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 467     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 468     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 469     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 470     0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
 471     0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
 472     0x00100000UL
 473 };
 474 
 475 //registers,
 476 // input: (rbp + 8)
 477 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 478 //          rax, rdx, rcx, rbx (tmp)
 479 
 480 // Code generated by Intel C compiler for LIBM library
 481 
 482 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
 483   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 484   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 485   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 486   Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
 487 
 488   assert_different_registers(tmp, eax, ecx, edx);
 489   jmp(start);
 490   address static_const_table = (address)_static_const_table;
 491 
 492   bind(start);
 493   subl(rsp, 120);
 494   movl(Address(rsp, 64), tmp);
 495   lea(tmp, ExternalAddress(static_const_table));
 496   movdqu(xmm0, Address(rsp, 128));
 497   unpcklpd(xmm0, xmm0);
 498   movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 499   movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 500   movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 501   movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
 502   pextrw(eax, xmm0, 3);
 503   andl(eax, 32767);
 504   movl(edx, 16527);
 505   subl(edx, eax);
 506   subl(eax, 15504);
 507   orl(edx, eax);
 508   cmpl(edx, INT_MIN);
 509   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
 510   mulpd(xmm1, xmm0);
 511   addpd(xmm1, xmm6);
 512   movapd(xmm7, xmm1);
 513   subpd(xmm1, xmm6);
 514   mulpd(xmm2, xmm1);
 515   movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 516   mulpd(xmm3, xmm1);
 517   movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 518   subpd(xmm0, xmm2);
 519   movdl(eax, xmm7);
 520   movl(ecx, eax);
 521   andl(ecx, 63);
 522   shll(ecx, 4);
 523   sarl(eax, 6);
 524   movl(edx, eax);
 525   movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 526   pand(xmm7, xmm6);
 527   movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 528   paddq(xmm7, xmm6);
 529   psllq(xmm7, 46);
 530   subpd(xmm0, xmm3);
 531   movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
 532   mulpd(xmm4, xmm0);
 533   movapd(xmm6, xmm0);
 534   movapd(xmm1, xmm0);
 535   mulpd(xmm6, xmm6);
 536   mulpd(xmm0, xmm6);
 537   addpd(xmm5, xmm4);
 538   mulsd(xmm0, xmm6);
 539   mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 540   addsd(xmm1, xmm2);
 541   unpckhpd(xmm2, xmm2);
 542   mulpd(xmm0, xmm5);
 543   addsd(xmm1, xmm0);
 544   por(xmm2, xmm7);
 545   unpckhpd(xmm0, xmm0);
 546   addsd(xmm0, xmm1);
 547   addsd(xmm0, xmm6);
 548   addl(edx, 894);
 549   cmpl(edx, 1916);
 550   jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
 551   mulsd(xmm0, xmm2);
 552   addsd(xmm0, xmm2);
 553   jmp(L_2TAG_PACKET_2_0_2);
 554 
 555   bind(L_2TAG_PACKET_1_0_2);
 556   fnstcw(Address(rsp, 24));
 557   movzwl(edx, Address(rsp, 24));
 558   orl(edx, 768);
 559   movw(Address(rsp, 28), edx);
 560   fldcw(Address(rsp, 28));
 561   movl(edx, eax);
 562   sarl(eax, 1);
 563   subl(edx, eax);
 564   movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
 565   pandn(xmm6, xmm2);
 566   addl(eax, 1023);
 567   movdl(xmm3, eax);
 568   psllq(xmm3, 52);
 569   por(xmm6, xmm3);
 570   addl(edx, 1023);
 571   movdl(xmm4, edx);
 572   psllq(xmm4, 52);
 573   movsd(Address(rsp, 8), xmm0);
 574   fld_d(Address(rsp, 8));
 575   movsd(Address(rsp, 16), xmm6);
 576   fld_d(Address(rsp, 16));
 577   fmula(1);
 578   faddp(1);
 579   movsd(Address(rsp, 8), xmm4);
 580   fld_d(Address(rsp, 8));
 581   fmulp(1);
 582   fstp_d(Address(rsp, 8));
 583   movsd(xmm0, Address(rsp, 8));
 584   fldcw(Address(rsp, 24));
 585   pextrw(ecx, xmm0, 3);
 586   andl(ecx, 32752);
 587   cmpl(ecx, 32752);
 588   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
 589   cmpl(ecx, 0);
 590   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 591   jmp(L_2TAG_PACKET_2_0_2);
 592   cmpl(ecx, INT_MIN);
 593   jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
 594   cmpl(ecx, -1064950997);
 595   jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
 596   jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
 597   movl(edx, Address(rsp, 128));
 598   cmpl(edx, -17155601);
 599   jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
 600   jmp(L_2TAG_PACKET_4_0_2);
 601 
 602   bind(L_2TAG_PACKET_3_0_2);
 603   movl(edx, 14);
 604   jmp(L_2TAG_PACKET_5_0_2);
 605 
 606   bind(L_2TAG_PACKET_4_0_2);
 607   movl(edx, 15);
 608 
 609   bind(L_2TAG_PACKET_5_0_2);
 610   movsd(Address(rsp, 0), xmm0);
 611   movsd(xmm0, Address(rsp, 128));
 612   fld_d(Address(rsp, 0));
 613   jmp(L_2TAG_PACKET_6_0_2);
 614 
 615   bind(L_2TAG_PACKET_7_0_2);
 616   cmpl(eax, 2146435072);
 617   jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
 618   movl(eax, Address(rsp, 132));
 619   cmpl(eax, INT_MIN);
 620   jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
 621   movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
 622   mulsd(xmm0, xmm0);
 623   movl(edx, 14);
 624   jmp(L_2TAG_PACKET_5_0_2);
 625 
 626   bind(L_2TAG_PACKET_9_0_2);
 627   movsd(xmm0, Address(tmp, 1216));
 628   mulsd(xmm0, xmm0);
 629   movl(edx, 15);
 630   jmp(L_2TAG_PACKET_5_0_2);
 631 
 632   bind(L_2TAG_PACKET_8_0_2);
 633   movl(edx, Address(rsp, 128));
 634   cmpl(eax, 2146435072);
 635   jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
 636   cmpl(edx, 0);
 637   jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
 638   movl(eax, Address(rsp, 132));
 639   cmpl(eax, 2146435072);
 640   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 641   movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
 642   jmp(L_2TAG_PACKET_2_0_2);
 643 
 644   bind(L_2TAG_PACKET_11_0_2);
 645   movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
 646   jmp(L_2TAG_PACKET_2_0_2);
 647 
 648   bind(L_2TAG_PACKET_10_0_2);
 649   movsd(xmm0, Address(rsp, 128));
 650   addsd(xmm0, xmm0);
 651   jmp(L_2TAG_PACKET_2_0_2);
 652 
 653   bind(L_2TAG_PACKET_0_0_2);
 654   movl(eax, Address(rsp, 132));
 655   andl(eax, 2147483647);
 656   cmpl(eax, 1083179008);
 657   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 658   movsd(xmm0, Address(rsp, 128));
 659   addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
 660   jmp(L_2TAG_PACKET_2_0_2);
 661 
 662   bind(L_2TAG_PACKET_2_0_2);
 663   movsd(Address(rsp, 48), xmm0);
 664   fld_d(Address(rsp, 48));
 665 
 666   bind(L_2TAG_PACKET_6_0_2);
 667   movl(tmp, Address(rsp, 64));
 668 }
 669 #endif