1 /* 2 * Copyright (c) 2015, Intel Corporation. 3 * Intel Math Library (LIBM) Source Code 4 * 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 /******************************************************************************/ 28 // ALGORITHM DESCRIPTION 29 // --------------------- 30 // 31 // Description: 32 // Let K = 64 (table size). 33 // x x/log(2) n 34 // e = 2 = 2 * T[j] * (1 + P(y)) 35 // where 36 // x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K] 37 // m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] 38 // j/K 39 // values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). 40 // 41 // P(y) is a minimax polynomial approximation of exp(x)-1 42 // on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V). 43 // 44 // To avoid problems with arithmetic overflow and underflow, 45 // n n1 n2 46 // value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] 47 // where BIAS is a value of exponent bias. 48 // 49 // Special cases: 50 // exp(NaN) = NaN 51 // exp(+INF) = +INF 52 // exp(-INF) = 0 53 // exp(x) = 1 for subnormals 54 // for finite argument, only exp(0)=1 is exact 55 // For IEEE double 56 // if x > 709.782712893383973096 then exp(x) overflow 57 // if x < -745.133219101941108420 then exp(x) underflow 58 // 59 /******************************************************************************/ 60 61 62 #include "precompiled.hpp" 63 #include "asm/assembler.hpp" 64 #include "asm/assembler.inline.hpp" 65 66 67 #ifdef _LP64 68 69 juint _cv[] = 70 { 71 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL, 72 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 73 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 74 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL, 75 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 76 }; 77 78 juint _shifter[] = 79 { 80 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 81 }; 82 83 juint _mmask[] = 84 { 85 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 86 }; 87 88 juint _bias[] = 89 { 90 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 91 }; 92 93 juint _Tbl_addr[] = 94 { 95 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, 96 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, 97 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 98 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, 99 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, 100 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, 101 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 102 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, 103 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, 104 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, 105 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 106 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 107 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 108 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 109 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 110 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 111 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 112 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 113 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 114 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 115 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 116 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 117 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 118 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 119 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 120 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 121 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 122 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, 123 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, 124 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, 125 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 126 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, 127 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, 128 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, 129 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 130 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, 131 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, 132 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, 133 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 134 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, 135 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, 136 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, 137 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 138 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, 139 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, 140 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, 141 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 142 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, 143 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, 144 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, 145 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 146 0x000fa7c1UL 147 }; 148 149 juint _ALLONES[] = 150 { 151 0xffffffffUL, 0xffffffffUL, 0xffffffffUL 152 }; 153 154 juint _ebias[] = 155 { 156 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL 157 }; 158 159 juint _XMAX[] = 160 { 161 0xffffffffUL, 0x7fefffffUL 162 }; 163 164 juint _XMIN[] = 165 { 166 0x00000000UL, 0x00100000UL 167 }; 168 169 juint _INF[] = 170 { 171 0x00000000UL, 0x7ff00000UL 172 }; 173 174 juint _ZERO[] = 175 { 176 0x00000000UL, 0x00000000UL 177 }; 178 179 juint _ONE_val[] = 180 { 181 0x00000000UL, 0x3ff00000UL 182 }; 183 184 185 //registers, 186 // input: xmm0 187 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 188 // rax, rdx, rcx, tmp - r11 189 190 // Code generated by Intel C compiler for LIBM library 191 192 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 193 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 194 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 195 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; 196 Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start; 197 198 assert_different_registers(tmp, eax, ecx, edx); 199 jmp(start); 200 address cv = (address)_cv; 201 address Shifter = (address)_shifter; 202 address mmask = (address)_mmask; 203 address bias = (address)_bias; 204 address Tbl_addr = (address)_Tbl_addr; 205 address ALLONES = (address)_ALLONES; 206 address ebias = (address)_ebias; 207 address XMAX = (address)_XMAX; 208 address XMIN = (address)_XMIN; 209 address INF = (address)_INF; 210 address ZERO = (address)_ZERO; 211 address ONE_val = (address)_ONE_val; 212 213 bind(start); 214 subq(rsp, 24); 215 movsd(Address(rsp, 8), xmm0); 216 unpcklpd(xmm0, xmm0); 217 movdqu(xmm1, InternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL 218 movdqu(xmm6, InternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 219 movdqu(xmm2, InternalAddress(16+cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL 220 movdqu(xmm3, InternalAddress(32+cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL 221 pextrw(eax, xmm0, 3); 222 andl(eax, 32767); 223 movl(edx, 16527); 224 subl(edx, eax); 225 subl(eax, 15504); 226 orl(edx, eax); 227 cmpl(edx, INT_MIN); 228 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 229 mulpd(xmm1, xmm0); 230 addpd(xmm1, xmm6); 231 movapd(xmm7, xmm1); 232 subpd(xmm1, xmm6); 233 mulpd(xmm2, xmm1); 234 movdqu(xmm4, InternalAddress(64+cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL 235 mulpd(xmm3, xmm1); 236 movdqu(xmm5, InternalAddress(80+cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 237 subpd(xmm0, xmm2); 238 movdl(eax, xmm7); 239 movl(ecx, eax); 240 andl(ecx, 63); 241 shll(ecx, 4); 242 sarl(eax, 6); 243 movl(edx, eax); 244 movdqu(xmm6, InternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 245 pand(xmm7, xmm6); 246 movdqu(xmm6, InternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 247 paddq(xmm7, xmm6); 248 psllq(xmm7, 46); 249 subpd(xmm0, xmm3); 250 lea(tmp, InternalAddress(Tbl_addr)); 251 movdqu(xmm2, Address(ecx,tmp)); 252 mulpd(xmm4, xmm0); 253 movapd(xmm6, xmm0); 254 movapd(xmm1, xmm0); 255 mulpd(xmm6, xmm6); 256 mulpd(xmm0, xmm6); 257 addpd(xmm5, xmm4); 258 mulsd(xmm0, xmm6); 259 mulpd(xmm6, InternalAddress(48+cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL 260 addsd(xmm1, xmm2); 261 unpckhpd(xmm2, xmm2); 262 mulpd(xmm0, xmm5); 263 addsd(xmm1, xmm0); 264 por(xmm2, xmm7); 265 unpckhpd(xmm0, xmm0); 266 addsd(xmm0, xmm1); 267 addsd(xmm0, xmm6); 268 addl(edx, 894); 269 cmpl(edx, 1916); 270 jcc (Assembler::above, L_2TAG_PACKET_1_0_2); 271 mulsd(xmm0, xmm2); 272 addsd(xmm0, xmm2); 273 jmp (B1_5); 274 275 bind(L_2TAG_PACKET_1_0_2); 276 xorpd(xmm3, xmm3); 277 movdqu(xmm4, InternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0x00000000UL 278 movl(edx, -1022); 279 subl(edx, eax); 280 movdl(xmm5, edx); 281 psllq(xmm4, xmm5); 282 movl(ecx, eax); 283 sarl(eax, 1); 284 pinsrw(xmm3, eax, 3); 285 movdqu(xmm6, InternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL 286 psllq(xmm3, 4); 287 psubd(xmm2, xmm3); 288 mulsd(xmm0, xmm2); 289 cmpl(edx, 52); 290 jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); 291 pand(xmm4, xmm2); 292 paddd(xmm3, xmm6); 293 subsd(xmm2, xmm4); 294 addsd(xmm0, xmm2); 295 cmpl(ecx, 1023); 296 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); 297 pextrw(ecx, xmm0, 3); 298 andl(ecx, 32768); 299 orl(edx, ecx); 300 cmpl(edx, 0); 301 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); 302 movapd(xmm6, xmm0); 303 addsd(xmm0, xmm4); 304 mulsd(xmm0, xmm3); 305 pextrw(ecx, xmm0, 3); 306 andl(ecx, 32752); 307 cmpl(ecx, 0); 308 jcc(Assembler::equal, L_2TAG_PACKET_5_0_2); 309 jmp(B1_5); 310 311 bind(L_2TAG_PACKET_5_0_2); 312 mulsd(xmm6, xmm3); 313 mulsd(xmm4, xmm3); 314 movdqu(xmm0, xmm6); 315 pxor(xmm6, xmm4); 316 psrad(xmm6, 31); 317 pshufd(xmm6, xmm6, 85); 318 psllq(xmm0, 1); 319 psrlq(xmm0, 1); 320 pxor(xmm0, xmm6); 321 psrlq(xmm6, 63); 322 paddq(xmm0, xmm6); 323 paddq(xmm0, xmm4); 324 movl(Address(rsp,0), 15); 325 jmp(L_2TAG_PACKET_6_0_2); 326 327 bind(L_2TAG_PACKET_4_0_2); 328 addsd(xmm0, xmm4); 329 mulsd(xmm0, xmm3); 330 jmp(B1_5); 331 332 bind(L_2TAG_PACKET_3_0_2); 333 addsd(xmm0, xmm4); 334 mulsd(xmm0, xmm3); 335 pextrw(ecx, xmm0, 3); 336 andl(ecx, 32752); 337 cmpl(ecx, 32752); 338 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); 339 jmp(B1_5); 340 341 bind(L_2TAG_PACKET_2_0_2); 342 paddd(xmm3, xmm6); 343 addpd(xmm0, xmm2); 344 mulsd(xmm0, xmm3); 345 movl(Address(rsp,0), 15); 346 jmp(L_2TAG_PACKET_6_0_2); 347 348 bind(L_2TAG_PACKET_8_0_2); 349 cmpl(eax, 2146435072); 350 jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); 351 movl(eax, Address(rsp,12)); 352 cmpl(eax, INT_MIN); 353 jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2); 354 movsd(xmm0, InternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL 355 mulsd(xmm0, xmm0); 356 357 bind(L_2TAG_PACKET_7_0_2); 358 movl(Address(rsp,0), 14); 359 jmp(L_2TAG_PACKET_6_0_2); 360 361 bind(L_2TAG_PACKET_10_0_2); 362 movsd(xmm0, InternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL 363 mulsd(xmm0, xmm0); 364 movl(Address(rsp,0), 15); 365 jmp(L_2TAG_PACKET_6_0_2); 366 367 bind(L_2TAG_PACKET_9_0_2); 368 movl(edx, Address(rsp,8)); 369 cmpl(eax, 2146435072); 370 jcc(Assembler::above, L_2TAG_PACKET_11_0_2); 371 cmpl(edx, 0); 372 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); 373 movl(eax, Address(rsp,12)); 374 cmpl(eax, 2146435072); 375 jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2); 376 movsd(xmm0, InternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL 377 jmp(B1_5); 378 379 bind(L_2TAG_PACKET_12_0_2); 380 movsd(xmm0, InternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL 381 jmp(B1_5); 382 383 bind(L_2TAG_PACKET_11_0_2); 384 movsd(xmm0, Address(rsp, 8)); 385 addsd(xmm0, xmm0); 386 jmp(B1_5); 387 388 bind(L_2TAG_PACKET_0_0_2); 389 movl(eax, Address(rsp, 12)); 390 andl(eax, 2147483647); 391 cmpl(eax, 1083179008); 392 jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); 393 movsd(Address(rsp, 8), xmm0); 394 addsd(xmm0, InternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL 395 jmp(B1_5); 396 397 bind(L_2TAG_PACKET_6_0_2); 398 movq(Address(rsp, 16), xmm0); 399 400 bind(B1_3); 401 movq(xmm0, Address(rsp, 16)); 402 403 bind(B1_5); 404 addq(rsp, 24); 405 } 406 #endif 407 408 #ifndef _LP64 409 410 juint _static_const_table[] = 411 { 412 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL, 413 0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL, 414 0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL, 415 0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 416 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 417 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 418 0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 419 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL, 420 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, 421 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, 422 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 423 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, 424 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, 425 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, 426 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 427 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, 428 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, 429 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, 430 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 431 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 432 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 433 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 434 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 435 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 436 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 437 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 438 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 439 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 440 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 441 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 442 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 443 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 444 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 445 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 446 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 447 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, 448 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, 449 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, 450 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 451 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, 452 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, 453 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, 454 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 455 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, 456 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, 457 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, 458 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 459 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, 460 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, 461 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, 462 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 463 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, 464 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, 465 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, 466 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 467 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, 468 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, 469 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, 470 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 471 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL, 472 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL, 473 0x00100000UL 474 }; 475 476 //registers, 477 // input: (rbp + 8) 478 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 479 // rax, rdx, rcx, rbx (tmp) 480 481 // Code generated by Intel C compiler for LIBM library 482 483 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 484 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 485 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 486 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; 487 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; 488 489 assert_different_registers(tmp, eax, ecx, edx); 490 jmp(start); 491 address static_const_table = (address)_static_const_table; 492 493 bind(start); 494 subl(rsp, 120); 495 movl(Address(rsp, 64), tmp); 496 lea(tmp, InternalAddress(static_const_table)); 497 movdqu(xmm0, Address(rsp, 128)); 498 unpcklpd(xmm0, xmm0); 499 movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL 500 movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 501 movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL 502 movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL 503 pextrw(eax, xmm0, 3); 504 andl(eax, 32767); 505 movl(edx, 16527); 506 subl(edx, eax); 507 subl(eax, 15504); 508 orl(edx, eax); 509 cmpl(edx, INT_MIN); 510 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 511 mulpd(xmm1, xmm0); 512 addpd(xmm1, xmm6); 513 movapd(xmm7, xmm1); 514 subpd(xmm1, xmm6); 515 mulpd(xmm2, xmm1); 516 movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL 517 mulpd(xmm3, xmm1); 518 movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 519 subpd(xmm0, xmm2); 520 movdl(eax, xmm7); 521 movl(ecx, eax); 522 andl(ecx, 63); 523 shll(ecx, 4); 524 sarl(eax, 6); 525 movl(edx, eax); 526 movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 527 pand(xmm7, xmm6); 528 movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 529 paddq(xmm7, xmm6); 530 psllq(xmm7, 46); 531 subpd(xmm0, xmm3); 532 movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); 533 mulpd(xmm4, xmm0); 534 movapd(xmm6, xmm0); 535 movapd(xmm1, xmm0); 536 mulpd(xmm6, xmm6); 537 mulpd(xmm0, xmm6); 538 addpd(xmm5, xmm4); 539 mulsd(xmm0, xmm6); 540 mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL 541 addsd(xmm1, xmm2); 542 unpckhpd(xmm2, xmm2); 543 mulpd(xmm0, xmm5); 544 addsd(xmm1, xmm0); 545 por(xmm2, xmm7); 546 unpckhpd(xmm0, xmm0); 547 addsd(xmm0, xmm1); 548 addsd(xmm0, xmm6); 549 addl(edx, 894); 550 cmpl(edx, 1916); 551 jcc (Assembler::above, L_2TAG_PACKET_1_0_2); 552 mulsd(xmm0, xmm2); 553 addsd(xmm0, xmm2); 554 jmp(L_2TAG_PACKET_2_0_2); 555 556 bind(L_2TAG_PACKET_1_0_2); 557 fnstcw(Address(rsp, 24)); 558 movzwl(edx, Address(rsp, 24)); 559 orl(edx, 768); 560 movw(Address(rsp, 28), edx); 561 fldcw(Address(rsp, 28)); 562 movl(edx, eax); 563 sarl(eax, 1); 564 subl(edx, eax); 565 movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL 566 pandn(xmm6, xmm2); 567 addl(eax, 1023); 568 movdl(xmm3, eax); 569 psllq(xmm3, 52); 570 por(xmm6, xmm3); 571 addl(edx, 1023); 572 movdl(xmm4, edx); 573 psllq(xmm4, 52); 574 movsd(Address(rsp, 8), xmm0); 575 fld_d(Address(rsp, 8)); 576 movsd(Address(rsp, 16), xmm6); 577 fld_d(Address(rsp, 16)); 578 fmula(1); 579 faddp(1); 580 movsd(Address(rsp, 8), xmm4); 581 fld_d(Address(rsp, 8)); 582 fmulp(1); 583 fstp_d(Address(rsp, 8)); 584 movsd(xmm0,Address(rsp, 8)); 585 fldcw(Address(rsp, 24)); 586 pextrw(ecx, xmm0, 3); 587 andl(ecx, 32752); 588 cmpl(ecx, 32752); 589 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); 590 cmpl(ecx, 0); 591 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); 592 jmp(L_2TAG_PACKET_2_0_2); 593 cmpl(ecx, INT_MIN); 594 jcc(Assembler::less, L_2TAG_PACKET_3_0_2); 595 cmpl(ecx, -1064950997); 596 jcc(Assembler::less, L_2TAG_PACKET_2_0_2); 597 jcc(Assembler::greater, L_2TAG_PACKET_4_0_2); 598 movl(edx, Address(rsp, 128)); 599 cmpl(edx ,-17155601); 600 jcc(Assembler::less, L_2TAG_PACKET_2_0_2); 601 jmp(L_2TAG_PACKET_4_0_2); 602 603 bind(L_2TAG_PACKET_3_0_2); 604 movl(edx, 14); 605 jmp(L_2TAG_PACKET_5_0_2); 606 607 bind(L_2TAG_PACKET_4_0_2); 608 movl(edx, 15); 609 610 bind(L_2TAG_PACKET_5_0_2); 611 movsd(Address(rsp, 0), xmm0); 612 movsd(xmm0, Address(rsp, 128)); 613 fld_d(Address(rsp, 0)); 614 jmp(L_2TAG_PACKET_6_0_2); 615 616 bind(L_2TAG_PACKET_7_0_2); 617 cmpl(eax, 2146435072); 618 jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2); 619 movl(eax, Address(rsp, 132)); 620 cmpl(eax, INT_MIN); 621 jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2); 622 movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL 623 mulsd(xmm0, xmm0); 624 movl(edx, 14); 625 jmp(L_2TAG_PACKET_5_0_2); 626 627 bind(L_2TAG_PACKET_9_0_2); 628 movsd(xmm0, Address(tmp, 1216)); 629 mulsd(xmm0, xmm0); 630 movl(edx, 15); 631 jmp(L_2TAG_PACKET_5_0_2); 632 633 bind(L_2TAG_PACKET_8_0_2); 634 movl(edx, Address(rsp, 128)); 635 cmpl(eax, 2146435072); 636 jcc(Assembler::above, L_2TAG_PACKET_10_0_2); 637 cmpl(edx, 0); 638 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); 639 movl(eax, Address(rsp, 132)); 640 cmpl(eax, 2146435072); 641 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); 642 movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL 643 jmp(L_2TAG_PACKET_2_0_2); 644 645 bind(L_2TAG_PACKET_11_0_2); 646 movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL 647 jmp(L_2TAG_PACKET_2_0_2); 648 649 bind(L_2TAG_PACKET_10_0_2); 650 movsd(xmm0, Address(rsp, 128)); 651 addsd(xmm0, xmm0); 652 jmp(L_2TAG_PACKET_2_0_2); 653 654 bind(L_2TAG_PACKET_0_0_2); 655 movl(eax, Address(rsp, 132)); 656 andl(eax, 2147483647); 657 cmpl(eax, 1083179008); 658 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); 659 movsd(xmm0, Address(rsp, 128)); 660 addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL 661 jmp(L_2TAG_PACKET_2_0_2); 662 663 bind(L_2TAG_PACKET_2_0_2); 664 movsd(Address(rsp, 48), xmm0); 665 fld_d(Address(rsp, 48)); 666 667 bind(L_2TAG_PACKET_6_0_2); 668 movl(tmp, Address(rsp, 64)); 669 } 670 671 #endif