/*
 * Copyright (c) 2016, Intel Corporation.
 * Intel Math Library (LIBM) Source Code
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"

// Portable spelling of "align this object to x bytes".
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif

/******************************************************************************/
//                     ALGORITHM DESCRIPTION - EXP()
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//
//       values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
//       P(y) is a minimax polynomial approximation of exp(y)-1
//       on the small interval [-log(2)/K..log(2)/K] (its coefficients were
//       calculated by Maple V).
//
//  NOTE(review): the generated code indexes the table with (m & 63), i.e. it
//  uses j in [0..K-1] rather than the symmetric range stated above.
//
//  To avoid problems with arithmetic overflow and underflow, the value of
//  2^n is safely computed as 2^n1 * 2^n2, where n1 is in [-BIAS/2..BIAS/2]
//  and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#ifdef _LP64
// The 64 bit code is at most SSE2 compliant

// Packed-double constants; each pair of 32-bit words is (low, high) of one
// IEEE double.  Byte offsets used by fast_exp:
//    0: 0x40571547652b82fe (x2)  = K/log(2)
//   16: 0x3f862e42fefa0000 (x2)  = high part of log(2)/K
//   32: 0x3d1cf79abc9e3b3a (x2)  = low part of log(2)/K
//   48: 0x3fdffffffffffffe (x2)  = multiplier applied to y*y (just below 0.5)
//   64, 80: remaining polynomial coefficients
ALIGNED_(16) juint _cv[] =
{
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
    0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
    0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
};

// 0x4338000000000000 (= 1.5 * 2^52) in both lanes.  Adding and then
// subtracting it rounds a double to an integer and leaves that integer in
// the low mantissa bits of the intermediate sum.
ALIGNED_(16) juint _shifter[] =
{
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};

// Clears the low 6 bits (the table index j) of the low dword of each lane.
ALIGNED_(16) juint _mmask[] =
{
    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};

// 0xffc0 = 1023 << 6: exponent bias, pre-shifted to line up with n << 6.
ALIGNED_(16) juint _bias[] =
{
    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};

// Table T[j], j = 0..63, 16 bytes per entry.  fast_exp adds the low qword
// of an entry to the reduced argument and ORs the high qword (mantissa bits
// only, zero exponent field) with the computed exponent of 2^n.
ALIGNED_(16) juint _Tbl_addr[] =
{
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
    0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
    0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
    0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
    0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
    0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
    0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
    0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
    0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
    0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
    0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
    0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
    0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
    0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
    0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
    0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
    0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
    0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
    0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
    0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
    0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
    0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
    0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
    0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
    0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
    0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
    0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
    0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
    0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
    0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
    0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
    0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
    0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
    0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
    0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
    0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
    0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
    0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
    0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
    0x000fa7c1UL
};

// All bits set; shifted left in fast_exp to build a mask.
ALIGNED_(16) juint _ALLONES[] =
{
    0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};

// 0x3ff0000000000000 (= 1.0) in both lanes: the exponent bias in place.
ALIGNED_(16) juint _ebias[] =
{
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};

// Largest finite double; squared in fast_exp to produce an overflow result.
ALIGNED_(4) juint _XMAX[] =
{
    0xffffffffUL, 0x7fefffffUL
};

// Smallest normal double; squared in fast_exp to produce an underflow result.
ALIGNED_(4) juint _XMIN[] =
{
    0x00000000UL, 0x00100000UL
};

// +Infinity, returned for exp(+INF).
ALIGNED_(4) juint _INF[] =
{
    0x00000000UL, 0x7ff00000UL
};

// +0.0, returned for exp(-INF).
ALIGNED_(4) juint _ZERO[] =
{
    0x00000000UL, 0x00000000UL
};

// 1.0, added to x when |x| is below the fast-path range.
ALIGNED_(4) juint _ONE_val[] =
{
    0x00000000UL, 0x3ff00000UL
};
190 191 // Registers: 192 // input: xmm0 193 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 194 // rax, rdx, rcx, tmp - r11 195 196 // Code generated by Intel C compiler for LIBM library 197 198 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 199 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 200 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 201 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; 202 Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start; 203 204 assert_different_registers(tmp, eax, ecx, edx); 205 jmp(start); 206 address cv = (address)_cv; 207 address Shifter = (address)_shifter; 208 address mmask = (address)_mmask; 209 address bias = (address)_bias; 210 address Tbl_addr = (address)_Tbl_addr; 211 address ALLONES = (address)_ALLONES; 212 address ebias = (address)_ebias; 213 address XMAX = (address)_XMAX; 214 address XMIN = (address)_XMIN; 215 address INF = (address)_INF; 216 address ZERO = (address)_ZERO; 217 address ONE_val = (address)_ONE_val; 218 219 bind(start); 220 subq(rsp, 24); 221 movsd(Address(rsp, 8), xmm0); 222 unpcklpd(xmm0, xmm0); 223 movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL 224 movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 225 movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL 226 movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL 227 pextrw(eax, xmm0, 3); 228 andl(eax, 32767); 229 movl(edx, 16527); 230 subl(edx, eax); 231 subl(eax, 15504); 232 orl(edx, eax); 233 cmpl(edx, INT_MIN); 234 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 235 
mulpd(xmm1, xmm0); 236 addpd(xmm1, xmm6); 237 movapd(xmm7, xmm1); 238 subpd(xmm1, xmm6); 239 mulpd(xmm2, xmm1); 240 movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL 241 mulpd(xmm3, xmm1); 242 movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 243 subpd(xmm0, xmm2); 244 movdl(eax, xmm7); 245 movl(ecx, eax); 246 andl(ecx, 63); 247 shll(ecx, 4); 248 sarl(eax, 6); 249 movl(edx, eax); 250 movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 251 pand(xmm7, xmm6); 252 movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 253 paddq(xmm7, xmm6); 254 psllq(xmm7, 46); 255 subpd(xmm0, xmm3); 256 lea(tmp, ExternalAddress(Tbl_addr)); 257 movdqu(xmm2, Address(ecx, tmp)); 258 mulpd(xmm4, xmm0); 259 movapd(xmm6, xmm0); 260 movapd(xmm1, xmm0); 261 mulpd(xmm6, xmm6); 262 mulpd(xmm0, xmm6); 263 addpd(xmm5, xmm4); 264 mulsd(xmm0, xmm6); 265 mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL 266 addsd(xmm1, xmm2); 267 unpckhpd(xmm2, xmm2); 268 mulpd(xmm0, xmm5); 269 addsd(xmm1, xmm0); 270 por(xmm2, xmm7); 271 unpckhpd(xmm0, xmm0); 272 addsd(xmm0, xmm1); 273 addsd(xmm0, xmm6); 274 addl(edx, 894); 275 cmpl(edx, 1916); 276 jcc(Assembler::above, L_2TAG_PACKET_1_0_2); 277 mulsd(xmm0, xmm2); 278 addsd(xmm0, xmm2); 279 jmp(B1_5); 280 281 bind(L_2TAG_PACKET_1_0_2); 282 xorpd(xmm3, xmm3); 283 movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL 284 movl(edx, -1022); 285 subl(edx, eax); 286 movdl(xmm5, edx); 287 psllq(xmm4, xmm5); 288 movl(ecx, eax); 289 sarl(eax, 1); 290 pinsrw(xmm3, eax, 3); 291 movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL 292 psllq(xmm3, 4); 293 psubd(xmm2, xmm3); 294 mulsd(xmm0, xmm2); 295 cmpl(edx, 52); 296 jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); 297 
pand(xmm4, xmm2); 298 paddd(xmm3, xmm6); 299 subsd(xmm2, xmm4); 300 addsd(xmm0, xmm2); 301 cmpl(ecx, 1023); 302 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); 303 pextrw(ecx, xmm0, 3); 304 andl(ecx, 32768); 305 orl(edx, ecx); 306 cmpl(edx, 0); 307 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); 308 movapd(xmm6, xmm0); 309 addsd(xmm0, xmm4); 310 mulsd(xmm0, xmm3); 311 pextrw(ecx, xmm0, 3); 312 andl(ecx, 32752); 313 cmpl(ecx, 0); 314 jcc(Assembler::equal, L_2TAG_PACKET_5_0_2); 315 jmp(B1_5); 316 317 bind(L_2TAG_PACKET_5_0_2); 318 mulsd(xmm6, xmm3); 319 mulsd(xmm4, xmm3); 320 movdqu(xmm0, xmm6); 321 pxor(xmm6, xmm4); 322 psrad(xmm6, 31); 323 pshufd(xmm6, xmm6, 85); 324 psllq(xmm0, 1); 325 psrlq(xmm0, 1); 326 pxor(xmm0, xmm6); 327 psrlq(xmm6, 63); 328 paddq(xmm0, xmm6); 329 paddq(xmm0, xmm4); 330 movl(Address(rsp, 0), 15); 331 jmp(L_2TAG_PACKET_6_0_2); 332 333 bind(L_2TAG_PACKET_4_0_2); 334 addsd(xmm0, xmm4); 335 mulsd(xmm0, xmm3); 336 jmp(B1_5); 337 338 bind(L_2TAG_PACKET_3_0_2); 339 addsd(xmm0, xmm4); 340 mulsd(xmm0, xmm3); 341 pextrw(ecx, xmm0, 3); 342 andl(ecx, 32752); 343 cmpl(ecx, 32752); 344 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); 345 jmp(B1_5); 346 347 bind(L_2TAG_PACKET_2_0_2); 348 paddd(xmm3, xmm6); 349 addpd(xmm0, xmm2); 350 mulsd(xmm0, xmm3); 351 movl(Address(rsp, 0), 15); 352 jmp(L_2TAG_PACKET_6_0_2); 353 354 bind(L_2TAG_PACKET_8_0_2); 355 cmpl(eax, 2146435072); 356 jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); 357 movl(eax, Address(rsp, 12)); 358 cmpl(eax, INT_MIN); 359 jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2); 360 movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL 361 mulsd(xmm0, xmm0); 362 363 bind(L_2TAG_PACKET_7_0_2); 364 movl(Address(rsp, 0), 14); 365 jmp(L_2TAG_PACKET_6_0_2); 366 367 bind(L_2TAG_PACKET_10_0_2); 368 movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL 369 mulsd(xmm0, xmm0); 370 movl(Address(rsp, 0), 15); 371 jmp(L_2TAG_PACKET_6_0_2); 372 373 bind(L_2TAG_PACKET_9_0_2); 374 movl(edx, 
Address(rsp, 8)); 375 cmpl(eax, 2146435072); 376 jcc(Assembler::above, L_2TAG_PACKET_11_0_2); 377 cmpl(edx, 0); 378 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); 379 movl(eax, Address(rsp, 12)); 380 cmpl(eax, 2146435072); 381 jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2); 382 movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL 383 jmp(B1_5); 384 385 bind(L_2TAG_PACKET_12_0_2); 386 movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL 387 jmp(B1_5); 388 389 bind(L_2TAG_PACKET_11_0_2); 390 movsd(xmm0, Address(rsp, 8)); 391 addsd(xmm0, xmm0); 392 jmp(B1_5); 393 394 bind(L_2TAG_PACKET_0_0_2); 395 movl(eax, Address(rsp, 12)); 396 andl(eax, 2147483647); 397 cmpl(eax, 1083179008); 398 jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); 399 movsd(Address(rsp, 8), xmm0); 400 addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL 401 jmp(B1_5); 402 403 bind(L_2TAG_PACKET_6_0_2); 404 movq(Address(rsp, 16), xmm0); 405 406 bind(B1_3); 407 movq(xmm0, Address(rsp, 16)); 408 409 bind(B1_5); 410 addq(rsp, 24); 411 } 412 #else 413 // The 32 bit code is at most SSE2 compliant 414 ALIGNED_(16) juint _static_const_table[] = 415 { 416 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL, 417 0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL, 418 0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL, 419 0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 420 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 421 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 422 0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 423 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL, 424 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, 425 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, 426 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 427 0x00008745UL, 
0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, 428 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, 429 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, 430 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 431 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, 432 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, 433 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, 434 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 435 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 436 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 437 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 438 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 439 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 440 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 441 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 442 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 443 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 444 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 445 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 446 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 447 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 448 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 449 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 450 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 451 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, 452 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, 453 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, 454 0x36cf4e62UL, 
0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 455 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, 456 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, 457 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, 458 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 459 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, 460 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, 461 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, 462 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 463 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, 464 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, 465 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, 466 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 467 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, 468 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, 469 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, 470 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 471 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, 472 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, 473 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, 474 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 475 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL, 476 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL, 477 0x00100000UL 478 }; 479 480 //registers, 481 // input: (rbp + 8) 482 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 483 // rax, rdx, rcx, rbx (tmp) 484 485 // Code generated by Intel C compiler for LIBM library 486 487 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, 
XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 488 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 489 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 490 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; 491 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; 492 493 assert_different_registers(tmp, eax, ecx, edx); 494 jmp(start); 495 address static_const_table = (address)_static_const_table; 496 497 bind(start); 498 subl(rsp, 120); 499 movl(Address(rsp, 64), tmp); 500 lea(tmp, ExternalAddress(static_const_table)); 501 movdqu(xmm0, Address(rsp, 128)); 502 unpcklpd(xmm0, xmm0); 503 movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL 504 movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 505 movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL 506 movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL 507 pextrw(eax, xmm0, 3); 508 andl(eax, 32767); 509 movl(edx, 16527); 510 subl(edx, eax); 511 subl(eax, 15504); 512 orl(edx, eax); 513 cmpl(edx, INT_MIN); 514 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 515 mulpd(xmm1, xmm0); 516 addpd(xmm1, xmm6); 517 movapd(xmm7, xmm1); 518 subpd(xmm1, xmm6); 519 mulpd(xmm2, xmm1); 520 movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL 521 mulpd(xmm3, xmm1); 522 movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 523 subpd(xmm0, xmm2); 524 movdl(eax, xmm7); 525 movl(ecx, eax); 526 andl(ecx, 63); 527 shll(ecx, 4); 528 sarl(eax, 6); 529 movl(edx, eax); 530 movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 531 pand(xmm7, xmm6); 
532 movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 533 paddq(xmm7, xmm6); 534 psllq(xmm7, 46); 535 subpd(xmm0, xmm3); 536 movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); 537 mulpd(xmm4, xmm0); 538 movapd(xmm6, xmm0); 539 movapd(xmm1, xmm0); 540 mulpd(xmm6, xmm6); 541 mulpd(xmm0, xmm6); 542 addpd(xmm5, xmm4); 543 mulsd(xmm0, xmm6); 544 mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL 545 addsd(xmm1, xmm2); 546 unpckhpd(xmm2, xmm2); 547 mulpd(xmm0, xmm5); 548 addsd(xmm1, xmm0); 549 por(xmm2, xmm7); 550 unpckhpd(xmm0, xmm0); 551 addsd(xmm0, xmm1); 552 addsd(xmm0, xmm6); 553 addl(edx, 894); 554 cmpl(edx, 1916); 555 jcc(Assembler::above, L_2TAG_PACKET_1_0_2); 556 mulsd(xmm0, xmm2); 557 addsd(xmm0, xmm2); 558 jmp(L_2TAG_PACKET_2_0_2); 559 560 bind(L_2TAG_PACKET_1_0_2); 561 fnstcw(Address(rsp, 24)); 562 movzwl(edx, Address(rsp, 24)); 563 orl(edx, 768); 564 movw(Address(rsp, 28), edx); 565 fldcw(Address(rsp, 28)); 566 movl(edx, eax); 567 sarl(eax, 1); 568 subl(edx, eax); 569 movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL 570 pandn(xmm6, xmm2); 571 addl(eax, 1023); 572 movdl(xmm3, eax); 573 psllq(xmm3, 52); 574 por(xmm6, xmm3); 575 addl(edx, 1023); 576 movdl(xmm4, edx); 577 psllq(xmm4, 52); 578 movsd(Address(rsp, 8), xmm0); 579 fld_d(Address(rsp, 8)); 580 movsd(Address(rsp, 16), xmm6); 581 fld_d(Address(rsp, 16)); 582 fmula(1); 583 faddp(1); 584 movsd(Address(rsp, 8), xmm4); 585 fld_d(Address(rsp, 8)); 586 fmulp(1); 587 fstp_d(Address(rsp, 8)); 588 movsd(xmm0, Address(rsp, 8)); 589 fldcw(Address(rsp, 24)); 590 pextrw(ecx, xmm0, 3); 591 andl(ecx, 32752); 592 cmpl(ecx, 32752); 593 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); 594 cmpl(ecx, 0); 595 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); 596 jmp(L_2TAG_PACKET_2_0_2); 597 cmpl(ecx, INT_MIN); 598 jcc(Assembler::less, L_2TAG_PACKET_3_0_2); 599 cmpl(ecx, -1064950997); 600 
jcc(Assembler::less, L_2TAG_PACKET_2_0_2); 601 jcc(Assembler::greater, L_2TAG_PACKET_4_0_2); 602 movl(edx, Address(rsp, 128)); 603 cmpl(edx, -17155601); 604 jcc(Assembler::less, L_2TAG_PACKET_2_0_2); 605 jmp(L_2TAG_PACKET_4_0_2); 606 607 bind(L_2TAG_PACKET_3_0_2); 608 movl(edx, 14); 609 jmp(L_2TAG_PACKET_5_0_2); 610 611 bind(L_2TAG_PACKET_4_0_2); 612 movl(edx, 15); 613 614 bind(L_2TAG_PACKET_5_0_2); 615 movsd(Address(rsp, 0), xmm0); 616 movsd(xmm0, Address(rsp, 128)); 617 fld_d(Address(rsp, 0)); 618 jmp(L_2TAG_PACKET_6_0_2); 619 620 bind(L_2TAG_PACKET_7_0_2); 621 cmpl(eax, 2146435072); 622 jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2); 623 movl(eax, Address(rsp, 132)); 624 cmpl(eax, INT_MIN); 625 jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2); 626 movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL 627 mulsd(xmm0, xmm0); 628 movl(edx, 14); 629 jmp(L_2TAG_PACKET_5_0_2); 630 631 bind(L_2TAG_PACKET_9_0_2); 632 movsd(xmm0, Address(tmp, 1216)); 633 mulsd(xmm0, xmm0); 634 movl(edx, 15); 635 jmp(L_2TAG_PACKET_5_0_2); 636 637 bind(L_2TAG_PACKET_8_0_2); 638 movl(edx, Address(rsp, 128)); 639 cmpl(eax, 2146435072); 640 jcc(Assembler::above, L_2TAG_PACKET_10_0_2); 641 cmpl(edx, 0); 642 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); 643 movl(eax, Address(rsp, 132)); 644 cmpl(eax, 2146435072); 645 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); 646 movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL 647 jmp(L_2TAG_PACKET_2_0_2); 648 649 bind(L_2TAG_PACKET_11_0_2); 650 movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL 651 jmp(L_2TAG_PACKET_2_0_2); 652 653 bind(L_2TAG_PACKET_10_0_2); 654 movsd(xmm0, Address(rsp, 128)); 655 addsd(xmm0, xmm0); 656 jmp(L_2TAG_PACKET_2_0_2); 657 658 bind(L_2TAG_PACKET_0_0_2); 659 movl(eax, Address(rsp, 132)); 660 andl(eax, 2147483647); 661 cmpl(eax, 1083179008); 662 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); 663 movsd(xmm0, Address(rsp, 128)); 664 addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 
0x3ff00000UL 665 jmp(L_2TAG_PACKET_2_0_2); 666 667 bind(L_2TAG_PACKET_2_0_2); 668 movsd(Address(rsp, 48), xmm0); 669 fld_d(Address(rsp, 48)); 670 671 bind(L_2TAG_PACKET_6_0_2); 672 movl(tmp, Address(rsp, 64)); 673 } 674 #endif