/*
 * Copyright (c) 2015, Intel Corporation.
 * Intel Math Library (LIBM) Source Code
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"

#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif

/******************************************************************************/
//                     ALGORITHM DESCRIPTION - EXP()
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//    e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//  where
//    x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//    m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//  The values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//  P(y) is a minimax polynomial approximation of exp(y)-1
//  on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//  calculated with Maple V).
54 // 55 // To avoid problems with arithmetic overflow and underflow, 56 // n n1 n2 57 // value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] 58 // where BIAS is a value of exponent bias. 59 // 60 // Special cases: 61 // exp(NaN) = NaN 62 // exp(+INF) = +INF 63 // exp(-INF) = 0 64 // exp(x) = 1 for subnormals 65 // for finite argument, only exp(0)=1 is exact 66 // For IEEE double 67 // if x > 709.782712893383973096 then exp(x) overflow 68 // if x < -745.133219101941108420 then exp(x) underflow 69 // 70 /******************************************************************************/ 71 72 #ifdef _LP64 73 74 ALIGNED_(16) juint _cv[] = 75 { 76 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL, 77 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 78 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 79 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL, 80 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 81 }; 82 83 ALIGNED_(16) juint _shifter[] = 84 { 85 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 86 }; 87 88 ALIGNED_(16) juint _mmask[] = 89 { 90 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 91 }; 92 93 ALIGNED_(16) juint _bias[] = 94 { 95 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 96 }; 97 98 ALIGNED_(16) juint _Tbl_addr[] = 99 { 100 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, 101 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, 102 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 103 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, 104 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, 105 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, 106 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 107 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, 108 0x3e3a2f5fUL, 
0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, 109 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, 110 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 111 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 112 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 113 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 114 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 115 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 116 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 117 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 118 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 119 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 120 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 121 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 122 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 123 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 124 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 125 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 126 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 127 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, 128 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, 129 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, 130 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 131 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, 132 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, 133 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, 134 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 135 0x00097d82UL, 
0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, 136 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, 137 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, 138 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 139 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, 140 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, 141 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, 142 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 143 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, 144 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, 145 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, 146 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 147 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, 148 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, 149 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, 150 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 151 0x000fa7c1UL 152 }; 153 154 ALIGNED_(16) juint _ALLONES[] = 155 { 156 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL 157 }; 158 159 ALIGNED_(16) juint _ebias[] = 160 { 161 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL 162 }; 163 164 ALIGNED_(4) juint _XMAX[] = 165 { 166 0xffffffffUL, 0x7fefffffUL 167 }; 168 169 ALIGNED_(4) juint _XMIN[] = 170 { 171 0x00000000UL, 0x00100000UL 172 }; 173 174 ALIGNED_(4) juint _INF[] = 175 { 176 0x00000000UL, 0x7ff00000UL 177 }; 178 179 ALIGNED_(4) juint _ZERO[] = 180 { 181 0x00000000UL, 0x00000000UL 182 }; 183 184 ALIGNED_(4) juint _ONE_val[] = 185 { 186 0x00000000UL, 0x3ff00000UL 187 }; 188 189 190 // Registers: 191 // input: xmm0 192 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 193 // rax, rdx, rcx, tmp - r11 194 195 // Code generated by Intel C compiler for 
// LIBM library

// Emits the _LP64 instruction sequence that computes exp(x) for a double.
//
// The argument is read from the low lane of xmm0 and every exit path leaves
// the result in the low lane of xmm0.  xmm1-xmm7, eax, ecx, edx and tmp are
// overwritten.  24 bytes of stack are reserved for the whole sequence:
//   [rsp + 0]   status slot: 14 or 15 is stored on the overflow/underflow
//               paths; nothing in this function reads it back
//   [rsp + 8]   saved copy of the argument (re-read by the special-case code)
//   [rsp + 16]  spill slot for the result on the status-code paths
// The sequence ends by releasing the stack; no ret is emitted here, so
// presumably the caller appends its own epilogue (verify against the caller).
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  // This jmp targets the very next emitted instruction: the address
  // declarations below generate no code.
  jmp(start);
  address cv = (address)_cv;
  address Shifter = (address)_shifter;
  address mmask = (address)_mmask;
  address bias = (address)_bias;
  address Tbl_addr = (address)_Tbl_addr;
  address ALLONES = (address)_ALLONES;
  address ebias = (address)_ebias;
  address XMAX = (address)_XMAX;
  address XMIN = (address)_XMIN;
  address INF = (address)_INF;
  address ZERO = (address)_ZERO;
  address ONE_val = (address)_ONE_val;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 8), xmm0);            // keep x for the special-case paths
  unpcklpd(xmm0, xmm0);                    // x in both lanes
  movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, ExternalAddress(16+cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, ExternalAddress(32+cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // Range check on the top 16 bits of x with the sign removed.  Both
  // differences are non-negative only for 15504 <= eax <= 16527
  // (0x3C90..0x408F, i.e. 2^-54 <= |x| < 1024).  Otherwise the sign bit of
  // edx is set and the unsigned compare against INT_MIN branches to the
  // special-case code.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // Argument reduction.  Adding and then subtracting Shifter (1.5 * 2^52)
  // leaves x * 64/log(2) rounded to an integer m in xmm1; xmm7 keeps the
  // shifted sum, whose low 32 bits are m as an integer.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);                       // m * high part of log(2)/64
  movdqu(xmm4, ExternalAddress(64+cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);                       // m * low part of log(2)/64
  movdqu(xmm5, ExternalAddress(80+cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  movdl(eax, xmm7);                        // eax = m
  movl(ecx, eax);
  andl(ecx, 63);                           // j = m & 63
  shll(ecx, 4);                            // byte offset of table entry j (16 bytes each)
  sarl(eax, 6);                            // n = m >> 6 (arithmetic)
  movl(edx, eax);
  // xmm7 = ((m & ~63) + 0xffc0) << 46 = (n + 1023) << 52: exponent bits of 2^n.
  movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);                       // xmm0 = y, the reduced argument
  lea(tmp, ExternalAddress(Tbl_addr));
  movdqu(xmm2, Address(ecx,tmp));          // 16-byte table entry for j
  // Polynomial evaluation in y using the coefficients at cv+48, cv+64, cv+80.
  // The low qword of the table entry is added into the sum (presumably the
  // T_lo[j] correction from the header comment).
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, ExternalAddress(48+cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);                    // high qword of the table entry -> low lane
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);                         // merge the exponent bits of 2^n into the table value
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Unsigned check: n + 894 > 1916 means n < -894 or n > 1022, in which case
  // the scale cannot be applied in a single step.
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  // Fast path: result = P * T + T, with T = xmm2 (table value scaled by 2^n).
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp (B1_5);

  // n is outside [-894, 1022].
  // NOTE(review): the sequence below appears to apply the scale in two steps
  // and to handle results near the overflow/underflow limits (compare the
  // "2^n computed as 2^n1 * 2^n2" remark in the file header); it has not
  // been re-derived instruction by instruction here.
  bind(L_2TAG_PACKET_1_0_2);
  xorpd(xmm3, xmm3);
  movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
  movl(edx, -1022);
  subl(edx, eax);                          // edx = -1022 - n
  movdl(xmm5, edx);
  psllq(xmm4, xmm5);
  movl(ecx, eax);                          // ecx = n
  sarl(eax, 1);                            // eax = n >> 1
  pinsrw(xmm3, eax, 3);
  movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
  psllq(xmm3, 4);
  psubd(xmm2, xmm3);
  mulsd(xmm0, xmm2);
  cmpl(edx, 52);
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  pand(xmm4, xmm2);
  paddd(xmm3, xmm6);
  subsd(xmm2, xmm4);
  addsd(xmm0, xmm2);
  cmpl(ecx, 1023);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32768);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  movapd(xmm6, xmm0);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);                        // exponent field of the result
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
  jmp(B1_5);

  // The exponent field of the scaled result is zero; the result is rebuilt
  // with integer operations and status 15 is recorded.
  bind(L_2TAG_PACKET_5_0_2);
  mulsd(xmm6, xmm3);
  mulsd(xmm4, xmm3);
  movdqu(xmm0, xmm6);
  pxor(xmm6, xmm4);
  psrad(xmm6, 31);
  pshufd(xmm6, xmm6, 85);
  psllq(xmm0, 1);
  psrlq(xmm0, 1);
  pxor(xmm0, xmm6);
  psrlq(xmm6, 63);
  paddq(xmm0, xmm6);
  paddq(xmm0, xmm4);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  jmp(B1_5);

  // n >= 1023: finish the scaling, then test for an all-ones exponent field
  // (32752 = 0x7ff0), which means the result overflowed.
  bind(L_2TAG_PACKET_3_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  jmp(B1_5);

  // -1022 - n > 52: status 15 is recorded for this path.
  bind(L_2TAG_PACKET_2_0_2);
  paddd(xmm3, xmm6);
  addpd(xmm0, xmm2);
  mulsd(xmm0, xmm3);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  // |x| >= 1024.  eax still holds the high word of x with the sign cleared,
  // as set up in L_2TAG_PACKET_0_0_2.
  bind(L_2TAG_PACKET_8_0_2);
  cmpl(eax, 2146435072);                   // 0x7ff00000: Inf or NaN
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  movl(eax, Address(rsp,12));              // high word of x, sign included
  cmpl(eax, INT_MIN);                      // unsigned >= 0x80000000 <=> x is negative
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
  // Large positive x: XMAX * XMAX overflows to +Inf.
  movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);

  bind(L_2TAG_PACKET_7_0_2);
  movl(Address(rsp,0), 14);
  jmp(L_2TAG_PACKET_6_0_2);

  // Large negative x: XMIN * XMIN underflows to zero.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  // Exponent field is all ones: a non-zero mantissa (high or low word) means
  // NaN, otherwise x is +Inf or -Inf.
  bind(L_2TAG_PACKET_9_0_2);
  movl(edx, Address(rsp,8));               // low word of x
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movl(eax, Address(rsp,12));
  cmpl(eax, 2146435072);                   // exactly 0x7ff00000 <=> +Inf
  jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
  jmp(B1_5);

  // x == -Inf: the result is 0.
  bind(L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
  jmp(B1_5);

  // NaN input: return x + x.
  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(rsp, 8));
  addsd(xmm0, xmm0);
  jmp(B1_5);

  // Out-of-range dispatch: 1083179008 = 0x40900000 is the high word of 1024.0.
  // At or above it is the large/Inf/NaN case; below it |x| < 2^-54 and the
  // result is 1 + x.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 12));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
  movsd(Address(rsp, 8), xmm0);
  addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
  jmp(B1_5);

  // Common tail of the status-code paths: the result goes through [rsp + 16].
  bind(L_2TAG_PACKET_6_0_2);
  movq(Address(rsp, 16), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 16));

  // Common exit: release the scratch area.
  bind(B1_5);
  addq(rsp, 24);
}

#endif

#ifndef _LP64

ALIGNED_(16) juint _static_const_table[] =
{
  0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
  0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
  0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
  0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
  0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
  0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
  0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
  0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
  0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
  0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
  0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
  0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
  0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
  0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 433 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, 434 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, 435 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, 436 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 437 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 438 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 439 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 440 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 441 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 442 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 443 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 444 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 445 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 446 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 447 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 448 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 449 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 450 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 451 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 452 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 453 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, 454 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, 455 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, 456 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 457 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, 458 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, 459 
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, 460 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 461 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, 462 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, 463 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, 464 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 465 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, 466 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, 467 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, 468 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 469 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, 470 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, 471 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, 472 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 473 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, 474 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, 475 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, 476 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 477 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL, 478 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL, 479 0x00100000UL 480 }; 481 482 //registers, 483 // input: (rbp + 8) 484 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 485 // rax, rdx, rcx, rbx (tmp) 486 487 // Code generated by Intel C compiler for LIBM library 488 489 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 490 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, 
L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 491 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 492 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; 493 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; 494 495 assert_different_registers(tmp, eax, ecx, edx); 496 jmp(start); 497 address static_const_table = (address)_static_const_table; 498 499 bind(start); 500 subl(rsp, 120); 501 movl(Address(rsp, 64), tmp); 502 lea(tmp, ExternalAddress(static_const_table)); 503 movdqu(xmm0, Address(rsp, 128)); 504 unpcklpd(xmm0, xmm0); 505 movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL 506 movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL 507 movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL 508 movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL 509 pextrw(eax, xmm0, 3); 510 andl(eax, 32767); 511 movl(edx, 16527); 512 subl(edx, eax); 513 subl(eax, 15504); 514 orl(edx, eax); 515 cmpl(edx, INT_MIN); 516 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 517 mulpd(xmm1, xmm0); 518 addpd(xmm1, xmm6); 519 movapd(xmm7, xmm1); 520 subpd(xmm1, xmm6); 521 mulpd(xmm2, xmm1); 522 movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL 523 mulpd(xmm3, xmm1); 524 movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL 525 subpd(xmm0, xmm2); 526 movdl(eax, xmm7); 527 movl(ecx, eax); 528 andl(ecx, 63); 529 shll(ecx, 4); 530 sarl(eax, 6); 531 movl(edx, eax); 532 movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL 533 pand(xmm7, xmm6); 534 movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL 535 paddq(xmm7, xmm6); 536 psllq(xmm7, 46); 537 subpd(xmm0, xmm3); 538 movdqu(xmm2, Address(tmp, ecx, 
Address::times_1, 160)); 539 mulpd(xmm4, xmm0); 540 movapd(xmm6, xmm0); 541 movapd(xmm1, xmm0); 542 mulpd(xmm6, xmm6); 543 mulpd(xmm0, xmm6); 544 addpd(xmm5, xmm4); 545 mulsd(xmm0, xmm6); 546 mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL 547 addsd(xmm1, xmm2); 548 unpckhpd(xmm2, xmm2); 549 mulpd(xmm0, xmm5); 550 addsd(xmm1, xmm0); 551 por(xmm2, xmm7); 552 unpckhpd(xmm0, xmm0); 553 addsd(xmm0, xmm1); 554 addsd(xmm0, xmm6); 555 addl(edx, 894); 556 cmpl(edx, 1916); 557 jcc (Assembler::above, L_2TAG_PACKET_1_0_2); 558 mulsd(xmm0, xmm2); 559 addsd(xmm0, xmm2); 560 jmp(L_2TAG_PACKET_2_0_2); 561 562 bind(L_2TAG_PACKET_1_0_2); 563 fnstcw(Address(rsp, 24)); 564 movzwl(edx, Address(rsp, 24)); 565 orl(edx, 768); 566 movw(Address(rsp, 28), edx); 567 fldcw(Address(rsp, 28)); 568 movl(edx, eax); 569 sarl(eax, 1); 570 subl(edx, eax); 571 movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL 572 pandn(xmm6, xmm2); 573 addl(eax, 1023); 574 movdl(xmm3, eax); 575 psllq(xmm3, 52); 576 por(xmm6, xmm3); 577 addl(edx, 1023); 578 movdl(xmm4, edx); 579 psllq(xmm4, 52); 580 movsd(Address(rsp, 8), xmm0); 581 fld_d(Address(rsp, 8)); 582 movsd(Address(rsp, 16), xmm6); 583 fld_d(Address(rsp, 16)); 584 fmula(1); 585 faddp(1); 586 movsd(Address(rsp, 8), xmm4); 587 fld_d(Address(rsp, 8)); 588 fmulp(1); 589 fstp_d(Address(rsp, 8)); 590 movsd(xmm0,Address(rsp, 8)); 591 fldcw(Address(rsp, 24)); 592 pextrw(ecx, xmm0, 3); 593 andl(ecx, 32752); 594 cmpl(ecx, 32752); 595 jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); 596 cmpl(ecx, 0); 597 jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); 598 jmp(L_2TAG_PACKET_2_0_2); 599 cmpl(ecx, INT_MIN); 600 jcc(Assembler::less, L_2TAG_PACKET_3_0_2); 601 cmpl(ecx, -1064950997); 602 jcc(Assembler::less, L_2TAG_PACKET_2_0_2); 603 jcc(Assembler::greater, L_2TAG_PACKET_4_0_2); 604 movl(edx, Address(rsp, 128)); 605 cmpl(edx ,-17155601); 606 jcc(Assembler::less, L_2TAG_PACKET_2_0_2); 607 
jmp(L_2TAG_PACKET_4_0_2); 608 609 bind(L_2TAG_PACKET_3_0_2); 610 movl(edx, 14); 611 jmp(L_2TAG_PACKET_5_0_2); 612 613 bind(L_2TAG_PACKET_4_0_2); 614 movl(edx, 15); 615 616 bind(L_2TAG_PACKET_5_0_2); 617 movsd(Address(rsp, 0), xmm0); 618 movsd(xmm0, Address(rsp, 128)); 619 fld_d(Address(rsp, 0)); 620 jmp(L_2TAG_PACKET_6_0_2); 621 622 bind(L_2TAG_PACKET_7_0_2); 623 cmpl(eax, 2146435072); 624 jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2); 625 movl(eax, Address(rsp, 132)); 626 cmpl(eax, INT_MIN); 627 jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2); 628 movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL 629 mulsd(xmm0, xmm0); 630 movl(edx, 14); 631 jmp(L_2TAG_PACKET_5_0_2); 632 633 bind(L_2TAG_PACKET_9_0_2); 634 movsd(xmm0, Address(tmp, 1216)); 635 mulsd(xmm0, xmm0); 636 movl(edx, 15); 637 jmp(L_2TAG_PACKET_5_0_2); 638 639 bind(L_2TAG_PACKET_8_0_2); 640 movl(edx, Address(rsp, 128)); 641 cmpl(eax, 2146435072); 642 jcc(Assembler::above, L_2TAG_PACKET_10_0_2); 643 cmpl(edx, 0); 644 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); 645 movl(eax, Address(rsp, 132)); 646 cmpl(eax, 2146435072); 647 jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); 648 movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL 649 jmp(L_2TAG_PACKET_2_0_2); 650 651 bind(L_2TAG_PACKET_11_0_2); 652 movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL 653 jmp(L_2TAG_PACKET_2_0_2); 654 655 bind(L_2TAG_PACKET_10_0_2); 656 movsd(xmm0, Address(rsp, 128)); 657 addsd(xmm0, xmm0); 658 jmp(L_2TAG_PACKET_2_0_2); 659 660 bind(L_2TAG_PACKET_0_0_2); 661 movl(eax, Address(rsp, 132)); 662 andl(eax, 2147483647); 663 cmpl(eax, 1083179008); 664 jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); 665 movsd(xmm0, Address(rsp, 128)); 666 addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL 667 jmp(L_2TAG_PACKET_2_0_2); 668 669 bind(L_2TAG_PACKET_2_0_2); 670 movsd(Address(rsp, 48), xmm0); 671 fld_d(Address(rsp, 48)); 672 673 bind(L_2TAG_PACKET_6_0_2); 674 movl(tmp, Address(rsp, 64)); 
675 } 676 677 #endif 678 679 /******************************************************************************/ 680 // ALGORITHM DESCRIPTION - LOG() 681 // --------------------- 682 // 683 // x=2^k * mx, mx in [1,2) 684 // 685 // Get B~1/mx based on the output of rcpss instruction (B0) 686 // B = int((B0*2^7+0.5))/2^7 687 // 688 // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) 689 // 690 // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and 691 // p(r) is a degree 7 polynomial 692 // -log(B) read from data table (high, low parts) 693 // Result is formed from high and low parts 694 // 695 // Special cases: 696 // log(NaN) = quiet NaN, and raise invalid exception 697 // log(+INF) = that INF 698 // log(0) = -INF with divide-by-zero exception raised 699 // log(1) = +0 700 // log(x) = NaN with invalid exception raised if x < -0, including -INF 701 // 702 /******************************************************************************/ 703 704 #ifdef _LP64 705 706 ALIGNED_(16) juint _L_tbl[] = 707 { 708 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, 709 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, 710 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, 711 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, 712 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, 713 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, 714 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, 715 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, 716 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, 717 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, 718 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, 719 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, 720 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 
0x1134dc00UL, 721 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, 722 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, 723 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, 724 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, 725 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, 726 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, 727 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, 728 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, 729 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, 730 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, 731 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, 732 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, 733 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, 734 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, 735 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, 736 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, 737 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, 738 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, 739 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, 740 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, 741 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, 742 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, 743 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, 744 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, 745 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, 746 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, 747 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 
0x3d29ec7dUL, 748 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, 749 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, 750 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, 751 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, 752 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, 753 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, 754 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, 755 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, 756 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, 757 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, 758 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, 759 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, 760 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, 761 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, 762 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, 763 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, 764 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, 765 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, 766 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, 767 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, 768 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 769 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, 770 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, 771 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, 772 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, 773 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, 774 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 
0xaae92cd1UL, 775 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, 776 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, 777 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, 778 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, 779 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, 780 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, 781 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, 782 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, 783 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, 784 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, 785 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, 786 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, 787 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, 788 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, 789 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, 790 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, 791 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, 792 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, 793 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, 794 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, 795 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, 796 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, 797 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, 798 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, 799 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, 800 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, 801 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 
0x3fa67c94UL, 802 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, 803 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, 804 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, 805 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, 806 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, 807 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, 808 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, 809 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, 810 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 811 0x80000000UL 812 }; 813 814 ALIGNED_(16) juint _log2[] = 815 { 816 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL 817 }; 818 819 ALIGNED_(16) juint _coeff[] = 820 { 821 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, 822 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, 823 0x00000000UL, 0xbfe00000UL 824 }; 825 826 //registers, 827 // input: xmm0 828 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 829 // rax, rdx, rcx, r8, r11 830 831 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { 832 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 833 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 834 Label L_2TAG_PACKET_8_0_2; 835 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; 836 837 assert_different_registers(tmp1, tmp2, eax, ecx, edx); 838 jmp(start); 839 address L_tbl = (address)_L_tbl; 840 address log2 = (address)_log2; 841 address coeff = (address)_coeff; 842 843 bind(start); 844 subq(rsp, 24); 845 movsd(Address(rsp, 0), xmm0); 846 mov64(rax, 
0x3ff0000000000000); 847 movdq(xmm2, rax); 848 mov64(rdx, 0x77f0000000000000); 849 movdq(xmm3, rdx); 850 movl(ecx, 32768); 851 movdl(xmm4, rcx); 852 mov64(tmp1, 0xffffe00000000000); 853 movdq(xmm5, tmp1); 854 movdqu(xmm1, xmm0); 855 pextrw(eax, xmm0, 3); 856 por(xmm0, xmm2); 857 movl(ecx, 16352); 858 psrlq(xmm0, 27); 859 lea(tmp2, ExternalAddress(L_tbl)); 860 psrld(xmm0, 2); 861 rcpps(xmm0, xmm0); 862 psllq(xmm1, 12); 863 pshufd(xmm6, xmm5, 228); 864 psrlq(xmm1, 12); 865 subl(eax, 16); 866 cmpl(eax, 32736); 867 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 868 869 bind(L_2TAG_PACKET_1_0_2); 870 paddd(xmm0, xmm4); 871 por(xmm1, xmm3); 872 movdl(edx, xmm0); 873 psllq(xmm0, 29); 874 pand(xmm5, xmm1); 875 pand(xmm0, xmm6); 876 subsd(xmm1, xmm5); 877 mulpd(xmm5, xmm0); 878 andl(eax, 32752); 879 subl(eax, ecx); 880 cvtsi2sdl(xmm7, eax); 881 mulsd(xmm1, xmm0); 882 movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL 883 movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL 884 subsd(xmm5, xmm2); 885 andl(edx, 16711680); 886 shrl(edx, 12); 887 movdqu(xmm0, Address(tmp2, edx)); 888 movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL 889 addsd(xmm1, xmm5); 890 movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL 891 mulsd(xmm6, xmm7); 892 movddup(xmm5, xmm1); 893 mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL 894 mulsd(xmm3, xmm1); 895 addsd(xmm0, xmm6); 896 mulpd(xmm4, xmm5); 897 mulpd(xmm5, xmm5); 898 movddup(xmm6, xmm0); 899 addsd(xmm0, xmm1); 900 addpd(xmm4, xmm2); 901 mulpd(xmm3, xmm5); 902 subsd(xmm6, xmm0); 903 mulsd(xmm4, xmm1); 904 pshufd(xmm2, xmm0, 238); 905 addsd(xmm1, xmm6); 906 mulsd(xmm5, xmm5); 907 addsd(xmm7, xmm2); 908 addpd(xmm4, xmm3); 909 addsd(xmm1, xmm7); 910 mulpd(xmm4, xmm5); 911 addsd(xmm1, xmm4); 912 pshufd(xmm5, xmm4, 238); 913 addsd(xmm1, xmm5); 914 addsd(xmm0, xmm1); 915 
jmp(B1_5); 916 917 bind(L_2TAG_PACKET_0_0_2); 918 movq(xmm0, Address(rsp, 0)); 919 movq(xmm1, Address(rsp, 0)); 920 addl(eax, 16); 921 cmpl(eax, 32768); 922 jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); 923 cmpl(eax, 16); 924 jcc(Assembler::below, L_2TAG_PACKET_3_0_2); 925 926 bind(L_2TAG_PACKET_4_0_2); 927 addsd(xmm0, xmm0); 928 jmp(B1_5); 929 930 bind(L_2TAG_PACKET_5_0_2); 931 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); 932 cmpl(edx, 0); 933 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); 934 jmp(L_2TAG_PACKET_6_0_2); 935 936 bind(L_2TAG_PACKET_3_0_2); 937 xorpd(xmm1, xmm1); 938 addsd(xmm1, xmm0); 939 movdl(edx, xmm1); 940 psrlq(xmm1, 32); 941 movdl(ecx, xmm1); 942 orl(edx, ecx); 943 cmpl(edx, 0); 944 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); 945 xorpd(xmm1, xmm1); 946 movl(eax, 18416); 947 pinsrw(xmm1, eax, 3); 948 mulsd(xmm0, xmm1); 949 movdqu(xmm1, xmm0); 950 pextrw(eax, xmm0, 3); 951 por(xmm0, xmm2); 952 psrlq(xmm0, 27); 953 movl(ecx, 18416); 954 psrld(xmm0, 2); 955 rcpps(xmm0, xmm0); 956 psllq(xmm1, 12); 957 pshufd(xmm6, xmm5, 228); 958 psrlq(xmm1, 12); 959 jmp(L_2TAG_PACKET_1_0_2); 960 961 bind(L_2TAG_PACKET_2_0_2); 962 movdl(edx, xmm1); 963 psrlq(xmm1, 32); 964 movdl(ecx, xmm1); 965 addl(ecx, ecx); 966 cmpl(ecx, -2097152); 967 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); 968 orl(edx, ecx); 969 cmpl(edx, 0); 970 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); 971 972 bind(L_2TAG_PACKET_6_0_2); 973 xorpd(xmm1, xmm1); 974 xorpd(xmm0, xmm0); 975 movl(eax, 32752); 976 pinsrw(xmm1, eax, 3); 977 mulsd(xmm0, xmm1); 978 movl(Address(rsp, 16), 3); 979 jmp(L_2TAG_PACKET_8_0_2); 980 bind(L_2TAG_PACKET_7_0_2); 981 xorpd(xmm1, xmm1); 982 xorpd(xmm0, xmm0); 983 movl(eax, 49136); 984 pinsrw(xmm0, eax, 3); 985 divsd(xmm0, xmm1); 986 movl(Address(rsp, 16), 2); 987 988 bind(L_2TAG_PACKET_8_0_2); 989 movq(Address(rsp, 8), xmm0); 990 991 bind(B1_3); 992 movq(xmm0, Address(rsp, 8)); 993 994 bind(B1_5); 995 addq(rsp, 24); 996 } 997 998 #endif 999 1000 #ifndef _LP64 1001 1002 
ALIGNED_(16) juint _static_const_table_log[] = 1003 { 1004 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, 1005 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, 1006 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, 1007 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, 1008 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, 1009 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, 1010 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, 1011 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, 1012 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, 1013 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, 1014 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, 1015 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, 1016 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, 1017 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, 1018 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, 1019 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, 1020 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, 1021 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, 1022 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, 1023 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, 1024 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, 1025 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, 1026 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, 1027 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, 1028 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, 1029 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 
0x3fde148aUL, 1030 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, 1031 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, 1032 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, 1033 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, 1034 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, 1035 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, 1036 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, 1037 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, 1038 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, 1039 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, 1040 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, 1041 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, 1042 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, 1043 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, 1044 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, 1045 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, 1046 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, 1047 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, 1048 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, 1049 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, 1050 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, 1051 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, 1052 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, 1053 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, 1054 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, 1055 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, 1056 0x6d0eb800UL, 0x3fd21445UL, 
0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, 1057 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, 1058 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, 1059 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, 1060 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, 1061 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, 1062 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, 1063 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, 1064 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 1065 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, 1066 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, 1067 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, 1068 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, 1069 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, 1070 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, 1071 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, 1072 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, 1073 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, 1074 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, 1075 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, 1076 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, 1077 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, 1078 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, 1079 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, 1080 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, 1081 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, 1082 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, 1083 
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, 1084 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, 1085 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, 1086 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, 1087 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, 1088 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, 1089 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, 1090 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, 1091 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, 1092 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, 1093 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, 1094 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, 1095 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, 1096 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, 1097 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, 1098 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, 1099 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, 1100 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, 1101 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, 1102 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, 1103 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, 1104 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, 1105 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, 1106 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 1107 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, 1108 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, 1109 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 
0x9999999aUL, 0x3fc99999UL, 1110 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, 1111 0xffffe000UL 1112 }; 1113 //registers, 1114 // input: xmm0 1115 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 1116 // rax, rdx, rcx, rbx (tmp) 1117 1118 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 1119 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 1120 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 1121 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; 1122 Label L_2TAG_PACKET_10_0_2, start; 1123 1124 assert_different_registers(tmp, eax, ecx, edx); 1125 jmp(start); 1126 address static_const_table = (address)_static_const_table_log; 1127 1128 bind(start); 1129 subl(rsp, 104); 1130 movl(Address(rsp, 40), tmp); 1131 lea(tmp, ExternalAddress(static_const_table)); 1132 xorpd(xmm2, xmm2); 1133 movl(eax, 16368); 1134 pinsrw(xmm2, eax, 3); 1135 xorpd(xmm3, xmm3); 1136 movl(edx, 30704); 1137 pinsrw(xmm3, edx, 3); 1138 movsd(xmm0, Address(rsp, 112)); 1139 movapd(xmm1, xmm0); 1140 movl(ecx, 32768); 1141 movdl(xmm4, ecx); 1142 movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL 1143 pextrw(eax, xmm0, 3); 1144 por(xmm0, xmm2); 1145 psllq(xmm0, 5); 1146 movl(ecx, 16352); 1147 psrlq(xmm0, 34); 1148 rcpss(xmm0, xmm0); 1149 psllq(xmm1, 12); 1150 pshufd(xmm6, xmm5, 228); 1151 psrlq(xmm1, 12); 1152 subl(eax, 16); 1153 cmpl(eax, 32736); 1154 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 1155 1156 bind(L_2TAG_PACKET_1_0_2); 1157 paddd(xmm0, xmm4); 1158 por(xmm1, xmm3); 1159 movdl(edx, xmm0); 1160 psllq(xmm0, 29); 1161 pand(xmm5, xmm1); 1162 pand(xmm0, xmm6); 1163 subsd(xmm1, xmm5); 1164 mulpd(xmm5, xmm0); 1165 andl(eax, 32752); 1166 subl(eax, ecx); 1167 cvtsi2sdl(xmm7, eax); 1168 mulsd(xmm1, 
xmm0); 1169 movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL 1170 movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL 1171 subsd(xmm5, xmm2); 1172 andl(edx, 16711680); 1173 shrl(edx, 12); 1174 movdqu(xmm0, Address(tmp, edx)); 1175 movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL 1176 addsd(xmm1, xmm5); 1177 movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL 1178 mulsd(xmm6, xmm7); 1179 pshufd(xmm5, xmm1, 68); 1180 mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL 1181 mulsd(xmm3, xmm1); 1182 addsd(xmm0, xmm6); 1183 mulpd(xmm4, xmm5); 1184 mulpd(xmm5, xmm5); 1185 pshufd(xmm6, xmm0, 228); 1186 addsd(xmm0, xmm1); 1187 addpd(xmm4, xmm2); 1188 mulpd(xmm3, xmm5); 1189 subsd(xmm6, xmm0); 1190 mulsd(xmm4, xmm1); 1191 pshufd(xmm2, xmm0, 238); 1192 addsd(xmm1, xmm6); 1193 mulsd(xmm5, xmm5); 1194 addsd(xmm7, xmm2); 1195 addpd(xmm4, xmm3); 1196 addsd(xmm1, xmm7); 1197 mulpd(xmm4, xmm5); 1198 addsd(xmm1, xmm4); 1199 pshufd(xmm5, xmm4, 238); 1200 addsd(xmm1, xmm5); 1201 addsd(xmm0, xmm1); 1202 jmp(L_2TAG_PACKET_2_0_2); 1203 1204 bind(L_2TAG_PACKET_0_0_2); 1205 movsd(xmm0, Address(rsp, 112)); 1206 movdqu(xmm1, xmm0); 1207 addl(eax, 16); 1208 cmpl(eax, 32768); 1209 jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); 1210 cmpl(eax, 16); 1211 jcc(Assembler::below, L_2TAG_PACKET_4_0_2); 1212 1213 bind(L_2TAG_PACKET_5_0_2); 1214 addsd(xmm0, xmm0); 1215 jmp(L_2TAG_PACKET_2_0_2); 1216 1217 bind(L_2TAG_PACKET_6_0_2); 1218 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); 1219 cmpl(edx, 0); 1220 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); 1221 jmp(L_2TAG_PACKET_7_0_2); 1222 1223 bind(L_2TAG_PACKET_3_0_2); 1224 movdl(edx, xmm1); 1225 psrlq(xmm1, 32); 1226 movdl(ecx, xmm1); 1227 addl(ecx, ecx); 1228 cmpl(ecx, -2097152); 1229 jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); 1230 orl(edx, ecx); 1231 cmpl(edx, 0); 1232 
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); 1233 1234 bind(L_2TAG_PACKET_7_0_2); 1235 xorpd(xmm1, xmm1); 1236 xorpd(xmm0, xmm0); 1237 movl(eax, 32752); 1238 pinsrw(xmm1, eax, 3); 1239 movl(edx, 3); 1240 mulsd(xmm0, xmm1); 1241 1242 bind(L_2TAG_PACKET_9_0_2); 1243 movsd(Address(rsp, 0), xmm0); 1244 movsd(xmm0, Address(rsp, 112)); 1245 fld_d(Address(rsp, 0)); 1246 jmp(L_2TAG_PACKET_10_0_2); 1247 1248 bind(L_2TAG_PACKET_8_0_2); 1249 xorpd(xmm1, xmm1); 1250 xorpd(xmm0, xmm0); 1251 movl(eax, 49136); 1252 pinsrw(xmm0, eax, 3); 1253 divsd(xmm0, xmm1); 1254 movl(edx, 2); 1255 jmp(L_2TAG_PACKET_9_0_2); 1256 1257 bind(L_2TAG_PACKET_4_0_2); 1258 movdl(edx, xmm1); 1259 psrlq(xmm1, 32); 1260 movdl(ecx, xmm1); 1261 orl(edx, ecx); 1262 cmpl(edx, 0); 1263 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); 1264 xorpd(xmm1, xmm1); 1265 movl(eax, 18416); 1266 pinsrw(xmm1, eax, 3); 1267 mulsd(xmm0, xmm1); 1268 movapd(xmm1, xmm0); 1269 pextrw(eax, xmm0, 3); 1270 por(xmm0, xmm2); 1271 psllq(xmm0, 5); 1272 movl(ecx, 18416); 1273 psrlq(xmm0, 34); 1274 rcpss(xmm0, xmm0); 1275 psllq(xmm1, 12); 1276 pshufd(xmm6, xmm5, 228); 1277 psrlq(xmm1, 12); 1278 jmp(L_2TAG_PACKET_1_0_2); 1279 1280 bind(L_2TAG_PACKET_2_0_2); 1281 movsd(Address(rsp, 24), xmm0); 1282 fld_d(Address(rsp, 24)); 1283 1284 bind(L_2TAG_PACKET_10_0_2); 1285 movl(tmp, Address(rsp, 40)); 1286 } 1287 1288 #endif