1 /* 2 * Copyright (c) 2016, Intel Corporation. 3 * Intel Math Library (LIBM) Source Code 4 * 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #include "precompiled.hpp" 28 #include "asm/assembler.hpp" 29 #include "asm/assembler.inline.hpp" 30 #include "macroAssembler_x86.hpp" 31 32 #ifdef _MSC_VER 33 #define ALIGNED_(x) __declspec(align(x)) 34 #else 35 #define ALIGNED_(x) __attribute__ ((aligned(x))) 36 #endif 37 38 /******************************************************************************/ 39 // ALGORITHM DESCRIPTION - LOG() 40 // --------------------- 41 // 42 // x=2^k * mx, mx in [1,2) 43 // 44 // Get B~1/mx based on the output of rcpss instruction (B0) 45 // B = int((B0*2^7+0.5))/2^7 46 // 47 // Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) 48 // 49 // Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and 50 // p(r) is a degree 7 polynomial 51 // -log(B) read from data table (high, low parts) 52 // Result is formed from high and low parts 53 // 54 // Special cases: 55 // log(NaN) = quiet NaN, and raise invalid exception 56 // log(+INF) = that INF 57 // log(0) = -INF with divide-by-zero exception raised 58 // log(1) = +0 59 // log(x) = NaN with invalid exception raised if x < -0, including -INF 60 // 61 /******************************************************************************/ 62 63 #ifdef _LP64 64 // The 64 bit code is at most SSE2 compliant 65 ALIGNED_(16) juint _L_tbl[] = 66 { 67 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, 68 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, 69 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, 70 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, 71 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, 72 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, 73 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, 74 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, 75 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, 76 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, 77 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, 78 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, 79 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, 80 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, 81 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, 82 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, 83 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, 84 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, 85 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, 86 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, 87 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, 88 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, 89 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, 90 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, 91 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, 92 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, 93 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, 94 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, 95 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, 96 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, 97 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, 98 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, 99 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, 100 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, 101 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, 102 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, 103 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, 104 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, 105 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, 106 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, 107 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, 108 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, 109 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, 110 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, 111 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, 112 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, 113 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, 114 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, 115 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, 116 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, 117 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, 118 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, 119 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, 120 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, 121 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, 122 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, 123 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, 124 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, 125 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, 126 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, 127 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 128 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, 129 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, 130 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, 131 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, 132 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, 133 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, 134 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, 135 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, 136 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, 137 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, 138 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, 139 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, 140 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, 141 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, 142 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, 143 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, 144 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, 145 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, 146 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, 147 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, 148 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, 149 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, 150 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, 151 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, 152 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, 153 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, 154 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, 155 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, 156 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, 157 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, 158 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, 159 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, 160 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, 161 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, 162 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, 163 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, 164 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, 165 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, 166 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, 167 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, 168 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, 169 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 170 0x80000000UL 171 }; 172 173 ALIGNED_(16) juint _log2[] = 174 { 175 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL 176 }; 177 178 ALIGNED_(16) juint _coeff[] = 179 { 180 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, 181 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, 182 0x00000000UL, 0xbfe00000UL 183 }; 184 185 //registers, 186 // input: xmm0 187 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 188 // rax, rdx, rcx, r8, r11 189 190 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) { 191 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 192 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 193 Label L_2TAG_PACKET_8_0_2; 194 Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; 195 196 assert_different_registers(tmp1, tmp2, eax, ecx, edx); 197 jmp(start); 198 address L_tbl = (address)_L_tbl; 199 address log2 = (address)_log2; 200 address coeff = (address)_coeff; 201 202 bind(start); 203 subq(rsp, 24); 204 movsd(Address(rsp, 0), xmm0); 205 mov64(rax, 0x3ff0000000000000); 206 movdq(xmm2, rax); 207 mov64(rdx, 0x77f0000000000000); 208 movdq(xmm3, rdx); 209 movl(ecx, 32768); 210 movdl(xmm4, rcx); 211 mov64(tmp1, 0xffffe00000000000); 212 movdq(xmm5, tmp1); 213 movdqu(xmm1, xmm0); 214 pextrw(eax, xmm0, 3); 215 por(xmm0, xmm2); 216 movl(ecx, 16352); 217 psrlq(xmm0, 27); 218 lea(tmp2, ExternalAddress(L_tbl)); 219 psrld(xmm0, 2); 220 rcpps(xmm0, xmm0); 221 psllq(xmm1, 12); 222 pshufd(xmm6, xmm5, 228); 223 psrlq(xmm1, 12); 224 subl(eax, 16); 225 cmpl(eax, 32736); 226 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 227 228 bind(L_2TAG_PACKET_1_0_2); 229 paddd(xmm0, xmm4); 230 por(xmm1, xmm3); 231 movdl(edx, xmm0); 232 psllq(xmm0, 29); 233 pand(xmm5, xmm1); 234 pand(xmm0, xmm6); 235 subsd(xmm1, xmm5); 236 mulpd(xmm5, xmm0); 237 andl(eax, 32752); 238 subl(eax, ecx); 239 cvtsi2sdl(xmm7, eax); 240 mulsd(xmm1, xmm0); 241 movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL 242 movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL 243 subsd(xmm5, xmm2); 244 andl(edx, 16711680); 245 shrl(edx, 12); 246 movdqu(xmm0, Address(tmp2, edx)); 247 movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL 248 addsd(xmm1, xmm5); 249 movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL 250 mulsd(xmm6, xmm7); 251 if (VM_Version::supports_sse3()) { 252 movddup(xmm5, xmm1); 253 } 254 else { 255 movdqu(xmm5, xmm1); 256 movlhps(xmm5, xmm5); 257 } 258 mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL 259 mulsd(xmm3, xmm1); 260 addsd(xmm0, xmm6); 261 mulpd(xmm4, xmm5); 262 mulpd(xmm5, xmm5); 263 if (VM_Version::supports_sse3()) { 264 movddup(xmm6, xmm0); 265 } 266 else { 267 movdqu(xmm6, xmm0); 268 movlhps(xmm6, xmm6); 269 } 270 addsd(xmm0, xmm1); 271 addpd(xmm4, xmm2); 272 mulpd(xmm3, xmm5); 273 subsd(xmm6, xmm0); 274 mulsd(xmm4, xmm1); 275 pshufd(xmm2, xmm0, 238); 276 addsd(xmm1, xmm6); 277 mulsd(xmm5, xmm5); 278 addsd(xmm7, xmm2); 279 addpd(xmm4, xmm3); 280 addsd(xmm1, xmm7); 281 mulpd(xmm4, xmm5); 282 addsd(xmm1, xmm4); 283 pshufd(xmm5, xmm4, 238); 284 addsd(xmm1, xmm5); 285 addsd(xmm0, xmm1); 286 jmp(B1_5); 287 288 bind(L_2TAG_PACKET_0_0_2); 289 movq(xmm0, Address(rsp, 0)); 290 movq(xmm1, Address(rsp, 0)); 291 addl(eax, 16); 292 cmpl(eax, 32768); 293 jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); 294 cmpl(eax, 16); 295 jcc(Assembler::below, L_2TAG_PACKET_3_0_2); 296 297 bind(L_2TAG_PACKET_4_0_2); 298 addsd(xmm0, xmm0); 299 jmp(B1_5); 300 301 bind(L_2TAG_PACKET_5_0_2); 302 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); 303 cmpl(edx, 0); 304 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); 305 jmp(L_2TAG_PACKET_6_0_2); 306 307 bind(L_2TAG_PACKET_3_0_2); 308 xorpd(xmm1, xmm1); 309 addsd(xmm1, xmm0); 310 movdl(edx, xmm1); 311 psrlq(xmm1, 32); 312 movdl(ecx, xmm1); 313 orl(edx, ecx); 314 cmpl(edx, 0); 315 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); 316 xorpd(xmm1, xmm1); 317 movl(eax, 18416); 318 pinsrw(xmm1, eax, 3); 319 mulsd(xmm0, xmm1); 320 movdqu(xmm1, xmm0); 321 pextrw(eax, xmm0, 3); 322 por(xmm0, xmm2); 323 psrlq(xmm0, 27); 324 movl(ecx, 18416); 325 psrld(xmm0, 2); 326 rcpps(xmm0, xmm0); 327 psllq(xmm1, 12); 328 pshufd(xmm6, xmm5, 228); 329 psrlq(xmm1, 12); 330 jmp(L_2TAG_PACKET_1_0_2); 331 332 bind(L_2TAG_PACKET_2_0_2); 333 movdl(edx, xmm1); 334 psrlq(xmm1, 32); 335 movdl(ecx, xmm1); 336 addl(ecx, ecx); 337 cmpl(ecx, -2097152); 338 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); 339 orl(edx, ecx); 340 cmpl(edx, 0); 341 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); 342 343 bind(L_2TAG_PACKET_6_0_2); 344 xorpd(xmm1, xmm1); 345 xorpd(xmm0, xmm0); 346 movl(eax, 32752); 347 pinsrw(xmm1, eax, 3); 348 mulsd(xmm0, xmm1); 349 movl(Address(rsp, 16), 3); 350 jmp(L_2TAG_PACKET_8_0_2); 351 bind(L_2TAG_PACKET_7_0_2); 352 xorpd(xmm1, xmm1); 353 xorpd(xmm0, xmm0); 354 movl(eax, 49136); 355 pinsrw(xmm0, eax, 3); 356 divsd(xmm0, xmm1); 357 movl(Address(rsp, 16), 2); 358 359 bind(L_2TAG_PACKET_8_0_2); 360 movq(Address(rsp, 8), xmm0); 361 362 bind(B1_3); 363 movq(xmm0, Address(rsp, 8)); 364 365 bind(B1_5); 366 addq(rsp, 24); 367 } 368 #else 369 // The 32 bit code is at most SSE2 compliant 370 ALIGNED_(16) juint _static_const_table_log[] = 371 { 372 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, 373 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, 374 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, 375 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, 376 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, 377 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, 378 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, 379 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, 380 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, 381 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, 382 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, 383 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, 384 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, 385 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, 386 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, 387 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, 388 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, 389 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, 390 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, 391 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, 392 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, 393 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, 394 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, 395 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, 396 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, 397 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, 398 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, 399 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, 400 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, 401 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, 402 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, 403 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, 404 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, 405 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, 406 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, 407 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, 408 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, 409 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, 410 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, 411 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, 412 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, 413 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, 414 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, 415 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, 416 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, 417 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, 418 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, 419 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, 420 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, 421 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, 422 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, 423 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, 424 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, 425 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, 426 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, 427 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, 428 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, 429 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, 430 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, 431 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, 432 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 433 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, 434 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, 435 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, 436 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, 437 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, 438 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, 439 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, 440 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, 441 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, 442 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, 443 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, 444 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, 445 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, 446 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, 447 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, 448 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, 449 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, 450 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, 451 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, 452 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, 453 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, 454 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, 455 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, 456 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, 457 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, 458 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, 459 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, 460 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, 461 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, 462 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, 463 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, 464 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, 465 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, 466 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, 467 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, 468 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, 469 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, 470 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, 471 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, 472 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, 473 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, 474 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 475 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, 476 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, 477 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, 478 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, 479 0xffffe000UL 480 }; 481 //registers, 482 // input: xmm0 483 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 484 // rax, rdx, rcx, rbx (tmp) 485 486 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { 487 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 488 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 489 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; 490 Label L_2TAG_PACKET_10_0_2, start; 491 492 assert_different_registers(tmp, eax, ecx, edx); 493 jmp(start); 494 address static_const_table = (address)_static_const_table_log; 495 496 bind(start); 497 subl(rsp, 104); 498 movl(Address(rsp, 40), tmp); 499 lea(tmp, ExternalAddress(static_const_table)); 500 xorpd(xmm2, xmm2); 501 movl(eax, 16368); 502 pinsrw(xmm2, eax, 3); 503 xorpd(xmm3, xmm3); 504 movl(edx, 30704); 505 pinsrw(xmm3, edx, 3); 506 movsd(xmm0, Address(rsp, 112)); 507 movapd(xmm1, xmm0); 508 movl(ecx, 32768); 509 movdl(xmm4, ecx); 510 movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL 511 pextrw(eax, xmm0, 3); 512 por(xmm0, xmm2); 513 psllq(xmm0, 5); 514 movl(ecx, 16352); 515 psrlq(xmm0, 34); 516 rcpss(xmm0, xmm0); 517 psllq(xmm1, 12); 518 pshufd(xmm6, xmm5, 228); 519 psrlq(xmm1, 12); 520 subl(eax, 16); 521 cmpl(eax, 32736); 522 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 523 524 bind(L_2TAG_PACKET_1_0_2); 525 paddd(xmm0, xmm4); 526 por(xmm1, xmm3); 527 movdl(edx, xmm0); 528 psllq(xmm0, 29); 529 pand(xmm5, xmm1); 530 pand(xmm0, xmm6); 531 subsd(xmm1, xmm5); 532 mulpd(xmm5, xmm0); 533 andl(eax, 32752); 534 subl(eax, ecx); 535 cvtsi2sdl(xmm7, eax); 536 mulsd(xmm1, xmm0); 537 movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL 538 movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL 539 subsd(xmm5, xmm2); 540 andl(edx, 16711680); 541 shrl(edx, 12); 542 movdqu(xmm0, Address(tmp, edx)); 543 movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL 544 addsd(xmm1, xmm5); 545 movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL 546 mulsd(xmm6, xmm7); 547 pshufd(xmm5, xmm1, 68); 548 mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL 549 mulsd(xmm3, xmm1); 550 addsd(xmm0, xmm6); 551 mulpd(xmm4, xmm5); 552 mulpd(xmm5, xmm5); 553 pshufd(xmm6, xmm0, 228); 554 addsd(xmm0, xmm1); 555 addpd(xmm4, xmm2); 556 mulpd(xmm3, xmm5); 557 subsd(xmm6, xmm0); 558 mulsd(xmm4, xmm1); 559 pshufd(xmm2, xmm0, 238); 560 addsd(xmm1, xmm6); 561 mulsd(xmm5, xmm5); 562 addsd(xmm7, xmm2); 563 addpd(xmm4, xmm3); 564 addsd(xmm1, xmm7); 565 mulpd(xmm4, xmm5); 566 addsd(xmm1, xmm4); 567 pshufd(xmm5, xmm4, 238); 568 addsd(xmm1, xmm5); 569 addsd(xmm0, xmm1); 570 jmp(L_2TAG_PACKET_2_0_2); 571 572 bind(L_2TAG_PACKET_0_0_2); 573 movsd(xmm0, Address(rsp, 112)); 574 movdqu(xmm1, xmm0); 575 addl(eax, 16); 576 cmpl(eax, 32768); 577 jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); 578 cmpl(eax, 16); 579 jcc(Assembler::below, L_2TAG_PACKET_4_0_2); 580 581 bind(L_2TAG_PACKET_5_0_2); 582 addsd(xmm0, xmm0); 583 jmp(L_2TAG_PACKET_2_0_2); 584 585 bind(L_2TAG_PACKET_6_0_2); 586 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); 587 cmpl(edx, 0); 588 jcc(Assembler::above, L_2TAG_PACKET_5_0_2); 589 jmp(L_2TAG_PACKET_7_0_2); 590 591 bind(L_2TAG_PACKET_3_0_2); 592 movdl(edx, xmm1); 593 psrlq(xmm1, 32); 594 movdl(ecx, xmm1); 595 addl(ecx, ecx); 596 cmpl(ecx, -2097152); 597 jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); 598 orl(edx, ecx); 599 cmpl(edx, 0); 600 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); 601 602 bind(L_2TAG_PACKET_7_0_2); 603 xorpd(xmm1, xmm1); 604 xorpd(xmm0, xmm0); 605 movl(eax, 32752); 606 pinsrw(xmm1, eax, 3); 607 movl(edx, 3); 608 mulsd(xmm0, xmm1); 609 610 bind(L_2TAG_PACKET_9_0_2); 611 movsd(Address(rsp, 0), xmm0); 612 movsd(xmm0, Address(rsp, 112)); 613 fld_d(Address(rsp, 0)); 614 jmp(L_2TAG_PACKET_10_0_2); 615 616 bind(L_2TAG_PACKET_8_0_2); 617 xorpd(xmm1, xmm1); 618 xorpd(xmm0, xmm0); 619 movl(eax, 49136); 620 pinsrw(xmm0, eax, 3); 621 divsd(xmm0, xmm1); 622 movl(edx, 2); 623 jmp(L_2TAG_PACKET_9_0_2); 624 625 bind(L_2TAG_PACKET_4_0_2); 626 movdl(edx, xmm1); 627 psrlq(xmm1, 32); 628 movdl(ecx, xmm1); 629 orl(edx, ecx); 630 cmpl(edx, 0); 631 jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); 632 xorpd(xmm1, xmm1); 633 movl(eax, 18416); 634 pinsrw(xmm1, eax, 3); 635 mulsd(xmm0, xmm1); 636 movapd(xmm1, xmm0); 637 pextrw(eax, xmm0, 3); 638 por(xmm0, xmm2); 639 psllq(xmm0, 5); 640 movl(ecx, 18416); 641 psrlq(xmm0, 34); 642 rcpss(xmm0, xmm0); 643 psllq(xmm1, 12); 644 pshufd(xmm6, xmm5, 228); 645 psrlq(xmm1, 12); 646 jmp(L_2TAG_PACKET_1_0_2); 647 648 bind(L_2TAG_PACKET_2_0_2); 649 movsd(Address(rsp, 24), xmm0); 650 fld_d(Address(rsp, 24)); 651 652 bind(L_2TAG_PACKET_10_0_2); 653 movl(tmp, Address(rsp, 40)); 654 } 655 #endif