1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, Intel Corporation. All rights reserved. 4 * Intel Math Library (LIBM) Source Code 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 27 package org.graalvm.compiler.lir.amd64; 28 29 import static jdk.vm.ci.amd64.AMD64.r11; 30 import static jdk.vm.ci.amd64.AMD64.r8; 31 import static jdk.vm.ci.amd64.AMD64.rax; 32 import static jdk.vm.ci.amd64.AMD64.rcx; 33 import static jdk.vm.ci.amd64.AMD64.rdx; 34 import static jdk.vm.ci.amd64.AMD64.rsp; 35 import static jdk.vm.ci.amd64.AMD64.xmm0; 36 import static jdk.vm.ci.amd64.AMD64.xmm1; 37 import static jdk.vm.ci.amd64.AMD64.xmm2; 38 import static jdk.vm.ci.amd64.AMD64.xmm3; 39 import static jdk.vm.ci.amd64.AMD64.xmm4; 40 import static jdk.vm.ci.amd64.AMD64.xmm5; 41 import static jdk.vm.ci.amd64.AMD64.xmm6; 42 import static jdk.vm.ci.amd64.AMD64.xmm7; 43 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant; 44 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress; 45 46 import org.graalvm.compiler.asm.Label; 47 import org.graalvm.compiler.asm.amd64.AMD64Address; 48 import org.graalvm.compiler.asm.amd64.AMD64Assembler; 49 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 50 import org.graalvm.compiler.lir.LIRInstructionClass; 51 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; 52 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 53 54 import jdk.vm.ci.amd64.AMD64; 55 56 /** 57 * <pre> 58 * ALGORITHM DESCRIPTION - LOG() 59 * --------------------- 60 * 61 * x=2^k * mx, mx in [1,2) 62 * 63 * Get B~1/mx based on the output of rcpss instruction (B0) 64 * B = int((B0*2^7+0.5))/2^7 65 * 66 * Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) 67 * 68 * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and 69 * p(r) is a degree 7 polynomial 70 * -log(B) read from data table (high, low parts) 71 * Result is formed from high and low parts. 72 * 73 * Special cases: 74 * log(NaN) = quiet NaN, and raise invalid exception 75 * log(+INF) = that INF 76 * log(0) = -INF with divide-by-zero exception raised 77 * log(1) = +0 78 * log(x) = NaN with invalid exception raised if x < -0, including -INF 79 * </pre> 80 */ 81 public final class AMD64MathLogOp extends AMD64MathIntrinsicUnaryOp { 82 83 public static final LIRInstructionClass<AMD64MathLogOp> TYPE = LIRInstructionClass.create(AMD64MathLogOp.class); 84 85 public AMD64MathLogOp() { 86 super(TYPE, /* GPR */ rax, rcx, rdx, r8, r11, 87 /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7); 88 } 89 90 private ArrayDataPointerConstant lTbl = pointerConstant(16, new int[]{ 91 // @formatter:off 92 0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800, 93 0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40, 94 0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd, 95 0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4, 96 0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00, 97 0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15, 98 0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7, 99 0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79, 100 0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800, 101 0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028, 102 0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c, 103 0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5, 104 0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00, 105 0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0, 106 0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3, 107 0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde, 108 0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800, 109 0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8, 110 0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742, 111 0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b, 112 0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400, 113 0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a, 114 0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935, 115 0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5, 116 0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000, 117 0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a, 118 0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a, 119 0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf, 120 0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800, 121 0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60, 122 0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d, 123 0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525, 124 0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800, 125 0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344, 126 0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0, 127 0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313, 128 0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000, 129 0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855, 130 0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a, 131 0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d, 132 0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800, 133 0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8, 134 0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00, 135 0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a, 136 0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000, 137 0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0, 138 0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675, 139 0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436, 140 0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000, 141 0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1, 142 0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4, 143 0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1, 144 0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800, 145 0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c, 146 0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c, 147 0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b, 148 0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800, 149 0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c, 150 0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11, 151 0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227, 152 0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000, 153 0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb, 154 0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6, 155 0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d, 156 0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000, 157 0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a, 158 0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1, 159 0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb, 160 0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000, 161 0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e, 162 0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c, 163 0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8, 164 0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000, 165 0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc, 166 0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1, 167 0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23, 168 0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000, 169 0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598, 170 0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd, 171 0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068, 172 0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000, 173 0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8, 174 0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8, 175 0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c, 176 0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000, 177 0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a, 178 0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef, 179 0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e, 180 0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000, 181 0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598, 182 0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b, 183 0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2, 184 0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000, 185 0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94, 186 0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28, 187 0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a, 188 0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000, 189 0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925, 190 0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe, 191 0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b, 192 0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000, 193 0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805, 194 0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000, 195 0x80000000 196 // @formatter:on 197 }); 198 199 private ArrayDataPointerConstant log2 = pointerConstant(8, new int[]{ 200 // @formatter:off 201 0xfefa3800, 0x3fa62e42, 202 }); 203 private ArrayDataPointerConstant log28 = pointerConstant(8, new int[]{ 204 0x93c76730, 0x3ceef357 205 // @formatter:on 206 }); 207 208 private ArrayDataPointerConstant coeff = pointerConstant(16, new int[]{ 209 // @formatter:off 210 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 211 }); 212 private ArrayDataPointerConstant coeff16 = pointerConstant(16, new int[]{ 213 0x3d6fb175, 0xbfc5555e, 0x55555555, 0x3fd55555, 214 }); 215 private ArrayDataPointerConstant coeff32 = pointerConstant(16, new int[]{ 216 0x9999999a, 0x3fc99999, 0x00000000, 0xbfe00000 217 // @formatter:on 218 }); 219 220 @Override 221 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 222 // registers, 223 // input: xmm0 224 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 225 // rax, rdx, rcx, r8, r11 226 Label block0 = new Label(); 227 Label block1 = new Label(); 228 Label block2 = new Label(); 229 Label block3 = new Label(); 230 Label block4 = new Label(); 231 Label block5 = new Label(); 232 Label block6 = new Label(); 233 Label block7 = new Label(); 234 Label block8 = new Label(); 235 Label block9 = new Label(); 236 237 masm.subq(rsp, 24); 238 masm.movsd(new AMD64Address(rsp, 0), xmm0); 239 masm.movq(rax, 0x3ff0000000000000L); 240 masm.movdq(xmm2, rax); 241 masm.movq(rdx, 0x77f0000000000000L); 242 masm.movdq(xmm3, rdx); 243 masm.movl(rcx, 32768); 244 masm.movdl(xmm4, rcx); 245 masm.movq(r8, 0xffffe00000000000L); 246 masm.movdq(xmm5, r8); 247 masm.movdqu(xmm1, xmm0); 248 masm.pextrw(rax, xmm0, 3); 249 masm.por(xmm0, xmm2); 250 masm.movl(rcx, 16352); 251 masm.psrlq(xmm0, 27); 252 masm.leaq(r11, recordExternalAddress(crb, lTbl)); 253 masm.psrld(xmm0, 2); 254 masm.rcpps(xmm0, xmm0); 255 masm.psllq(xmm1, 12); 256 masm.pshufd(xmm6, xmm5, 228); 257 masm.psrlq(xmm1, 12); 258 masm.subl(rax, 16); 259 masm.cmpl(rax, 32736); 260 masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block0); 261 262 masm.bind(block1); 263 masm.paddd(xmm0, xmm4); 264 masm.por(xmm1, xmm3); 265 masm.movdl(rdx, xmm0); 266 masm.psllq(xmm0, 29); 267 masm.pand(xmm5, xmm1); 268 masm.pand(xmm0, xmm6); 269 masm.subsd(xmm1, xmm5); 270 masm.mulpd(xmm5, xmm0); 271 masm.andl(rax, 32752); 272 masm.subl(rax, rcx); 273 masm.cvtsi2sdl(xmm7, rax); 274 masm.mulsd(xmm1, xmm0); 275 masm.movq(xmm6, recordExternalAddress(crb, log2)); // 0xfefa3800, 0x3fa62e42 276 masm.movdqu(xmm3, recordExternalAddress(crb, coeff)); // 0x92492492, 0x3fc24924, 277 // 0x00000000, 0xbfd00000 278 masm.subsd(xmm5, xmm2); 279 masm.andl(rdx, 16711680); 280 masm.shrl(rdx, 12); 281 masm.movdqu(xmm0, new AMD64Address(r11, rdx, AMD64Address.Scale.Times1)); 282 masm.movdqu(xmm4, recordExternalAddress(crb, coeff16)); // 0x3d6fb175, 0xbfc5555e, 283 // 0x55555555, 0x3fd55555 284 masm.addsd(xmm1, xmm5); 285 masm.movdqu(xmm2, recordExternalAddress(crb, coeff32)); // 0x9999999a, 0x3fc99999, 286 // 0x00000000, 0xbfe00000 287 masm.mulsd(xmm6, xmm7); 288 if (masm.supports(AMD64.CPUFeature.SSE3)) { 289 masm.movddup(xmm5, xmm1); 290 } else { 291 masm.movdqu(xmm5, xmm1); 292 masm.movlhps(xmm5, xmm5); 293 } 294 masm.mulsd(xmm7, recordExternalAddress(crb, log28)); // 0x93c76730, 0x3ceef357 295 masm.mulsd(xmm3, xmm1); 296 masm.addsd(xmm0, xmm6); 297 masm.mulpd(xmm4, xmm5); 298 masm.mulpd(xmm5, xmm5); 299 if (masm.supports(AMD64.CPUFeature.SSE3)) { 300 masm.movddup(xmm6, xmm0); 301 } else { 302 masm.movdqu(xmm6, xmm0); 303 masm.movlhps(xmm6, xmm6); 304 } 305 masm.addsd(xmm0, xmm1); 306 masm.addpd(xmm4, xmm2); 307 masm.mulpd(xmm3, xmm5); 308 masm.subsd(xmm6, xmm0); 309 masm.mulsd(xmm4, xmm1); 310 masm.pshufd(xmm2, xmm0, 238); 311 masm.addsd(xmm1, xmm6); 312 masm.mulsd(xmm5, xmm5); 313 masm.addsd(xmm7, xmm2); 314 masm.addpd(xmm4, xmm3); 315 masm.addsd(xmm1, xmm7); 316 masm.mulpd(xmm4, xmm5); 317 masm.addsd(xmm1, xmm4); 318 masm.pshufd(xmm5, xmm4, 238); 319 masm.addsd(xmm1, xmm5); 320 masm.addsd(xmm0, xmm1); 321 masm.jmp(block9); 322 323 masm.bind(block0); 324 masm.movq(xmm0, new AMD64Address(rsp, 0)); 325 masm.movq(xmm1, new AMD64Address(rsp, 0)); 326 masm.addl(rax, 16); 327 masm.cmpl(rax, 32768); 328 masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block2); 329 masm.cmpl(rax, 16); 330 masm.jcc(AMD64Assembler.ConditionFlag.Below, block3); 331 332 masm.bind(block4); 333 masm.addsd(xmm0, xmm0); 334 masm.jmp(block9); 335 336 masm.bind(block5); 337 masm.jcc(AMD64Assembler.ConditionFlag.Above, block4); 338 masm.cmpl(rdx, 0); 339 masm.jcc(AMD64Assembler.ConditionFlag.Above, block4); 340 masm.jmp(block6); 341 342 masm.bind(block3); 343 masm.xorpd(xmm1, xmm1); 344 masm.addsd(xmm1, xmm0); 345 masm.movdl(rdx, xmm1); 346 masm.psrlq(xmm1, 32); 347 masm.movdl(rcx, xmm1); 348 masm.orl(rdx, rcx); 349 masm.cmpl(rdx, 0); 350 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block7); 351 masm.xorpd(xmm1, xmm1); 352 masm.movl(rax, 18416); 353 masm.pinsrw(xmm1, rax, 3); 354 masm.mulsd(xmm0, xmm1); 355 masm.movdqu(xmm1, xmm0); 356 masm.pextrw(rax, xmm0, 3); 357 masm.por(xmm0, xmm2); 358 masm.psrlq(xmm0, 27); 359 masm.movl(rcx, 18416); 360 masm.psrld(xmm0, 2); 361 masm.rcpps(xmm0, xmm0); 362 masm.psllq(xmm1, 12); 363 masm.pshufd(xmm6, xmm5, 228); 364 masm.psrlq(xmm1, 12); 365 masm.jmp(block1); 366 367 masm.bind(block2); 368 masm.movdl(rdx, xmm1); 369 masm.psrlq(xmm1, 32); 370 masm.movdl(rcx, xmm1); 371 masm.addl(rcx, rcx); 372 masm.cmpl(rcx, -2097152); 373 masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block5); 374 masm.orl(rdx, rcx); 375 masm.cmpl(rdx, 0); 376 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block7); 377 378 masm.bind(block6); 379 masm.xorpd(xmm1, xmm1); 380 masm.xorpd(xmm0, xmm0); 381 masm.movl(rax, 32752); 382 masm.pinsrw(xmm1, rax, 3); 383 masm.mulsd(xmm0, xmm1); 384 masm.movl(new AMD64Address(rsp, 16), 3); 385 masm.jmp(block8); 386 masm.bind(block7); 387 masm.xorpd(xmm1, xmm1); 388 masm.xorpd(xmm0, xmm0); 389 masm.movl(rax, 49136); 390 masm.pinsrw(xmm0, rax, 3); 391 masm.divsd(xmm0, xmm1); 392 masm.movl(new AMD64Address(rsp, 16), 2); 393 394 masm.bind(block8); 395 masm.movq(new AMD64Address(rsp, 8), xmm0); 396 397 masm.movq(xmm0, new AMD64Address(rsp, 8)); 398 399 masm.bind(block9); 400 masm.addq(rsp, 24); 401 } 402 }