1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, Intel Corporation. All rights reserved.
   4  * Intel Math Library (LIBM) Source Code
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 package org.graalvm.compiler.lir.amd64;
  28 
  29 import static jdk.vm.ci.amd64.AMD64.r11;
  30 import static jdk.vm.ci.amd64.AMD64.r8;
  31 import static jdk.vm.ci.amd64.AMD64.rax;
  32 import static jdk.vm.ci.amd64.AMD64.rcx;
  33 import static jdk.vm.ci.amd64.AMD64.rdx;
  34 import static jdk.vm.ci.amd64.AMD64.rsp;
  35 import static jdk.vm.ci.amd64.AMD64.xmm0;
  36 import static jdk.vm.ci.amd64.AMD64.xmm1;
  37 import static jdk.vm.ci.amd64.AMD64.xmm2;
  38 import static jdk.vm.ci.amd64.AMD64.xmm3;
  39 import static jdk.vm.ci.amd64.AMD64.xmm4;
  40 import static jdk.vm.ci.amd64.AMD64.xmm5;
  41 import static jdk.vm.ci.amd64.AMD64.xmm6;
  42 import static jdk.vm.ci.amd64.AMD64.xmm7;
  43 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant;
  44 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress;
  45 
  46 import org.graalvm.compiler.asm.Label;
  47 import org.graalvm.compiler.asm.amd64.AMD64Address;
  48 import org.graalvm.compiler.asm.amd64.AMD64Assembler;
  49 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
  50 import org.graalvm.compiler.lir.LIRInstructionClass;
  51 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
  52 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
  53 
  54 import jdk.vm.ci.amd64.AMD64;
  55 
  56 /**
  57  * <pre>
  58  *                     ALGORITHM DESCRIPTION - LOG()
  59  *                     ---------------------
  60  *
  61  *    x=2^k * mx, mx in [1,2)
  62  *
  63  *    Get B~1/mx based on the output of rcpss instruction (B0)
  64  *    B = int((B0*2^7+0.5))/2^7
  65  *
  66  *    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
  67  *
  68  *    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
  69  *             p(r) is a degree 7 polynomial
  70  *             -log(B) read from data table (high, low parts)
  71  *             Result is formed from high and low parts.
  72  *
  73  * Special cases:
  74  *  log(NaN) = quiet NaN, and raise invalid exception
  75  *  log(+INF) = that INF
  76  *  log(0) = -INF with divide-by-zero exception raised
  77  *  log(1) = +0
  78  *  log(x) = NaN with invalid exception raised if x < -0, including -INF
  79  * </pre>
  80  */
public final class AMD64MathLogOp extends AMD64MathIntrinsicUnaryOp {

    /** LIR instruction class descriptor for this op; it is handed to the superclass constructor. */
    public static final LIRInstructionClass<AMD64MathLogOp> TYPE = LIRInstructionClass.create(AMD64MathLogOp.class);

    /**
     * Creates the op, passing to the superclass the general-purpose and XMM registers that
     * {@link #emitCode} uses as scratch.
     *
     * NOTE(review): presumably the superclass records these as temporaries so that the register
     * allocator treats them as clobbered - confirm against AMD64MathIntrinsicUnaryOp.
     */
    public AMD64MathLogOp() {
        super(TYPE, /* GPR */ rax, rcx, rdx, r8, r11,
                        /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
    }

    /**
     * Lookup table read by {@link #emitCode} at {@code lTbl + offset}, where the offset is a
     * multiple of 16 bytes taken from bits 16..23 of the rounded reciprocal approximation. Every
     * group of four ints is loaded as one packed pair of doubles (each double is stored as low
     * word, then high word); per the class comment these are the high and low parts of
     * {@code -log(B)}.
     *
     * NOTE(review): the first argument (16) is presumably the required alignment in bytes -
     * confirm against AMD64HotSpotHelper.pointerConstant.
     */
    private ArrayDataPointerConstant lTbl = pointerConstant(16, new int[]{
            // @formatter:off
            0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800,
            0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40,
            0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd,
            0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4,
            0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00,
            0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15,
            0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7,
            0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79,
            0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800,
            0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028,
            0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c,
            0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5,
            0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00,
            0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0,
            0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3,
            0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde,
            0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800,
            0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8,
            0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742,
            0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b,
            0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400,
            0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a,
            0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935,
            0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5,
            0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000,
            0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a,
            0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a,
            0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf,
            0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800,
            0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60,
            0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d,
            0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525,
            0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800,
            0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344,
            0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0,
            0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313,
            0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000,
            0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855,
            0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a,
            0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d,
            0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800,
            0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8,
            0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00,
            0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a,
            0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000,
            0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0,
            0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675,
            0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436,
            0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000,
            0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1,
            0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4,
            0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1,
            0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800,
            0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c,
            0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c,
            0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b,
            0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800,
            0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c,
            0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11,
            0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227,
            0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000,
            0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb,
            0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6,
            0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d,
            0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000,
            0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a,
            0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1,
            0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb,
            0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000,
            0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e,
            0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c,
            0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8,
            0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000,
            0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc,
            0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1,
            0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23,
            0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000,
            0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598,
            0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd,
            0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068,
            0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000,
            0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8,
            0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8,
            0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c,
            0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000,
            0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a,
            0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef,
            0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e,
            0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000,
            0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598,
            0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b,
            0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2,
            0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000,
            0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94,
            0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28,
            0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a,
            0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000,
            0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925,
            0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe,
            0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b,
            0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000,
            0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805,
            0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000,
            0x80000000
            // @formatter:on
    });

    /**
     * One double (bit pattern 0x3fa62e42fefa3800) that {@link #emitCode} multiplies by the
     * converted exponent term. The exponent term is kept scaled by 16 there (it is masked with
     * 0x7ff0 but never shifted right), so this is presumably the high part of log(2)/16 - TODO
     * confirm against the original LIBM source.
     *
     * Note that the formatter-off region opened in this initializer is only closed inside the
     * initializer of {@link #log28}.
     */
    private ArrayDataPointerConstant log2 = pointerConstant(8, new int[]{
            // @formatter:off
            0xfefa3800, 0x3fa62e42,
    });
    /**
     * One double (bit pattern 0x3ceef35793c76730) multiplied by the same exponent term as
     * {@link #log2}; presumably the matching low-order part. NOTE(review): the name looks like
     * "log2 table, byte offset 8" carried over from the original source - confirm.
     */
    private ArrayDataPointerConstant log28 = pointerConstant(8, new int[]{
            0x93c76730, 0x3ceef357
            // @formatter:on
    });

    /**
     * First packed pair of doubles loaded by {@link #emitCode} for the polynomial evaluation
     * (bit patterns 0x3fc2492492492492 and 0xbfd0000000000000). NOTE(review): the names coeff,
     * coeff16 and coeff32 presumably reflect byte offsets 0, 16 and 32 of a single coefficient
     * table in the original source - confirm.
     */
    private ArrayDataPointerConstant coeff = pointerConstant(16, new int[]{
            // @formatter:off
            0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000,
    });
    /** Second packed pair of polynomial coefficients (0xbfc5555e3d6fb175, 0x3fd5555555555555). */
    private ArrayDataPointerConstant coeff16 = pointerConstant(16, new int[]{
            0x3d6fb175, 0xbfc5555e, 0x55555555, 0x3fd55555,
    });
    /** Third packed pair of polynomial coefficients (0x3fc999999999999a, 0xbfe0000000000000). */
    private ArrayDataPointerConstant coeff32 = pointerConstant(16, new int[]{
            0x9999999a, 0x3fc99999, 0x00000000, 0xbfe00000
            // @formatter:on
    });

    /**
     * Emits the machine code for {@code log(x)} following the algorithm in the class comment.
     * The argument is read from {@code xmm0} and the result is left in {@code xmm0}. 24 bytes of
     * stack are reserved on entry and released at {@code block9}, which every path reaches.
     *
     * Label overview:
     * <ul>
     * <li>block1 - main computation (also re-entered from block3 after rescaling)</li>
     * <li>block0 - dispatch for inputs whose sign/exponent bits fail the range check</li>
     * <li>block2 - sign bit set; block5 - sign bit set and exponent field all ones</li>
     * <li>block3 - exponent field zero (zero or denormal)</li>
     * <li>block4 - result x + x (reached for +Inf and NaN inputs)</li>
     * <li>block6 - result 0 * Inf; block7 - result -1 / 0</li>
     * <li>block8 / block9 - common exits</li>
     * </ul>
     *
     * The order of the emitted instructions is part of the generated code and must not be
     * changed casually.
     *
     * @param crb compilation result builder, used here only to record the addresses of the
     *            constant tables
     * @param masm assembler that receives the instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        // Register usage:
        // input and result: xmm0
        // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
        // rax, rdx, rcx, r8, r11
        Label block0 = new Label();
        Label block1 = new Label();
        Label block2 = new Label();
        Label block3 = new Label();
        Label block4 = new Label();
        Label block5 = new Label();
        Label block6 = new Label();
        Label block7 = new Label();
        Label block8 = new Label();
        Label block9 = new Label();

        // Reserve 24 bytes of stack and save the argument at [rsp+0]; block0 reloads it from there.
        masm.subq(rsp, 24);
        masm.movsd(new AMD64Address(rsp, 0), xmm0);
        // xmm2 = bit pattern of the double 1.0
        masm.movq(rax, 0x3ff0000000000000L);
        masm.movdq(xmm2, rax);
        // xmm3 = exponent pattern that block1 ORs onto the isolated mantissa
        masm.movq(rdx, 0x77f0000000000000L);
        masm.movdq(xmm3, rdx);
        // xmm4 = 0x8000, added to the reciprocal approximation's bits in block1
        // (presumably the rounding term of "B = int((B0*2^7+0.5))/2^7" from the class comment)
        masm.movl(rcx, 32768);
        masm.movdl(xmm4, rcx);
        // xmm5 = mask that keeps the upper 19 bits of a double (sign, exponent, top 7 mantissa bits)
        masm.movq(r8, 0xffffe00000000000L);
        masm.movdq(xmm5, r8);
        masm.movdqu(xmm1, xmm0);                // xmm1 = copy of x
        masm.pextrw(rax, xmm0, 3);              // rax = bits 48..63 of x: sign, exponent, top 4 mantissa bits
        masm.por(xmm0, xmm2);                   // OR in the exponent bits of 1.0
        masm.movl(rcx, 16352);                  // 0x3fe0: subtracted from the masked exponent in block1
        // The two right shifts (27 + 2 = 29 bits) move the leading mantissa bits and the low
        // exponent bits of the double into single-precision layout for rcpps.
        masm.psrlq(xmm0, 27);
        masm.leaq(r11, recordExternalAddress(crb, lTbl)); // r11 = base address of lTbl
        masm.psrld(xmm0, 2);
        masm.rcpps(xmm0, xmm0);                 // approximate reciprocal (B0 in the class comment)
        masm.psllq(xmm1, 12);                   // shifting left then right by 12 clears sign and exponent,
        masm.pshufd(xmm6, xmm5, 228);           // (228 = 0xe4 is the identity shuffle: xmm6 = copy of the mask)
        masm.psrlq(xmm1, 12);                   // leaving only the 52 mantissa bits of x in xmm1
        // Unsigned range check on (rax - 16): it fails when the exponent field is zero (the
        // subtraction wraps), when the exponent field is all ones, or when the sign bit is set.
        // Those inputs are handled from block0.
        masm.subl(rax, 16);
        masm.cmpl(rax, 32736);                  // 0x7fe0
        masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block0);

        // Main path. Expects: rax = sign/exponent word, rcx = exponent offset to subtract,
        // xmm0 = rcpps result, xmm1 = mantissa bits of x, xmm2 = 1.0, xmm3/xmm4 = constants above,
        // xmm5 = xmm6 = mask.
        masm.bind(block1);
        masm.paddd(xmm0, xmm4);                 // add 0x8000 to the reciprocal's bits
        masm.por(xmm1, xmm3);                   // give the mantissa the exponent pattern from xmm3
        masm.movdl(rdx, xmm0);                  // rdx = reciprocal bits, used below for the table offset
        masm.psllq(xmm0, 29);                   // move the reciprocal back to double bit layout
        masm.pand(xmm5, xmm1);                  // xmm5 = high part of the mantissa value
        masm.pand(xmm0, xmm6);                  // truncate the reciprocal with the same mask (B)
        masm.subsd(xmm1, xmm5);                 // xmm1 = low part of the mantissa value
        masm.mulpd(xmm5, xmm0);                 // B * high part
        masm.andl(rax, 32752);                  // 0x7ff0: keep the exponent field (still shifted left by 4)
        masm.subl(rax, rcx);                    // subtract the exponent offset chosen by the caller path
        masm.cvtsi2sdl(xmm7, rax);              // xmm7 = exponent term as a double
        masm.mulsd(xmm1, xmm0);                 // B * low part
        masm.movq(xmm6, recordExternalAddress(crb, log2));             // 0xfefa3800, 0x3fa62e42
        masm.movdqu(xmm3, recordExternalAddress(crb, coeff));          // 0x92492492, 0x3fc24924,
                                                                       // 0x00000000, 0xbfd00000
        masm.subsd(xmm5, xmm2);                 // B * high - 1.0
        // Table offset: bits 16..23 of the rounded reciprocal, shifted so that the result is a
        // multiple of 16 bytes (one packed pair of doubles per entry).
        masm.andl(rdx, 16711680);               // 0xff0000
        masm.shrl(rdx, 12);
        masm.movdqu(xmm0, new AMD64Address(r11, rdx, AMD64Address.Scale.Times1));
        masm.movdqu(xmm4, recordExternalAddress(crb, coeff16));        // 0x3d6fb175, 0xbfc5555e,
                                                                       // 0x55555555, 0x3fd55555
        masm.addsd(xmm1, xmm5);                 // xmm1 = reduced argument r = B*mx - 1.0
        masm.movdqu(xmm2, recordExternalAddress(crb, coeff32));        // 0x9999999a, 0x3fc99999,
                                                                       // 0x00000000, 0xbfe00000
        masm.mulsd(xmm6, xmm7);                 // exponent term * log2 constant
        // Broadcast r into both halves of xmm5; movddup needs SSE3, otherwise copy + movlhps.
        if (masm.supports(AMD64.CPUFeature.SSE3)) {
            masm.movddup(xmm5, xmm1);
        } else {
            masm.movdqu(xmm5, xmm1);
            masm.movlhps(xmm5, xmm5);
        }
        masm.mulsd(xmm7, recordExternalAddress(crb, log28));           // 0x93c76730, 0x3ceef357
        // From here on: polynomial evaluation on packed pairs, combined with the table entry and
        // the exponent contributions, summing high and low parts separately (see class comment).
        masm.mulsd(xmm3, xmm1);
        masm.addsd(xmm0, xmm6);
        masm.mulpd(xmm4, xmm5);
        masm.mulpd(xmm5, xmm5);
        // Broadcast the low double of xmm0 into both halves of xmm6 (same SSE3 fallback as above).
        if (masm.supports(AMD64.CPUFeature.SSE3)) {
            masm.movddup(xmm6, xmm0);
        } else {
            masm.movdqu(xmm6, xmm0);
            masm.movlhps(xmm6, xmm6);
        }
        masm.addsd(xmm0, xmm1);
        masm.addpd(xmm4, xmm2);
        masm.mulpd(xmm3, xmm5);
        masm.subsd(xmm6, xmm0);
        masm.mulsd(xmm4, xmm1);
        masm.pshufd(xmm2, xmm0, 238);           // 238 = 0xee: copy the upper double of xmm0 (table low part)
        masm.addsd(xmm1, xmm6);
        masm.mulsd(xmm5, xmm5);
        masm.addsd(xmm7, xmm2);
        masm.addpd(xmm4, xmm3);
        masm.addsd(xmm1, xmm7);
        masm.mulpd(xmm4, xmm5);
        masm.addsd(xmm1, xmm4);
        masm.pshufd(xmm5, xmm4, 238);           // bring the upper polynomial lane down to the low double
        masm.addsd(xmm1, xmm5);
        masm.addsd(xmm0, xmm1);                 // final result in xmm0
        masm.jmp(block9);

        // Special-case dispatch: reload x and undo the "- 16" applied before the range check.
        masm.bind(block0);
        masm.movq(xmm0, new AMD64Address(rsp, 0));
        masm.movq(xmm1, new AMD64Address(rsp, 0));
        masm.addl(rax, 16);
        masm.cmpl(rax, 32768);                  // >= 0x8000: sign bit set
        masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block2);
        masm.cmpl(rax, 16);                     // < 0x10: exponent field is zero
        masm.jcc(AMD64Assembler.ConditionFlag.Below, block3);

        // Remaining case: sign clear and exponent field all ones (+Inf or NaN); also reached from
        // block5 for NaN. The result is x + x.
        masm.bind(block4);
        masm.addsd(xmm0, xmm0);
        masm.jmp(block9);

        // Only reached via the jump in block2, so the first jcc still sees the flags of that
        // block's cmpl(rcx, -2097152): "above" means the upper mantissa bits are non-zero.
        masm.bind(block5);
        masm.jcc(AMD64Assembler.ConditionFlag.Above, block4);
        masm.cmpl(rdx, 0);                      // low 32 mantissa bits non-zero?
        masm.jcc(AMD64Assembler.ConditionFlag.Above, block4);
        masm.jmp(block6);                       // mantissa is zero: -Inf

        // Exponent field is zero: x is +0 or a positive denormal.
        masm.bind(block3);
        masm.xorpd(xmm1, xmm1);
        masm.addsd(xmm1, xmm0);                 // xmm1 = 0 + x
        masm.movdl(rdx, xmm1);                  // rdx = low 32 bits
        masm.psrlq(xmm1, 32);
        masm.movdl(rcx, xmm1);                  // rcx = high 32 bits
        masm.orl(rdx, rcx);
        masm.cmpl(rdx, 0);
        masm.jcc(AMD64Assembler.ConditionFlag.Equal, block7); // all bits zero: x is zero
        // Denormal: multiply by the double whose top word is 0x47f0 (2^128), then repeat the
        // preparation done at the top of the method and re-enter the main path. rcx is set to
        // 0x47f0 here instead of 0x3fe0, which accounts for the scaling and for rax not having
        // 16 subtracted on this path.
        masm.xorpd(xmm1, xmm1);
        masm.movl(rax, 18416);
        masm.pinsrw(xmm1, rax, 3);
        masm.mulsd(xmm0, xmm1);
        masm.movdqu(xmm1, xmm0);
        masm.pextrw(rax, xmm0, 3);
        masm.por(xmm0, xmm2);
        masm.psrlq(xmm0, 27);
        masm.movl(rcx, 18416);
        masm.psrld(xmm0, 2);
        masm.rcpps(xmm0, xmm0);
        masm.psllq(xmm1, 12);
        masm.pshufd(xmm6, xmm5, 228);
        masm.psrlq(xmm1, 12);
        masm.jmp(block1);

        // Sign bit set: distinguish -0, negative finite values, -Inf and NaN.
        masm.bind(block2);
        masm.movdl(rdx, xmm1);                  // rdx = low 32 bits of x
        masm.psrlq(xmm1, 32);
        masm.movdl(rcx, xmm1);                  // rcx = high 32 bits of x
        masm.addl(rcx, rcx);                    // shift the sign bit out
        masm.cmpl(rcx, -2097152);               // 0xffe00000: exponent field all ones?
        masm.jcc(AMD64Assembler.ConditionFlag.AboveEqual, block5);
        masm.orl(rdx, rcx);
        masm.cmpl(rdx, 0);
        masm.jcc(AMD64Assembler.ConditionFlag.Equal, block7); // all remaining bits zero: x is -0

        // Negative non-zero input (or -Inf via block5): compute 0 * Inf, which yields NaN.
        masm.bind(block6);
        masm.xorpd(xmm1, xmm1);
        masm.xorpd(xmm0, xmm0);
        masm.movl(rax, 32752);                  // 0x7ff0 as top word: +Inf
        masm.pinsrw(xmm1, rax, 3);
        masm.mulsd(xmm0, xmm1);
        // NOTE(review): the value stored at [rsp+16] (3 here, 2 in block7) is never read in this
        // method; it looks like an error code left over from the original LIBM source - confirm.
        masm.movl(new AMD64Address(rsp, 16), 3);
        masm.jmp(block8);
        // Zero input: compute -1.0 / 0.0, which yields -Inf.
        masm.bind(block7);
        masm.xorpd(xmm1, xmm1);
        masm.xorpd(xmm0, xmm0);
        masm.movl(rax, 49136);                  // 0xbff0 as top word: -1.0
        masm.pinsrw(xmm0, rax, 3);
        masm.divsd(xmm0, xmm1);
        masm.movl(new AMD64Address(rsp, 16), 2);

        // Common exit of block6/block7: the result is stored to [rsp+8] and loaded straight back.
        masm.bind(block8);
        masm.movq(new AMD64Address(rsp, 8), xmm0);

        masm.movq(xmm0, new AMD64Address(rsp, 8));

        // Common exit of all paths: release the 24 bytes reserved on entry.
        masm.bind(block9);
        masm.addq(rsp, 24);
    }
}