1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, Intel Corporation. All rights reserved.
   4  * Intel Math Library (LIBM) Source Code
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 
  27 package org.graalvm.compiler.lir.amd64;
  28 
  29 import static jdk.vm.ci.amd64.AMD64.r10;
  30 import static jdk.vm.ci.amd64.AMD64.r11;
  31 import static jdk.vm.ci.amd64.AMD64.r8;
  32 import static jdk.vm.ci.amd64.AMD64.r9;
  33 import static jdk.vm.ci.amd64.AMD64.rax;
  34 import static jdk.vm.ci.amd64.AMD64.rbx;
  35 import static jdk.vm.ci.amd64.AMD64.rcx;
  36 import static jdk.vm.ci.amd64.AMD64.rdi;
  37 import static jdk.vm.ci.amd64.AMD64.rdx;
  38 import static jdk.vm.ci.amd64.AMD64.rsi;
  39 import static jdk.vm.ci.amd64.AMD64.rsp;
  40 import static jdk.vm.ci.amd64.AMD64.xmm0;
  41 import static jdk.vm.ci.amd64.AMD64.xmm1;
  42 import static jdk.vm.ci.amd64.AMD64.xmm2;
  43 import static jdk.vm.ci.amd64.AMD64.xmm3;
  44 import static jdk.vm.ci.amd64.AMD64.xmm4;
  45 import static jdk.vm.ci.amd64.AMD64.xmm5;
  46 import static jdk.vm.ci.amd64.AMD64.xmm6;
  47 import static jdk.vm.ci.amd64.AMD64.xmm7;
  48 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant;
  49 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress;
  50 
  51 import org.graalvm.compiler.asm.Label;
  52 import org.graalvm.compiler.asm.amd64.AMD64Address;
  53 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
  54 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
  55 import org.graalvm.compiler.lir.LIRInstructionClass;
  56 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
  57 import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
  58 
  59 import jdk.vm.ci.amd64.AMD64;
  60 
  61 /**
  62  * <pre>
  63  *                     ALGORITHM DESCRIPTION - SIN()
  64  *                     ---------------------
  65  *
  66  *     1. RANGE REDUCTION
  67  *
  68  *     We perform an initial range reduction from X to r with
  69  *
  70  *          X =~= N * pi/32 + r
  71  *
  72  *     so that |r| <= pi/64 + epsilon. We restrict inputs to those
  73  *     where |N| <= 932560. Beyond this, the range reduction is
  74  *     insufficiently accurate. For extremely small inputs,
  75  *     denormalization can occur internally, impacting performance.
  76  *     This means that the main path is actually only taken for
  77  *     2^-252 <= |X| < 90112.
  78  *
  79  *     To avoid branches, we perform the range reduction to full
  80  *     accuracy each time.
  81  *
  82  *          X - N * (P_1 + P_2 + P_3)
  83  *
  84  *     where P_1 and P_2 are 32-bit numbers (so multiplication by N
  85  *     is exact) and P_3 is a 53-bit number. Together, these
  86  *     approximate pi/32 well enough for all cases in the restricted
  87  *     range.
  88  *
  89  *     The main reduction sequence is:
  90  *
  91  *             y = 32/pi * x
  92  *             N = integer(y)
  93  *     (computed by adding and subtracting off SHIFTER)
  94  *
  95  *             m_1 = N * P_1
  96  *             m_2 = N * P_2
  97  *             r_1 = x - m_1
  98  *             r = r_1 - m_2
  99  *     (this r can be used for most of the calculation)
 100  *
 101  *             c_1 = r_1 - r
 102  *             m_3 = N * P_3
 103  *             c_2 = c_1 - m_2
 104  *             c = c_2 - m_3
 105  *
 106  *     2. MAIN ALGORITHM
 107  *
 108  *     The algorithm uses a table lookup based on B = M * pi / 32
 109  *     where M = N mod 64. The stored values are:
 110  *       sigma             closest power of 2 to cos(B)
 111  *       C_hl              53-bit cos(B) - sigma
 112  *       S_hi + S_lo       2 * 53-bit sin(B)
 113  *
 114  *     The computation is organized as follows:
 115  *
 116  *          sin(B + r + c) = [sin(B) + sigma * r] +
 117  *                           r * (cos(B) - sigma) +
 118  *                           sin(B) * [cos(r + c) - 1] +
 119  *                           cos(B) * [sin(r + c) - r]
 120  *
 121  *     which is approximately:
 122  *
 123  *          [S_hi + sigma * r] +
 124  *          C_hl * r +
 125  *          S_lo + S_hi * [(cos(r) - 1) - r * c] +
 126  *          (C_hl + sigma) * [(sin(r) - r) + c]
 127  *
 128  *     and this is what is actually computed. We separate this sum
 129  *     into four parts:
 130  *
 131  *          hi + med + pols + corr
 132  *
 133  *     where
 134  *
 135  *          hi       = S_hi + sigma r
 136  *          med      = C_hl * r
 137  *          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
 138  *          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
 139  *
 140  *     3. POLYNOMIAL
 141  *
 142  *     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
 143  *     (sin(r) - r) can be rearranged freely, since it is quite
 144  *     small, so we exploit parallelism to the fullest.
 145  *
 146  *          psc4       =   SC_4 * r_1
 147  *          msc4       =   psc4 * r
 148  *          r2         =   r * r
 149  *          msc2       =   SC_2 * r2
 150  *          r4         =   r2 * r2
 151  *          psc3       =   SC_3 + msc4
 152  *          psc1       =   SC_1 + msc2
 153  *          msc3       =   r4 * psc3
 154  *          sincospols =   psc1 + msc3
 155  *          pols       =   sincospols *
 156  *                         <S_hi * r^2 | (C_hl + sigma) * r^3>
 157  *
 158  *     4. CORRECTION TERM
 159  *
 160  *     This is where the "c" component of the range reduction is
 161  *     taken into account; recall that just "r" is used for most of
 162  *     the calculation.
 163  *
 164  *          -c   = m_3 - c_2
 165  *          -d   = S_hi * r - (C_hl + sigma)
 166  *          corr = -c * -d + S_lo
 167  *
 168  *     5. COMPENSATED SUMMATIONS
 169  *
 170  *     The two successive compensated summations add up the high
 171  *     and medium parts, leaving just the low parts to add up at
 172  *     the end.
 173  *
 174  *          rs        =  sigma * r
 175  *          res_int   =  S_hi + rs
 176  *          k_0       =  S_hi - res_int
 177  *          k_2       =  k_0 + rs
 178  *          med       =  C_hl * r
 179  *          res_hi    =  res_int + med
 180  *          k_1       =  res_int - res_hi
 181  *          k_3       =  k_1 + med
 182  *
 183  *     6. FINAL SUMMATION
 184  *
 185  *     We now add up all the small parts:
 186  *
 187  *          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
 188  *
 189  *     Now the overall result is just:
 190  *
 191  *          res_hi + res_lo
 192  *
 193  *     7. SMALL ARGUMENTS
 194  *
 195  *     If |x| < SNN (SNN meaning the smallest normal number), we
 196  *     simply perform 0.1111111...1111 * x. For SNN <= |x|, we
 197  *     do 2^-55 * (2^55 * x - x).
 198  *
 199  * Special cases:
 200  *  sin(NaN) = quiet NaN, and raise invalid exception
 201  *  sin(INF) = NaN and raise invalid exception
 202  *  sin(+/-0) = +/-0
 203  * </pre>
 204  */
 205 public final class AMD64MathSinOp extends AMD64MathIntrinsicUnaryOp {
 206 
 207     public static final LIRInstructionClass<AMD64MathSinOp> TYPE = LIRInstructionClass.create(AMD64MathSinOp.class); // descriptor for this op; passed to super(...) in the constructor
 208 
 209     public AMD64MathSinOp() {
             // Passes the superclass this op's TYPE descriptor together with the general-purpose
             // and XMM registers that emitCode overwrites as scratch. xmm0 is not listed: emitCode
             // reads the argument from xmm0 and its main path accumulates the result into xmm0.
             // NOTE(review): presumably the superclass treats the listed registers as temporaries
             // and xmm0 as the fixed input/output -- confirm in AMD64MathIntrinsicUnaryOp.
 210         super(TYPE, /* GPR */ rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11,
 211                         /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
 212     }
 213 
 214     private ArrayDataPointerConstant onehalf = pointerConstant(16, new int[]{
                 // Two packed doubles, each 0x3fe0000000000000 (0.5); ints are listed low word
                 // first. emitCode ORs the sign bit of x into this value and adds it to
                 // x * pi32Inv before the truncating cvttsd2si, which yields N.
 215             // @formatter:off
 216             0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
 217             // @formatter:on
 218     });
 219 
 220     private ArrayDataPointerConstant p2 = pointerConstant(16, new int[]{
                 // Two packed copies of 0x3d90b4611a600000 (low word listed first): P_2 of the
                 // three-part reduction constant in the class comment. emitCode multiplies it by
                 // N in both lanes (mulpd).
 221             // @formatter:off
 222             0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
 223             // @formatter:on
 224     });
 225 
 226     private ArrayDataPointerConstant sc4 = pointerConstant(16, new int[]{
                 // Packed coefficient pair SC_4 of the polynomial in the class comment (low word
                 // listed first): low lane 0x3ec71de3a556c734 (about 1/9!), high lane
                 // 0x3efa01a01a01a01a (about 1/8!).
 227             // @formatter:off
 228             0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
 229             // @formatter:on
 230     });
 231 
 232     private ArrayDataPointerConstant ctable = pointerConstant(16, new int[]{
                 // Lookup table with one 32-byte entry (8 ints = four doubles, low word listed
                 // first) for each of the 64 values of N mod 64. emitCode forms the entry address
                 // as ctable + ((N & 63) << 5) and reads doubles at byte offsets 0, 8, 16 and 24.
                 // NOTE(review): judging by the first entry (0, 0, 0, 1.0 for B = 0) the four
                 // doubles appear to be C_hl, S_hi, S_lo and sigma of the class comment -- confirm
                 // against the Intel LIBM source.
 233             // @formatter:off
 234             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
 235             0x00000000, 0x00000000, 0x3ff00000, 0x176d6d31, 0xbf73b92e,
 236             0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000,
 237             0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0x3fc8f8b8,
 238             0xc0000000, 0xbc626d19, 0x00000000, 0x3ff00000, 0x939d225a,
 239             0xbfa60bea, 0x2ed59f06, 0x3fd29406, 0xa0000000, 0xbc75d28d,
 240             0x00000000, 0x3ff00000, 0x866b95cf, 0xbfb37ca1, 0xa6aea963,
 241             0x3fd87de2, 0xe0000000, 0xbc672ced, 0x00000000, 0x3ff00000,
 242             0x73fa1279, 0xbfbe3a68, 0x3806f63b, 0x3fde2b5d, 0x20000000,
 243             0x3c5e0d89, 0x00000000, 0x3ff00000, 0x5bc57974, 0xbfc59267,
 244             0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000,
 245             0x3ff00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0x3fe44cf3,
 246             0x20000000, 0x3c68076a, 0x00000000, 0x3ff00000, 0x99fcef32,
 247             0x3fca8279, 0x667f3bcd, 0x3fe6a09e, 0x20000000, 0xbc8bdd34,
 248             0x00000000, 0x3fe00000, 0x94247758, 0x3fc133cc, 0x6b151741,
 249             0x3fe8bc80, 0x20000000, 0xbc82c5e1, 0x00000000, 0x3fe00000,
 250             0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 0x3fea9b66, 0xe0000000,
 251             0x3c39f630, 0x00000000, 0x3fe00000, 0x7f909c4e, 0xbf9d4a2c,
 252             0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000,
 253             0x3fe00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0x3fed906b,
 254             0x20000000, 0x3c7457e6, 0x00000000, 0x3fe00000, 0x76acf82d,
 255             0x3fa4a031, 0x56c62dda, 0x3fee9f41, 0xe0000000, 0x3c8760b1,
 256             0x00000000, 0x3fd00000, 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0,
 257             0x3fef6297, 0x20000000, 0x3c756217, 0x00000000, 0x3fd00000,
 258             0x0f592f50, 0xbf9ba165, 0xa3d12526, 0x3fefd88d, 0x40000000,
 259             0xbc887df6, 0x00000000, 0x3fc00000, 0x00000000, 0x00000000,
 260             0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 0x00000000,
 261             0x00000000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0x3fefd88d,
 262             0x40000000, 0xbc887df6, 0x00000000, 0xbfc00000, 0x0e5967d5,
 263             0x3fac1d1f, 0xcff75cb0, 0x3fef6297, 0x20000000, 0x3c756217,
 264             0x00000000, 0xbfd00000, 0x76acf82d, 0xbfa4a031, 0x56c62dda,
 265             0x3fee9f41, 0xe0000000, 0x3c8760b1, 0x00000000, 0xbfd00000,
 266             0x65455a75, 0x3fbe0875, 0xcf328d46, 0x3fed906b, 0x20000000,
 267             0x3c7457e6, 0x00000000, 0xbfe00000, 0x7f909c4e, 0x3f9d4a2c,
 268             0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000,
 269             0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0x3fea9b66,
 270             0xe0000000, 0x3c39f630, 0x00000000, 0xbfe00000, 0x94247758,
 271             0xbfc133cc, 0x6b151741, 0x3fe8bc80, 0x20000000, 0xbc82c5e1,
 272             0x00000000, 0xbfe00000, 0x99fcef32, 0xbfca8279, 0x667f3bcd,
 273             0x3fe6a09e, 0x20000000, 0xbc8bdd34, 0x00000000, 0xbfe00000,
 274             0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 0x3fe44cf3, 0x20000000,
 275             0x3c68076a, 0x00000000, 0xbff00000, 0x5bc57974, 0x3fc59267,
 276             0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000,
 277             0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0x3fde2b5d,
 278             0x20000000, 0x3c5e0d89, 0x00000000, 0xbff00000, 0x866b95cf,
 279             0x3fb37ca1, 0xa6aea963, 0x3fd87de2, 0xe0000000, 0xbc672ced,
 280             0x00000000, 0xbff00000, 0x939d225a, 0x3fa60bea, 0x2ed59f06,
 281             0x3fd29406, 0xa0000000, 0xbc75d28d, 0x00000000, 0xbff00000,
 282             0x011469fb, 0x3f93ad06, 0x3c69a60b, 0x3fc8f8b8, 0xc0000000,
 283             0xbc626d19, 0x00000000, 0xbff00000, 0x176d6d31, 0x3f73b92e,
 284             0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000,
 285             0xbff00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
 286             0x00000000, 0x00000000, 0x00000000, 0xbff00000, 0x176d6d31,
 287             0x3f73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718,
 288             0x00000000, 0xbff00000, 0x011469fb, 0x3f93ad06, 0x3c69a60b,
 289             0xbfc8f8b8, 0xc0000000, 0x3c626d19, 0x00000000, 0xbff00000,
 290             0x939d225a, 0x3fa60bea, 0x2ed59f06, 0xbfd29406, 0xa0000000,
 291             0x3c75d28d, 0x00000000, 0xbff00000, 0x866b95cf, 0x3fb37ca1,
 292             0xa6aea963, 0xbfd87de2, 0xe0000000, 0x3c672ced, 0x00000000,
 293             0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0xbfde2b5d,
 294             0x20000000, 0xbc5e0d89, 0x00000000, 0xbff00000, 0x5bc57974,
 295             0x3fc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd,
 296             0x00000000, 0xbff00000, 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6,
 297             0xbfe44cf3, 0x20000000, 0xbc68076a, 0x00000000, 0xbff00000,
 298             0x99fcef32, 0xbfca8279, 0x667f3bcd, 0xbfe6a09e, 0x20000000,
 299             0x3c8bdd34, 0x00000000, 0xbfe00000, 0x94247758, 0xbfc133cc,
 300             0x6b151741, 0xbfe8bc80, 0x20000000, 0x3c82c5e1, 0x00000000,
 301             0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0xbfea9b66,
 302             0xe0000000, 0xbc39f630, 0x00000000, 0xbfe00000, 0x7f909c4e,
 303             0x3f9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1,
 304             0x00000000, 0xbfe00000, 0x65455a75, 0x3fbe0875, 0xcf328d46,
 305             0xbfed906b, 0x20000000, 0xbc7457e6, 0x00000000, 0xbfe00000,
 306             0x76acf82d, 0xbfa4a031, 0x56c62dda, 0xbfee9f41, 0xe0000000,
 307             0xbc8760b1, 0x00000000, 0xbfd00000, 0x0e5967d5, 0x3fac1d1f,
 308             0xcff75cb0, 0xbfef6297, 0x20000000, 0xbc756217, 0x00000000,
 309             0xbfd00000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0xbfefd88d,
 310             0x40000000, 0x3c887df6, 0x00000000, 0xbfc00000, 0x00000000,
 311             0x00000000, 0x00000000, 0xbff00000, 0x00000000, 0x00000000,
 312             0x00000000, 0x00000000, 0x0f592f50, 0xbf9ba165, 0xa3d12526,
 313             0xbfefd88d, 0x40000000, 0x3c887df6, 0x00000000, 0x3fc00000,
 314             0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 0xbfef6297, 0x20000000,
 315             0xbc756217, 0x00000000, 0x3fd00000, 0x76acf82d, 0x3fa4a031,
 316             0x56c62dda, 0xbfee9f41, 0xe0000000, 0xbc8760b1, 0x00000000,
 317             0x3fd00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0xbfed906b,
 318             0x20000000, 0xbc7457e6, 0x00000000, 0x3fe00000, 0x7f909c4e,
 319             0xbf9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1,
 320             0x00000000, 0x3fe00000, 0x9ae68c87, 0x3fac73b3, 0x290ea1a3,
 321             0xbfea9b66, 0xe0000000, 0xbc39f630, 0x00000000, 0x3fe00000,
 322             0x94247758, 0x3fc133cc, 0x6b151741, 0xbfe8bc80, 0x20000000,
 323             0x3c82c5e1, 0x00000000, 0x3fe00000, 0x99fcef32, 0x3fca8279,
 324             0x667f3bcd, 0xbfe6a09e, 0x20000000, 0x3c8bdd34, 0x00000000,
 325             0x3fe00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0xbfe44cf3,
 326             0x20000000, 0xbc68076a, 0x00000000, 0x3ff00000, 0x5bc57974,
 327             0xbfc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd,
 328             0x00000000, 0x3ff00000, 0x73fa1279, 0xbfbe3a68, 0x3806f63b,
 329             0xbfde2b5d, 0x20000000, 0xbc5e0d89, 0x00000000, 0x3ff00000,
 330             0x866b95cf, 0xbfb37ca1, 0xa6aea963, 0xbfd87de2, 0xe0000000,
 331             0x3c672ced, 0x00000000, 0x3ff00000, 0x939d225a, 0xbfa60bea,
 332             0x2ed59f06, 0xbfd29406, 0xa0000000, 0x3c75d28d, 0x00000000,
 333             0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0xbfc8f8b8,
 334             0xc0000000, 0x3c626d19, 0x00000000, 0x3ff00000, 0x176d6d31,
 335             0xbf73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718,
 336             0x00000000, 0x3ff00000
 337             // @formatter:on
 338     });
 339 
 340     private ArrayDataPointerConstant sc2 = pointerConstant(16, new int[]{
                 // Packed coefficient pair SC_2 of the polynomial in the class comment (low word
                 // listed first): low lane 0x3f81111111111111 (about 1/5!), high lane
                 // 0x3fa5555555555555 (about 1/4!).
 341             // @formatter:off
 342             0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
 343             // @formatter:on
 344     });
 345 
 346     private ArrayDataPointerConstant sc3 = pointerConstant(16, new int[]{
                 // Packed coefficient pair SC_3 of the polynomial in the class comment (low word
                 // listed first): low lane 0xbf2a01a01a01a01a (about -1/7!), high lane
                 // 0xbf56c16c16c16c17 (about -1/6!).
 347             // @formatter:off
 348             0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
 349             // @formatter:on
 350     });
 351 
 352     private ArrayDataPointerConstant sc1 = pointerConstant(16, new int[]{
                 // Packed coefficient pair SC_1 of the polynomial in the class comment (low word
                 // listed first): low lane 0xbfc5555555555555 (about -1/3!), high lane
                 // 0xbfe0000000000000 (-1/2).
 353             // @formatter:off
 354             0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
 355             // @formatter:on
 356     });
 357 
 358     private ArrayDataPointerConstant piInvTable = pointerConstant(16, new int[]{
                 // 32-bit words used by emitCode's large-argument path (block1): a byte offset
                 // derived from the exponent of x is added to this table's address, and the seven
                 // words at offsets 0 to 24 from there are multiplied with the two halves of the
                 // significand of x.
                 // NOTE(review): the leading non-zero words 0xa2f9836e, 0x4e441529 match the hex
                 // expansion of 2/pi; presumably the whole table continues it -- confirm.
 359             // @formatter:off
 360             0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1,
 361             0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561,
 362             0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c,
 363             0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
 364             0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff,
 365             0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7,
 366             0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7,
 367             0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab,
 368             0xf0cfbc21
 369             // @formatter:on
 370     });
 371 
 372     private ArrayDataPointerConstant pi4 = pointerConstant(8, new int[]{
                 // 0x3fe921fb40000000 (low word listed first): the leading bits of pi/4. emitCode
                 // loads it into xmm2 after the large-argument reduction. The @formatter:off
                 // opened here is only closed inside the pi48 declaration that follows.
 373             // @formatter:off
 374             0x40000000, 0x3fe921fb,
 375     });
 376     private ArrayDataPointerConstant pi48 = pointerConstant(8, new int[]{
                 // 0x3e64442d18469899 (low word listed first): the bits of pi/4 that follow those
                 // held in pi4. emitCode loads it into xmm6 right after pi4.
 377             0x18469899, 0x3e64442d
 378             // @formatter:on
 379     });
 380 
 381     private ArrayDataPointerConstant pi32Inv = pointerConstant(8, new int[]{
                 // 0x40245f306dc9c883 (low word listed first), about 10.186, i.e. 32/pi. emitCode
                 // multiplies x by it as the first step of range reduction (y = 32/pi * x in the
                 // class comment).
 382             // @formatter:off
 383             0x6dc9c883, 0x40245f30
 384             // @formatter:on
 385     });
 386 
 387     private ArrayDataPointerConstant shifter = pointerConstant(8, new int[]{
                 // 0x4338000000000000 (low word listed first) = 1.5 * 2^52, the SHIFTER named in
                 // the class comment. emitCode loads it into xmm2 on entry.
 388             // @formatter:off
 389             0x00000000, 0x43380000
 390             // @formatter:on
 391     });
 392 
 393     private ArrayDataPointerConstant signMask = pointerConstant(8, new int[]{
                 // 0x8000000000000000 (low word listed first): only the sign bit of a double is
                 // set. emitCode ANDs it with x to isolate the sign of the argument.
 394             // @formatter:off
 395             0x00000000, 0x80000000
 396             // @formatter:on
 397     });
 398 
 399     private ArrayDataPointerConstant p3 = pointerConstant(8, new int[]{
                 // 0x3b63198a2e037073 (low word listed first): P_3, the last part of the
                 // three-part reduction constant in the class comment. emitCode multiplies N by
                 // it (mulsd).
 400             // @formatter:off
 401             0x2e037073, 0x3b63198a
 402             // @formatter:on
 403     });
 404 
 405     private ArrayDataPointerConstant allOnes = pointerConstant(8, new int[]{
                 // 0x3fefffffffffffff (low word listed first): the largest double below 1.0.
                 // emitCode multiplies x by it on one of the tiny-argument paths (section 7 of the
                 // class comment).
 406             // @formatter:off
 407             0xffffffff, 0x3fefffff
 408             // @formatter:on
 409     });
 410 
 411     private ArrayDataPointerConstant twoPow55 = pointerConstant(8, new int[]{
                 // 0x4360000000000000 (low word listed first) = 2^55. Used on the tiny-argument
                 // path of emitCode that forms 2^55 * x - x.
 412             // @formatter:off
 413             0x00000000, 0x43600000
 414             // @formatter:on
 415     });
 416 
 417     private ArrayDataPointerConstant twoPowM55 = pointerConstant(8, new int[]{
                 // 0x3c80000000000000 (low word listed first) = 2^-55. Scale factor applied to
                 // 2^55 * x - x on the tiny-argument path of emitCode.
 418             // @formatter:off
 419             0x00000000, 0x3c800000
 420             // @formatter:on
 421     });
 422 
 423     private ArrayDataPointerConstant p1 = pointerConstant(8, new int[]{
                 // 0x3fb921fb54400000 (low word listed first): P_1, the leading bits of pi/32.
                 // The main path of emitCode loads this same bit pattern as an immediate
                 // (0x3fb921fb54400000L) instead of reading this constant.
 424             // @formatter:off
 425             0x54400000, 0x3fb921fb
 426             // @formatter:on
 427     });
 428 
 429     private ArrayDataPointerConstant negZero = pointerConstant(8, new int[]{
                 // 0x8000000000000000 (low word listed first), i.e. -0.0; the same bit pattern as
                 // signMask.
 430             // @formatter:off
 431             0x00000000, 0x80000000
 432             // @formatter:on
 433     });
 434 
 435     @Override
 436     public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
 437         Label block0 = new Label();
 438         Label block1 = new Label();
 439         Label block2 = new Label();
 440         Label block3 = new Label();
 441         Label block4 = new Label();
 442         Label block5 = new Label();
 443         Label block6 = new Label();
 444         Label block7 = new Label();
 445         Label block8 = new Label();
 446         Label block9 = new Label();
 447         Label block10 = new Label();
 448         Label block11 = new Label();
 449         Label block12 = new Label();
 450         Label block13 = new Label();
 451         Label block14 = new Label();
 452 
 453         masm.push(AMD64.rbx);
 454         masm.subq(rsp, 16);
 455         masm.movsd(new AMD64Address(rsp, 8), xmm0);
 456         masm.movl(rax, new AMD64Address(rsp, 12));
 457         masm.movq(xmm1, recordExternalAddress(crb, pi32Inv));          // 0x6dc9c883, 0x40245f30
 458         masm.movq(xmm2, recordExternalAddress(crb, shifter));          // 0x00000000, 0x43380000
 459         masm.andl(rax, 2147418112);
 460         masm.subl(rax, 808452096);
 461         masm.cmpl(rax, 281346048);
 462         masm.jcc(ConditionFlag.Above, block0);
 463         masm.mulsd(xmm1, xmm0);
 464         masm.movdqu(xmm5, recordExternalAddress(crb, onehalf));        // 0x00000000, 0x3fe00000,
 465                                                                        // 0x00000000, 0x3fe00000
 466         masm.movq(xmm4, recordExternalAddress(crb, signMask));         // 0x00000000, 0x80000000
 467         masm.pand(xmm4, xmm0);
 468         masm.por(xmm5, xmm4);
 469         masm.addpd(xmm1, xmm5);
 470         masm.cvttsd2sil(rdx, xmm1);
 471         masm.cvtsi2sdl(xmm1, rdx);
 472         masm.movdqu(xmm6, recordExternalAddress(crb, p2));             // 0x1a600000, 0x3d90b461,
 473                                                                        // 0x1a600000, 0x3d90b461
 474         masm.movq(r8, 0x3fb921fb54400000L);
 475         masm.movdq(xmm3, r8);
 476         masm.movdqu(xmm5, recordExternalAddress(crb, sc4));            // 0xa556c734, 0x3ec71de3,
 477                                                                        // 0x1a01a01a, 0x3efa01a0
 478         masm.pshufd(xmm4, xmm0, 68);
 479         masm.mulsd(xmm3, xmm1);
 480         if (masm.supports(AMD64.CPUFeature.SSE3)) {
 481             masm.movddup(xmm1, xmm1);
 482         } else {
 483             masm.movlhps(xmm1, xmm1);
 484         }
 485         masm.andl(rdx, 63);
 486         masm.shll(rdx, 5);
 487         masm.leaq(AMD64.rax, recordExternalAddress(crb, ctable));
 488         masm.addq(AMD64.rax, AMD64.rdx);
 489         masm.mulpd(xmm6, xmm1);
 490         masm.mulsd(xmm1, recordExternalAddress(crb, p3));              // 0x2e037073, 0x3b63198a
 491         masm.subsd(xmm4, xmm3);
 492         masm.movq(xmm7, new AMD64Address(AMD64.rax, 8));
 493         masm.subsd(xmm0, xmm3);
 494         if (masm.supports(AMD64.CPUFeature.SSE3)) {
 495             masm.movddup(xmm3, xmm4);
 496         } else {
 497             masm.movdqu(xmm3, xmm4);
 498             masm.movlhps(xmm3, xmm3);
 499         }
 500         masm.subsd(xmm4, xmm6);
 501         masm.pshufd(xmm0, xmm0, 68);
 502         masm.movdqu(xmm2, new AMD64Address(AMD64.rax, 0));
 503         masm.mulpd(xmm5, xmm0);
 504         masm.subpd(xmm0, xmm6);
 505         masm.mulsd(xmm7, xmm4);
 506         masm.subsd(xmm3, xmm4);
 507         masm.mulpd(xmm5, xmm0);
 508         masm.mulpd(xmm0, xmm0);
 509         masm.subsd(xmm3, xmm6);
 510         masm.movdqu(xmm6, recordExternalAddress(crb, sc2));            // 0x11111111, 0x3f811111,
 511                                                                        // 0x55555555, 0x3fa55555
 512         masm.subsd(xmm1, xmm3);
 513         masm.movq(xmm3, new AMD64Address(AMD64.rax, 24));
 514         masm.addsd(xmm2, xmm3);
 515         masm.subsd(xmm7, xmm2);
 516         masm.mulsd(xmm2, xmm4);
 517         masm.mulpd(xmm6, xmm0);
 518         masm.mulsd(xmm3, xmm4);
 519         masm.mulpd(xmm2, xmm0);
 520         masm.mulpd(xmm0, xmm0);
 521         masm.addpd(xmm5, recordExternalAddress(crb, sc3));             // 0x1a01a01a, 0xbf2a01a0,
 522                                                                        // 0x16c16c17, 0xbf56c16c
 523         masm.mulsd(xmm4, new AMD64Address(AMD64.rax, 0));
 524         masm.addpd(xmm6, recordExternalAddress(crb, sc1));             // 0x55555555, 0xbfc55555,
 525                                                                        // 0x00000000, 0xbfe00000
 526         masm.mulpd(xmm5, xmm0);
 527         masm.movdqu(xmm0, xmm3);
 528         masm.addsd(xmm3, new AMD64Address(AMD64.rax, 8));
 529         masm.mulpd(xmm1, xmm7);
 530         masm.movdqu(xmm7, xmm4);
 531         masm.addsd(xmm4, xmm3);
 532         masm.addpd(xmm6, xmm5);
 533         masm.movq(xmm5, new AMD64Address(AMD64.rax, 8));
 534         masm.subsd(xmm5, xmm3);
 535         masm.subsd(xmm3, xmm4);
 536         masm.addsd(xmm1, new AMD64Address(AMD64.rax, 16));
 537         masm.mulpd(xmm6, xmm2);
 538         masm.addsd(xmm5, xmm0);
 539         masm.addsd(xmm3, xmm7);
 540         masm.addsd(xmm1, xmm5);
 541         masm.addsd(xmm1, xmm3);
 542         masm.addsd(xmm1, xmm6);
 543         masm.unpckhpd(xmm6, xmm6);
 544         masm.movdqu(xmm0, xmm4);
 545         masm.addsd(xmm1, xmm6);
 546         masm.addsd(xmm0, xmm1);
 547         masm.jmp(block14);
 548 
 549         masm.bind(block0);
 550         masm.jcc(ConditionFlag.Greater, block1);
 551         masm.shrl(rax, 20);
 552         masm.cmpl(rax, 3325);
 553         masm.jcc(ConditionFlag.NotEqual, block2);
 554         masm.mulsd(xmm0, recordExternalAddress(crb, allOnes));         // 0xffffffff, 0x3fefffff
 555         masm.jmp(block14);
 556 
 557         masm.bind(block2);
 558         masm.movq(xmm3, recordExternalAddress(crb, twoPow55));         // 0x00000000, 0x43600000
 559         masm.mulsd(xmm3, xmm0);
 560         masm.subsd(xmm3, xmm0);
 561         masm.mulsd(xmm3, recordExternalAddress(crb, twoPowM55));       // 0x00000000, 0x3c800000
 562         masm.jmp(block14);
 563 
 564         masm.bind(block1);
 565         masm.pextrw(rax, xmm0, 3);
 566         masm.andl(rax, 32752);
 567         masm.cmpl(rax, 32752);
 568         masm.jcc(ConditionFlag.Equal, block3);
 569         masm.pextrw(rcx, xmm0, 3);
 570         masm.andl(rcx, 32752);
 571         masm.subl(rcx, 16224);
 572         masm.shrl(rcx, 7);
 573         masm.andl(rcx, 65532);
 574         masm.leaq(r11, recordExternalAddress(crb, piInvTable));
 575         masm.addq(AMD64.rcx, r11);
 576         masm.movdq(AMD64.rax, xmm0);
 577         masm.movl(r10, new AMD64Address(AMD64.rcx, 20));
 578         masm.movl(r8, new AMD64Address(AMD64.rcx, 24));
 579         masm.movl(rdx, rax);
 580         masm.shrq(AMD64.rax, 21);
 581         masm.orl(rax, Integer.MIN_VALUE);
 582         masm.shrl(rax, 11);
 583         masm.movl(r9, r10);
 584         masm.imulq(r10, AMD64.rdx);
 585         masm.imulq(r9, AMD64.rax);
 586         masm.imulq(r8, AMD64.rax);
 587         masm.movl(rsi, new AMD64Address(AMD64.rcx, 16));
 588         masm.movl(rdi, new AMD64Address(AMD64.rcx, 12));
 589         masm.movl(r11, r10);
 590         masm.shrq(r10, 32);
 591         masm.addq(r9, r10);
 592         masm.addq(r11, r8);
 593         masm.movl(r8, r11);
 594         masm.shrq(r11, 32);
 595         masm.addq(r9, r11);
 596         masm.movl(r10, rsi);
 597         masm.imulq(rsi, AMD64.rdx);
 598         masm.imulq(r10, AMD64.rax);
 599         masm.movl(r11, rdi);
 600         masm.imulq(rdi, AMD64.rdx);
 601         masm.movl(rbx, rsi);
 602         masm.shrq(rsi, 32);
 603         masm.addq(r9, AMD64.rbx);
 604         masm.movl(rbx, r9);
 605         masm.shrq(r9, 32);
 606         masm.addq(r10, rsi);
 607         masm.addq(r10, r9);
 608         masm.shlq(AMD64.rbx, 32);
 609         masm.orq(r8, AMD64.rbx);
 610         masm.imulq(r11, AMD64.rax);
 611         masm.movl(r9, new AMD64Address(AMD64.rcx, 8));
 612         masm.movl(rsi, new AMD64Address(AMD64.rcx, 4));
 613         masm.movl(rbx, rdi);
 614         masm.shrq(rdi, 32);
 615         masm.addq(r10, AMD64.rbx);
 616         masm.movl(rbx, r10);
 617         masm.shrq(r10, 32);
 618         masm.addq(r11, rdi);
 619         masm.addq(r11, r10);
 620         masm.movq(rdi, r9);
 621         masm.imulq(r9, AMD64.rdx);
 622         masm.imulq(rdi, AMD64.rax);
 623         masm.movl(r10, r9);
 624         masm.shrq(r9, 32);
 625         masm.addq(r11, r10);
 626         masm.movl(r10, r11);
 627         masm.shrq(r11, 32);
 628         masm.addq(rdi, r9);
 629         masm.addq(rdi, r11);
 630         masm.movq(r9, rsi);
 631         masm.imulq(rsi, AMD64.rdx);
 632         masm.imulq(r9, AMD64.rax);
 633         masm.shlq(r10, 32);
 634         masm.orq(r10, AMD64.rbx);
 635         masm.movl(rax, new AMD64Address(AMD64.rcx, 0));
 636         masm.movl(r11, rsi);
 637         masm.shrq(rsi, 32);
 638         masm.addq(rdi, r11);
 639         masm.movl(r11, rdi);
 640         masm.shrq(rdi, 32);
 641         masm.addq(r9, rsi);
 642         masm.addq(r9, rdi);
 643         masm.imulq(AMD64.rdx, AMD64.rax);
 644         masm.pextrw(rbx, xmm0, 3);
 645         masm.leaq(rdi, recordExternalAddress(crb, piInvTable));
 646         masm.subq(AMD64.rcx, rdi);
 647         masm.addl(rcx, rcx);
 648         masm.addl(rcx, rcx);
 649         masm.addl(rcx, rcx);
 650         masm.addl(rcx, 19);
 651         masm.movl(rsi, 32768);
 652         masm.andl(rsi, rbx);
 653         masm.shrl(rbx, 4);
 654         masm.andl(rbx, 2047);
 655         masm.subl(rbx, 1023);
 656         masm.subl(rcx, rbx);
 657         masm.addq(r9, AMD64.rdx);
 658         masm.movl(rdx, rcx);
 659         masm.addl(rdx, 32);
 660         masm.cmpl(rcx, 1);
 661         masm.jcc(ConditionFlag.Less, block4);
 662         masm.negl(rcx);
 663         masm.addl(rcx, 29);
 664         masm.shll(r9);
 665         masm.movl(rdi, r9);
 666         masm.andl(r9, 536870911);
 667         masm.testl(r9, 268435456);
 668         masm.jcc(ConditionFlag.NotEqual, block5);
 669         masm.shrl(r9);
 670         masm.movl(rbx, 0);
 671         masm.shlq(r9, 32);
 672         masm.orq(r9, r11);
 673 
 674         masm.bind(block6);
 675 
 676         masm.bind(block7);
 677 
 678         masm.cmpq(r9, 0);
 679         masm.jcc(ConditionFlag.Equal, block8);
 680 
 681         masm.bind(block9);
 682         masm.bsrq(r11, r9);
 683         masm.movl(rcx, 29);
 684         masm.subl(rcx, r11);
 685         masm.jcc(ConditionFlag.LessEqual, block10);
 686         masm.shlq(r9);
 687         masm.movq(AMD64.rax, r10);
 688         masm.shlq(r10);
 689         masm.addl(rdx, rcx);
 690         masm.negl(rcx);
 691         masm.addl(rcx, 64);
 692         masm.shrq(AMD64.rax);
 693         masm.shrq(r8);
 694         masm.orq(r9, AMD64.rax);
 695         masm.orq(r10, r8);
 696 
 697         masm.bind(block11);
 698         masm.cvtsi2sdq(xmm0, r9);
 699         masm.shrq(r10, 1);
 700         masm.cvtsi2sdq(xmm3, r10);
 701         masm.xorpd(xmm4, xmm4);
 702         masm.shll(rdx, 4);
 703         masm.negl(rdx);
 704         masm.addl(rdx, 16368);
 705         masm.orl(rdx, rsi);
 706         masm.xorl(rdx, rbx);
 707         masm.pinsrw(xmm4, rdx, 3);
 708         masm.movq(xmm2, recordExternalAddress(crb, pi4));              // 0x40000000, 0x3fe921fb,
 709                                                                        // 0x18469899, 0x3e64442d
 710         masm.movq(xmm6, recordExternalAddress(crb, pi48));             // 0x3fe921fb, 0x18469899,
 711                                                                        // 0x3e64442d
 712         masm.xorpd(xmm5, xmm5);
 713         masm.subl(rdx, 1008);
 714         masm.pinsrw(xmm5, rdx, 3);
 715         masm.mulsd(xmm0, xmm4);
 716         masm.shll(rsi, 16);
 717         masm.sarl(rsi, 31);
 718         masm.mulsd(xmm3, xmm5);
 719         masm.movdqu(xmm1, xmm0);
 720         masm.mulsd(xmm0, xmm2);
 721         masm.shrl(rdi, 29);
 722         masm.addsd(xmm1, xmm3);
 723         masm.mulsd(xmm3, xmm2);
 724         masm.addl(rdi, rsi);
 725         masm.xorl(rdi, rsi);
 726         masm.mulsd(xmm6, xmm1);
 727         masm.movl(rax, rdi);
 728         masm.addsd(xmm6, xmm3);
 729         masm.movdqu(xmm2, xmm0);
 730         masm.addsd(xmm0, xmm6);
 731         masm.subsd(xmm2, xmm0);
 732         masm.addsd(xmm6, xmm2);
 733 
 734         masm.bind(block12);
 735         masm.movq(xmm1, recordExternalAddress(crb, pi32Inv));          // 0x6dc9c883, 0x40245f30
 736         masm.mulsd(xmm1, xmm0);
 737         masm.movq(xmm5, recordExternalAddress(crb, onehalf));          // 0x00000000, 0x3fe00000,
 738                                                                        // 0x00000000, 0x3fe00000
 739         masm.movq(xmm4, recordExternalAddress(crb, signMask));         // 0x00000000, 0x80000000
 740         masm.pand(xmm4, xmm0);
 741         masm.por(xmm5, xmm4);
 742         masm.addpd(xmm1, xmm5);
 743         masm.cvttsd2sil(rdx, xmm1);
 744         masm.cvtsi2sdl(xmm1, rdx);
 745         masm.movq(xmm3, recordExternalAddress(crb, p1));               // 0x54400000, 0x3fb921fb
 746         masm.movdqu(xmm2, recordExternalAddress(crb, p2));             // 0x1a600000, 0x3d90b461,
 747                                                                        // 0x1a600000, 0x3d90b461
 748         masm.mulsd(xmm3, xmm1);
 749         masm.unpcklpd(xmm1, xmm1);
 750         masm.shll(rax, 3);
 751         masm.addl(rdx, 1865216);
 752         masm.movdqu(xmm4, xmm0);
 753         masm.addl(rdx, rax);
 754         masm.andl(rdx, 63);
 755         masm.movdqu(xmm5, recordExternalAddress(crb, sc4));            // 0x54400000, 0x3fb921fb
 756         masm.leaq(AMD64.rax, recordExternalAddress(crb, ctable));
 757         masm.shll(rdx, 5);
 758         masm.addq(AMD64.rax, AMD64.rdx);
 759         masm.mulpd(xmm2, xmm1);
 760         masm.subsd(xmm0, xmm3);
 761         masm.mulsd(xmm1, recordExternalAddress(crb, p3));              // 0x2e037073, 0x3b63198a
 762         masm.subsd(xmm4, xmm3);
 763         masm.movq(xmm7, new AMD64Address(AMD64.rax, 8));
 764         masm.unpcklpd(xmm0, xmm0);
 765         masm.movdqu(xmm3, xmm4);
 766         masm.subsd(xmm4, xmm2);
 767         masm.mulpd(xmm5, xmm0);
 768         masm.subpd(xmm0, xmm2);
 769         masm.mulsd(xmm7, xmm4);
 770         masm.subsd(xmm3, xmm4);
 771         masm.mulpd(xmm5, xmm0);
 772         masm.mulpd(xmm0, xmm0);
 773         masm.subsd(xmm3, xmm2);
 774         masm.movdqu(xmm2, new AMD64Address(AMD64.rax, 0));
 775         masm.subsd(xmm1, xmm3);
 776         masm.movq(xmm3, new AMD64Address(AMD64.rax, 24));
 777         masm.addsd(xmm2, xmm3);
 778         masm.subsd(xmm7, xmm2);
 779         masm.subsd(xmm1, xmm6);
 780         masm.movdqu(xmm6, recordExternalAddress(crb, sc2));            // 0x11111111, 0x3f811111,
 781                                                                        // 0x55555555, 0x3fa55555
 782         masm.mulsd(xmm2, xmm4);
 783         masm.mulpd(xmm6, xmm0);
 784         masm.mulsd(xmm3, xmm4);
 785         masm.mulpd(xmm2, xmm0);
 786         masm.mulpd(xmm0, xmm0);
 787         masm.addpd(xmm5, recordExternalAddress(crb, sc3));             // 0x1a01a01a, 0xbf2a01a0,
 788                                                                        // 0x16c16c17, 0xbf56c16c
 789         masm.mulsd(xmm4, new AMD64Address(AMD64.rax, 0));
 790         masm.addpd(xmm6, recordExternalAddress(crb, sc1));             // 0x55555555, 0xbfc55555,
 791                                                                        // 0x00000000, 0xbfe00000
 792         masm.mulpd(xmm5, xmm0);
 793         masm.movdqu(xmm0, xmm3);
 794         masm.addsd(xmm3, new AMD64Address(AMD64.rax, 8));
 795         masm.mulpd(xmm1, xmm7);
 796         masm.movdqu(xmm7, xmm4);
 797         masm.addsd(xmm4, xmm3);
 798         masm.addpd(xmm6, xmm5);
 799         masm.movq(xmm5, new AMD64Address(AMD64.rax, 8));
 800         masm.subsd(xmm5, xmm3);
 801         masm.subsd(xmm3, xmm4);
 802         masm.addsd(xmm1, new AMD64Address(AMD64.rax, 16));
 803         masm.mulpd(xmm6, xmm2);
 804         masm.addsd(xmm5, xmm0);
 805         masm.addsd(xmm3, xmm7);
 806         masm.addsd(xmm1, xmm5);
 807         masm.addsd(xmm1, xmm3);
 808         masm.addsd(xmm1, xmm6);
 809         masm.unpckhpd(xmm6, xmm6);
 810         masm.movdqu(xmm0, xmm4);
 811         masm.addsd(xmm1, xmm6);
 812         masm.addsd(xmm0, xmm1);
 813         masm.jmp(block14);
 814 
 815         masm.bind(block8);
 816         masm.addl(rdx, 64);
 817         masm.movq(r9, r10);
 818         masm.movq(r10, r8);
 819         masm.movl(r8, 0);
 820         masm.cmpq(r9, 0);
 821         masm.jcc(ConditionFlag.NotEqual, block9);
 822         masm.addl(rdx, 64);
 823         masm.movq(r9, r10);
 824         masm.movq(r10, r8);
 825         masm.cmpq(r9, 0);
 826         masm.jcc(ConditionFlag.NotEqual, block9);
 827         masm.xorpd(xmm0, xmm0);
 828         masm.xorpd(xmm6, xmm6);
 829         masm.jmp(block12);
 830 
 831         masm.bind(block10);
 832         masm.jcc(ConditionFlag.Equal, block11);
 833         masm.negl(rcx);
 834         masm.shrq(r10);
 835         masm.movq(AMD64.rax, r9);
 836         masm.shrq(r9);
 837         masm.subl(rdx, rcx);
 838         masm.negl(rcx);
 839         masm.addl(rcx, 64);
 840         masm.shlq(AMD64.rax);
 841         masm.orq(r10, AMD64.rax);
 842         masm.jmp(block11);
 843 
 844         masm.bind(block4);
 845         masm.negl(rcx);
 846         masm.shlq(r9, 32);
 847         masm.orq(r9, r11);
 848         masm.shlq(r9);
 849         masm.movq(rdi, r9);
 850         masm.testl(r9, Integer.MIN_VALUE);
 851         masm.jcc(ConditionFlag.NotEqual, block13);
 852         masm.shrl(r9);
 853         masm.movl(rbx, 0);
 854         masm.shrq(rdi, 3);
 855         masm.jmp(block7);
 856 
 857         masm.bind(block5);
 858         masm.shrl(r9);
 859         masm.movl(rbx, 536870912);
 860         masm.shrl(rbx);
 861         masm.shlq(r9, 32);
 862         masm.orq(r9, r11);
 863         masm.shlq(AMD64.rbx, 32);
 864         masm.addl(rdi, 536870912);
 865         masm.movl(AMD64.rcx, 0);
 866         masm.movl(r11, 0);
 867         masm.subq(AMD64.rcx, r8);
 868         masm.sbbq(r11, r10);
 869         masm.sbbq(AMD64.rbx, r9);
 870         masm.movq(r8, AMD64.rcx);
 871         masm.movq(r10, r11);
 872         masm.movq(r9, AMD64.rbx);
 873         masm.movl(rbx, 32768);
 874         masm.jmp(block6);
 875 
 876         masm.bind(block13);
 877         masm.shrl(r9);
 878         masm.movq(AMD64.rbx, 0x100000000L);
 879         masm.shrq(AMD64.rbx);
 880         masm.movl(AMD64.rcx, 0);
 881         masm.movl(r11, 0);
 882         masm.subq(AMD64.rcx, r8);
 883         masm.sbbq(r11, r10);
 884         masm.sbbq(AMD64.rbx, r9);
 885         masm.movq(r8, AMD64.rcx);
 886         masm.movq(r10, r11);
 887         masm.movq(r9, AMD64.rbx);
 888         masm.movl(rbx, 32768);
 889         masm.shrq(rdi, 3);
 890         masm.addl(rdi, 536870912);
 891         masm.jmp(block7);
 892 
 893         masm.bind(block3);
 894         masm.movq(xmm0, new AMD64Address(rsp, 8));
 895         masm.mulsd(xmm0, recordExternalAddress(crb, negZero));         // 0x00000000, 0x80000000
 896         masm.movq(new AMD64Address(rsp, 0), xmm0);
 897 
 898         masm.bind(block14);
 899         masm.addq(rsp, 16);
 900         masm.pop(AMD64.rbx);
 901     }
 902 }