1 /*
   2  * Copyright (c) 2015, Intel Corporation.
   3  * Intel Math Library (LIBM) Source Code
   4  *
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "precompiled.hpp"
  28 #include "asm/assembler.hpp"
  29 #include "asm/assembler.inline.hpp"
  30 #include "macroAssembler_x86.hpp"
  31 
  32 #ifdef _MSC_VER
     // Microsoft compilers: request x-byte alignment through __declspec.
  33 #define ALIGNED_(x) __declspec(align(x))
  34 #else
     // All other compilers: use the GNU-style aligned attribute.
  35 #define ALIGNED_(x) __attribute__ ((aligned(x)))
  36 #endif
  37 
  38 // The 64 bit code is at most SSE2 compliant
  39 
  40 /******************************************************************************/
  41 //                     ALGORITHM DESCRIPTION - EXP()
  42 //                     ---------------------
  43 //
  44 // Description:
  45 //  Let K = 64 (table size).
  46 //        x    x/log(2)     n
  47 //       e  = 2          = 2 * T[j] * (1 + P(y))
  48 //  where
  49 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
  50 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
  51 //                  j/K
  52 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
  53 //
  54 //       P(y) is a minimax polynomial approximation of exp(y)-1
  55 //       on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
  56 //
  57 //  To avoid problems with arithmetic overflow and underflow,
  58 //            n                        n1  n2
  59 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
  60 //  where BIAS is a value of exponent bias.
  61 //
  62 // Special cases:
  63 //  exp(NaN) = NaN
  64 //  exp(+INF) = +INF
  65 //  exp(-INF) = 0
  66 //  exp(x) = 1 for subnormals
  67 //  for finite argument, only exp(0)=1 is exact
  68 //  For IEEE double
  69 //    if x >  709.782712893383973096 then exp(x) overflow
  70 //    if x < -745.133219101941108420 then exp(x) underflow
  71 //
  72 /******************************************************************************/
  73 
  74 ALIGNED_(16) juint _cv[] =
  75 {
         // Packed-double constants used by fast_exp(). Each pair of juints is one
         // 64-bit double, low word first. By byte offset:
         //    0: 64/ln(2), in both lanes
         //   16: high part of ln(2)/64 (low mantissa bits zero), in both lanes
         //   32: low-order correction to ln(2)/64, in both lanes
         //   48: a value one step below 0.5, in both lanes
         //   64: polynomial coefficients, approximately { 1/720, 1/24 }
         //   80: polynomial coefficients, approximately { 1/120, 1/6 }
  76     0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
  77     0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
  78     0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
  79     0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
  80     0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  81 };
  82 
     // 1.5 * 2^52 in both lanes. fast_exp() adds and then subtracts it; the low
     // 32 bits of the intermediate sum are read back as the integer m.
  83 ALIGNED_(16) juint _shifter[] =
  84 {
  85     0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  86 };
  87 
     // Keeps bits 6..31 of each 64-bit lane (clears the low six bits and the
     // upper dword).
  88 ALIGNED_(16) juint _mmask[] =
  89 {
  90     0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  91 };
  92 
     // 0xffc0 == 1023 << 6 in each lane; fast_exp() adds it to the masked m and
     // then shifts left by 46, which places (m >> 6) + 1023 at bit 52.
  93 ALIGNED_(16) juint _bias[] =
  94 {
  95     0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  96 };
  97 
     // 64 entries of 16 bytes, indexed in fast_exp() by (m & 63) << 4.
     // Entry 0 is all zero. In each entry the first double is a small correction
     // term and the second holds mantissa bits with a zero exponent field;
     // fast_exp() adds the first to the polynomial and ORs an exponent into the
     // second. Per the algorithm description these are presumably T_lo[j] and
     // the mantissa of T_hi[j] for 2^(j/64).
  98 ALIGNED_(16) juint _Tbl_addr[] =
  99 {
 100     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
 101     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
 102     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
 103     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
 104     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
 105     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
 106     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
 107     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
 108     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
 109     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
 110     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
 111     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
 112     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
 113     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
 114     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
 115     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
 116     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
 117     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
 118     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
 119     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
 120     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
 121     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
 122     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
 123     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
 124     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
 125     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
 126     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
 127     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
 128     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
 129     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
 130     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
 131     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
 132     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
 133     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
 134     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
 135     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
 136     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
 137     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
 138     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
 139     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
 140     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
 141     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
 142     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
 143     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
 144     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
 145     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
 146     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
 147     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
 148     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
 149     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
 150     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
 151     0x000fa7c1UL
 152 };
 153 
     // All bits set; fast_exp() shifts it left by a variable count to build a
     // mask that clears low-order bits.
 154 ALIGNED_(16) juint _ALLONES[] =
 155 {
 156     0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
 157 };
 158 
     // Bit pattern of 1.0 in both lanes; added dword-wise in fast_exp() to turn
     // a raw exponent offset into a biased double.
 159 ALIGNED_(16) juint _ebias[] =
 160 {
 161     0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 162 };
 163 
     // Largest finite double; squared in fast_exp() to produce an overflow.
     // NOTE(review): the scalar constants below hold 8-byte doubles but request
     // only 4-byte alignment.
 164 ALIGNED_(4) juint _XMAX[] =
 165 {
 166     0xffffffffUL, 0x7fefffffUL
 167 };
 168 
     // Smallest positive normal double; squared in fast_exp() to produce an
     // underflow.
 169 ALIGNED_(4) juint _XMIN[] =
 170 {
 171     0x00000000UL, 0x00100000UL
 172 };
 173 
     // +Infinity.
 174 ALIGNED_(4) juint _INF[] =
 175 {
 176     0x00000000UL, 0x7ff00000UL
 177 };
 178 
     // +0.0.
 179 ALIGNED_(4) juint _ZERO[] =
 180 {
 181     0x00000000UL, 0x00000000UL
 182 };
 183 
     // 1.0.
 184 ALIGNED_(4) juint _ONE_val[] =
 185 {
 186     0x00000000UL, 0x3ff00000UL
 187 };
 188 
 189 
 190 // Registers:
 191 // input: xmm0
 192 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 193 //          rax, rdx, rcx, tmp - r11
 194 
 195 // Code generated by Intel C compiler for LIBM library
 196 
     // Emits the instruction sequence for exp(x) following the algorithm
     // description above. The argument is the double in the low lane of xmm0 and
     // every exit path leaves the result in the low lane of xmm0.
     //
     // The sequence reserves 24 bytes of stack on entry and releases them at
     // B1_5; it emits no return instruction. Frame layout while it runs:
     //   [rsp + 0]   status code written on overflow/underflow paths (14 or 15);
     //               nothing in this function reads it back
     //   [rsp + 8]   copy of the input x
     //   [rsp + 16]  result spill slot used by the status paths
     //
     // eax, ecx, edx and tmp are integer scratch registers and must be distinct
     // (asserted). xmm1..xmm7 are overwritten.
 197 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
 198   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 199   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 200   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
 201   Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
 202 
 203   assert_different_registers(tmp, eax, ecx, edx);
       // NOTE(review): this jump targets the label bound immediately after the
       // C++ declarations below, which emit no code themselves.
 204   jmp(start);
 205   address cv = (address)_cv;
 206   address Shifter = (address)_shifter;
 207   address mmask = (address)_mmask;
 208   address bias = (address)_bias;
 209   address Tbl_addr = (address)_Tbl_addr;
 210   address ALLONES = (address)_ALLONES;
 211   address ebias = (address)_ebias;
 212   address XMAX = (address)_XMAX;
 213   address XMIN = (address)_XMIN;
 214   address INF = (address)_INF;
 215   address ZERO = (address)_ZERO;
 216   address ONE_val = (address)_ONE_val;
 217 
 218   bind(start);
       // Reserve the frame, save x, and duplicate x into both lanes.
 219   subq(rsp, 24);
 220   movsd(Address(rsp, 8), xmm0);
 221   unpcklpd(xmm0, xmm0);
 222   movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
 223   movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
 224   movdqu(xmm2, ExternalAddress(16+cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
 225   movdqu(xmm3, ExternalAddress(32+cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
       // Range check on the top 16 bits of |x| (exponent plus four mantissa
       // bits): stay on the main path only when 0x3C90 <= hi16 <= 0x408F, i.e.
       // 2^-54 <= |x| < 2^10. Either subtraction going negative sets the sign
       // bit of edx, which the unsigned compare against INT_MIN detects.
 226   pextrw(eax, xmm0, 3);
 227   andl(eax, 32767);
 228   movl(edx, 16527);
 229   subl(edx, eax);
 230   subl(eax, 15504);
 231   orl(edx, eax);
 232   cmpl(edx, INT_MIN);
 233   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
       // m = round(x * 64/ln(2)) via the add/subtract-shifter trick; xmm7 keeps
       // the un-subtracted sum so the integer m can be read from its low bits.
 234   mulpd(xmm1, xmm0);
 235   addpd(xmm1, xmm6);
 236   movapd(xmm7, xmm1);
 237   subpd(xmm1, xmm6);
       // Reduced argument y = x - m*(ln(2)/64), subtracted in two steps (high
       // part here, low part further down).
 238   mulpd(xmm2, xmm1);
 239   movdqu(xmm4, ExternalAddress(64+cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
 240   mulpd(xmm3, xmm1);
 241   movdqu(xmm5, ExternalAddress(80+cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
 242   subpd(xmm0, xmm2);
       // eax = m; ecx = (m & 63) * 16 is the byte offset into the table;
       // eax = edx = m >> 6 is the power-of-two exponent n.
 243   movdl(eax, xmm7);
 244   movl(ecx, eax);
 245   andl(ecx, 63);
 246   shll(ecx, 4);
 247   sarl(eax, 6);
 248   movl(edx, eax);
       // Build the exponent field of 2^n in xmm7: clear the low six bits of m,
       // add 1023 << 6, and shift the sum up so that n + 1023 lands at bit 52.
 249   movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
 250   pand(xmm7, xmm6);
 251   movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
 252   paddq(xmm7, xmm6);
 253   psllq(xmm7, 46);
 254   subpd(xmm0, xmm3);
       // xmm2 = 16-byte table entry for this index.
 255   lea(tmp, ExternalAddress(Tbl_addr));
 256   movdqu(xmm2, Address(ecx,tmp));
       // Polynomial evaluation in y, interleaved with combining the table entry:
       // the entry's low double is added into the sum, its high double is
       // broadcast and ORed with the exponent in xmm7 to form the scale factor.
 257   mulpd(xmm4, xmm0);
 258   movapd(xmm6, xmm0);
 259   movapd(xmm1, xmm0);
 260   mulpd(xmm6, xmm6);
 261   mulpd(xmm0, xmm6);
 262   addpd(xmm5, xmm4);
 263   mulsd(xmm0, xmm6);
 264   mulpd(xmm6, ExternalAddress(48+cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
 265   addsd(xmm1, xmm2);
 266   unpckhpd(xmm2, xmm2);
 267   mulpd(xmm0, xmm5);
 268   addsd(xmm1, xmm0);
 269   por(xmm2, xmm7);
 270   unpckhpd(xmm0, xmm0);
 271   addsd(xmm0, xmm1);
 272   addsd(xmm0, xmm6);
       // Unsigned check of n + 894 against 1916: n outside [-894, 1022] takes
       // the careful scaling path below.
 273   addl(edx, 894);
 274   cmpl(edx, 1916);
 275   jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
       // Common case: result = scale * p + scale.
 276   mulsd(xmm0, xmm2);
 277   addsd(xmm0, xmm2);
 278   jmp (B1_5);
 279 
       // n is close to or beyond the representable exponent range. The scale is
       // split: (n >> 1) is removed from the exponent of xmm2 and rebuilt as a
       // separate double in xmm3 (after adding the 1.0 bit pattern), so the
       // final multiply by xmm3 applies the remaining power of two.
 280   bind(L_2TAG_PACKET_1_0_2);
 281   xorpd(xmm3, xmm3);
 282   movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
       // edx = -1022 - n, used as a shift count for the all-ones mask.
 283   movl(edx, -1022);
 284   subl(edx, eax);
 285   movdl(xmm5, edx);
 286   psllq(xmm4, xmm5);
 287   movl(ecx, eax);
 288   sarl(eax, 1);
 289   pinsrw(xmm3, eax, 3);
 290   movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
 291   psllq(xmm3, 4);
 292   psubd(xmm2, xmm3);
 293   mulsd(xmm0, xmm2);
       // Shift count above 52 (n < -1074): go to the underflow path.
 294   cmpl(edx, 52);
 295   jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
       // Split the scale factor with the shifted mask: xmm4 = masked part,
       // xmm2 = remainder, which is folded into the product.
 296   pand(xmm4, xmm2);
 297   paddd(xmm3, xmm6);
 298   subsd(xmm2, xmm4);
 299   addsd(xmm0, xmm2);
       // n >= 1023: possible overflow, handled at PACKET_3.
 300   cmpl(ecx, 1023);
 301   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
       // If the shift count is zero and the partial sum's sign bit is clear,
       // take the short finish at PACKET_4.
 302   pextrw(ecx, xmm0, 3);
 303   andl(ecx, 32768);
 304   orl(edx, ecx);
 305   cmpl(edx, 0);
 306   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
 307   movapd(xmm6, xmm0);
 308   addsd(xmm0, xmm4);
 309   mulsd(xmm0, xmm3);
       // A zero exponent field means the scaled result is zero or subnormal.
 310   pextrw(ecx, xmm0, 3);
 311   andl(ecx, 32752);
 312   cmpl(ecx, 0);
 313   jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
 314   jmp(B1_5);
 315 
       // Zero/subnormal result: the two scaled pieces (xmm6, xmm4) are combined
       // with integer operations on their bit patterns instead of a
       // floating-point add, then status 15 is recorded.
 316   bind(L_2TAG_PACKET_5_0_2);
 317   mulsd(xmm6, xmm3);
 318   mulsd(xmm4, xmm3);
 319   movdqu(xmm0, xmm6);
 320   pxor(xmm6, xmm4);
 321   psrad(xmm6, 31);
 322   pshufd(xmm6, xmm6, 85);
 323   psllq(xmm0, 1);
 324   psrlq(xmm0, 1);
 325   pxor(xmm0, xmm6);
 326   psrlq(xmm6, 63);
 327   paddq(xmm0, xmm6);
 328   paddq(xmm0, xmm4);
 329   movl(Address(rsp,0), 15);
 330   jmp(L_2TAG_PACKET_6_0_2);
 331 
 332   bind(L_2TAG_PACKET_4_0_2);
 333   addsd(xmm0, xmm4);
 334   mulsd(xmm0, xmm3);
 335   jmp(B1_5);
 336 
       // Large n: finish the scaling, then test whether the exponent field
       // became all ones (the result overflowed).
 337   bind(L_2TAG_PACKET_3_0_2);
 338   addsd(xmm0, xmm4);
 339   mulsd(xmm0, xmm3);
 340   pextrw(ecx, xmm0, 3);
 341   andl(ecx, 32752);
 342   cmpl(ecx, 32752);
 343   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
 344   jmp(B1_5);
 345 
       // Underflow path reached when the shift count exceeded 52; records
       // status 15.
 346   bind(L_2TAG_PACKET_2_0_2);
 347   paddd(xmm3, xmm6);
 348   addpd(xmm0, xmm2);
 349   mulsd(xmm0, xmm3);
 350   movl(Address(rsp,0), 15);
 351   jmp(L_2TAG_PACKET_6_0_2);
 352 
       // |x| >= 2^10 (entered from PACKET_0 with eax = high dword of |x|).
       // An exponent field of all ones means Inf or NaN; otherwise the sign of
       // x selects between overflow and underflow.
 353   bind(L_2TAG_PACKET_8_0_2);
 354   cmpl(eax, 2146435072);
 355   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
 356   movl(eax, Address(rsp,12));
 357   cmpl(eax, INT_MIN);
 358   jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
       // Large positive x: XMAX * XMAX overflows.
 359   movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
 360   mulsd(xmm0, xmm0);
 361 
       // Overflow exit: record status 14.
 362   bind(L_2TAG_PACKET_7_0_2);
 363   movl(Address(rsp,0), 14);
 364   jmp(L_2TAG_PACKET_6_0_2);
 365 
       // Large negative x: XMIN * XMIN underflows; record status 15.
 366   bind(L_2TAG_PACKET_10_0_2);
 367   movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
 368   mulsd(xmm0, xmm0);
 369   movl(Address(rsp,0), 15);
 370   jmp(L_2TAG_PACKET_6_0_2);
 371 
       // Inf or NaN. A non-zero mantissa (high dword above 0x7ff00000, or a
       // non-zero low dword) is a NaN; otherwise the signed high dword tells
       // +Inf from -Inf.
 372   bind(L_2TAG_PACKET_9_0_2);
 373   movl(edx, Address(rsp,8));
 374   cmpl(eax, 2146435072);
 375   jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
 376   cmpl(edx, 0);
 377   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
 378   movl(eax, Address(rsp,12));
 379   cmpl(eax, 2146435072);
 380   jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
       // exp(+Inf) = +Inf
 381   movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
 382   jmp(B1_5);
 383 
       // exp(-Inf) = 0
 384   bind(L_2TAG_PACKET_12_0_2);
 385   movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
 386   jmp(B1_5);
 387 
       // NaN input: return x + x.
 388   bind(L_2TAG_PACKET_11_0_2);
 389   movsd(xmm0, Address(rsp, 8));
 390   addsd(xmm0, xmm0);
 391   jmp(B1_5);
 392 
       // Input failed the main-path range check. A high dword of |x| at or above
       // 0x40900000 (|x| >= 2^10) goes to PACKET_8; otherwise |x| < 2^-54 and
       // the result is 1.0 + x.
 393   bind(L_2TAG_PACKET_0_0_2);
 394   movl(eax, Address(rsp, 12));
 395   andl(eax, 2147483647);
 396   cmpl(eax, 1083179008);
 397   jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
       // NOTE(review): x was already stored to this slot on entry.
 398   movsd(Address(rsp, 8), xmm0);
 399   addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
 400   jmp(B1_5);
 401 
       // Status paths: spill the result to the frame and reload it.
 402   bind(L_2TAG_PACKET_6_0_2);
 403   movq(Address(rsp, 16), xmm0);
 404 
 405   bind(B1_3);
 406   movq(xmm0, Address(rsp, 16));
 407 
       // Common exit: release the 24-byte frame.
 408   bind(B1_5);
 409   addq(rsp, 24);
 410 }
 411 
 412 /******************************************************************************/
 413 //                     ALGORITHM DESCRIPTION - LOG()
 414 //                     ---------------------
 415 //
 416 //    x=2^k * mx, mx in [1,2)
 417 //
 418 //    Get B~1/mx based on the output of rcpps instruction (B0)
 419 //    B = int((B0*2^7+0.5))/2^7
 420 //
 421 //    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
 422 //
 423 //    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
 424 //             p(r) is a degree 7 polynomial
 425 //             -log(B) read from data table (high, low parts)
 426 //             Result is formed from high and low parts
 427 //
 428 // Special cases:
 429 //  log(NaN) = quiet NaN, and raise invalid exception
 430 //  log(+INF) = that INF
 431 //  log(0) = -INF with divide-by-zero exception raised
 432 //  log(1) = +0
 433 //  log(x) = NaN with invalid exception raised if x < -0, including -INF
 434 //
 435 /******************************************************************************/
 436 
 437 ALIGNED_(16) juint _L_tbl[] =
 438 {
       // 129 entries of 16 bytes (two doubles each, low word first). fast_log()
       // indexes the table with bits 16..23 of the adjusted reciprocal, shifted
       // to a 16-byte stride. Per the algorithm description each entry holds
       // -log(B) as a high part followed by a low part. The first entry's high
       // part is approximately ln(2); the last entry is { +0.0, -0.0 }.
 439   0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
 440   0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
 441   0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
 442   0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
 443   0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
 444   0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
 445   0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
 446   0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
 447   0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
 448   0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
 449   0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
 450   0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
 451   0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
 452   0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
 453   0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
 454   0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
 455   0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
 456   0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
 457   0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
 458   0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
 459   0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
 460   0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
 461   0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
 462   0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
 463   0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
 464   0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
 465   0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
 466   0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
 467   0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
 468   0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
 469   0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
 470   0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
 471   0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
 472   0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
 473   0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
 474   0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
 475   0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
 476   0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
 477   0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
 478   0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
 479   0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
 480   0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
 481   0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
 482   0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
 483   0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
 484   0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
 485   0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
 486   0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
 487   0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
 488   0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
 489   0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
 490   0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
 491   0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
 492   0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
 493   0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
 494   0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
 495   0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
 496   0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
 497   0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
 498   0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
 499   0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
 500   0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
 501   0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
 502   0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
 503   0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
 504   0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
 505   0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
 506   0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
 507   0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
 508   0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
 509   0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
 510   0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
 511   0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
 512   0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
 513   0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
 514   0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
 515   0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
 516   0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
 517   0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
 518   0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
 519   0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
 520   0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
 521   0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
 522   0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
 523   0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
 524   0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
 525   0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
 526   0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
 527   0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
 528   0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
 529   0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
 530   0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
 531   0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
 532   0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
 533   0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
 534   0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
 535   0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
 536   0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
 537   0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
 538   0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
 539   0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
 540   0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
 541   0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
 542   0x80000000UL
 543 };
 544 
     // { high, low } multiplier for the exponent term. The bit patterns are the
     // first _L_tbl entry with the exponent lowered by 4, i.e. divided by 16;
     // fast_log() multiplies them by the exponent value kept shifted left by 4.
 545 ALIGNED_(16) juint _log2[] =
 546 {
 547   0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
 548 };
 549 
     // Polynomial coefficients as three packed pairs:
     //   byte  0: {  1/7,              -1/4 }
     //   byte 16: {  approx. -1/6,      1/3 }
     //   byte 32: {  1/5,              -1/2 }
 550 ALIGNED_(16) juint _coeff[] =
 551 {
 552   0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
 553   0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
 554   0x00000000UL, 0xbfe00000UL
 555 };
 556 
 557 // Registers:
 558 // input: xmm0
 559 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
 560 //          rax, rdx, rcx, r8, r11
 561 
     // Emits the instruction sequence for log(x) following the algorithm
     // description above. The argument is the double in the low lane of xmm0 and
     // every exit path leaves the result in the low lane of xmm0.
     //
     // The sequence reserves 24 bytes of stack on entry and releases them at
     // B1_5; it emits no return instruction. Frame layout while it runs:
     //   [rsp + 0]   copy of the input x
     //   [rsp + 8]   result spill slot used by the error paths
     //   [rsp + 16]  status code written on the error paths (2 or 3); nothing
     //               in this function reads it back
     //
     // eax, ecx, edx, tmp1 and tmp2 are integer scratch registers and must be
     // distinct (asserted). All integer temporaries go through these parameters;
     // only rsp is referenced by its fixed name. xmm1..xmm7 are overwritten.
 562 void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
 563   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
 564   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
 565   Label L_2TAG_PACKET_8_0_2;
 566   Label B1_3, B1_5, start;
 567 
 568   assert_different_registers(tmp1, tmp2, eax, ecx, edx);
 569   jmp(start);
 570   address L_tbl = (address)_L_tbl;
 571   address log2 = (address)_log2;
 572   address coeff = (address)_coeff;
 573 
 574   bind(start);
       // Reserve the frame and save x.
 575   subq(rsp, 24);
 576   movsd(Address(rsp, 0), xmm0);
       // Materialize constants through the scratch registers supplied by the
       // caller: xmm2 = 1.0, xmm3 = fixed exponent pattern, xmm4 = 0x8000,
       // xmm5 = mask keeping sign, exponent and the top mantissa bits.
 577   mov64(eax, 0x3ff0000000000000);
 578   movdq(xmm2, eax);
 579   mov64(edx, 0x77f0000000000000);
 580   movdq(xmm3, edx);
 581   movl(ecx, 32768);
 582   movdl(xmm4, ecx);
 583   mov64(tmp1, 0xffffe00000000000);
 584   movdq(xmm5, tmp1);
 585   movdqu(xmm1, xmm0);
       // eax = top 16 bits of x (sign, exponent, four mantissa bits).
 586   pextrw(eax, xmm0, 3);
       // Shift the leading bits of x into single-precision layout and take an
       // approximate reciprocal with rcpps.
 587   por(xmm0, xmm2);
       // ecx = 0x3FE0: exponent offset subtracted on the main path.
 588   movl(ecx, 16352);
 589   psrlq(xmm0, 27);
 590   lea(tmp2, ExternalAddress(L_tbl));
 591   psrld(xmm0, 2);
 592   rcpps(xmm0, xmm0);
       // xmm1 = mantissa bits of x only (sign and exponent shifted out);
       // xmm6 = copy of the mask in xmm5 (shuffle 228 is the identity order).
 593   psllq(xmm1, 12);
 594   pshufd(xmm6, xmm5, 228);
 595   psrlq(xmm1, 12);
       // Unsigned range check: after subtracting 16, anything at or above 0x7FE0
       // is a zero/subnormal (wrapped), a negative input, or Inf/NaN.
 596   subl(eax, 16);
 597   cmpl(eax, 32736);
 598   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
 599 
       // Main path (also re-entered from the subnormal path with rescaled x).
 600   bind(L_2TAG_PACKET_1_0_2);
       // Add 0x8000 to the reciprocal bits, keep a copy in edx for the table
       // index, then move the bits back to double layout and mask them to get
       // the short reciprocal B.
 601   paddd(xmm0, xmm4);
 602   por(xmm1, xmm3);
 603   movdl(edx, xmm0);
 604   psllq(xmm0, 29);
       // Split the mantissa operand into a masked high part (xmm5) and the
       // remainder (xmm1), and multiply both by B.
 605   pand(xmm5, xmm1);
 606   pand(xmm0, xmm6);
 607   subsd(xmm1, xmm5);
 608   mulpd(xmm5, xmm0);
       // eax = (exponent field << 4) - ecx, converted to double in xmm7.
 609   andl(eax, 32752);
 610   subl(eax, ecx);
 611   cvtsi2sdl(xmm7, eax);
 612   mulsd(xmm1, xmm0);
 613   movq(xmm6, ExternalAddress(log2));       // 0xfefa3800UL, 0x3fa62e42UL
 614   movdqu(xmm3, ExternalAddress(coeff));    // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
       // High part of the reduced argument: B*hi - 1.0.
 615   subsd(xmm5, xmm2);
       // Bits 16..23 of the reciprocal select the 16-byte table entry.
 616   andl(edx, 16711680);
 617   shrl(edx, 12);
 618   movdqu(xmm0, Address(tmp2, edx));
 619   movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
       // xmm1 = reduced argument r (high + low parts).
 620   addsd(xmm1, xmm5);
 621   movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
       // Exponent contribution, high part.
 622   mulsd(xmm6, xmm7);
       // Broadcast r to both lanes (movddup needs SSE3; otherwise copy + movlhps).
 623   if (VM_Version::supports_sse3()) {
 624     movddup(xmm5, xmm1);
 625   } else {
 626     movdqu(xmm5, xmm1);
 627     movlhps(xmm5, xmm5);
 628   }
       // Exponent contribution, low part.
 629   mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
 630   mulsd(xmm3, xmm1);
       // xmm0 = table high part + exponent high part.
 631   addsd(xmm0, xmm6);
 632   mulpd(xmm4, xmm5);
 633   mulpd(xmm5, xmm5);
 634   if (VM_Version::supports_sse3()) {
 635     movddup(xmm6, xmm0);
 636   } else {
 637     movdqu(xmm6, xmm0);
 638     movlhps(xmm6, xmm6);
 639   }
       // Add r to the high sum and recover the rounding error of that add in
       // xmm6/xmm1; the polynomial terms and low parts are accumulated in xmm1.
 640   addsd(xmm0, xmm1);
 641   addpd(xmm4, xmm2);
 642   mulpd(xmm3, xmm5);
 643   subsd(xmm6, xmm0);
 644   mulsd(xmm4, xmm1);
       // Shuffle 238 copies the upper double (table low part) to the low lane.
 645   pshufd(xmm2, xmm0, 238);
 646   addsd(xmm1, xmm6);
 647   mulsd(xmm5, xmm5);
 648   addsd(xmm7, xmm2);
 649   addpd(xmm4, xmm3);
 650   addsd(xmm1, xmm7);
 651   mulpd(xmm4, xmm5);
 652   addsd(xmm1, xmm4);
 653   pshufd(xmm5, xmm4, 238);
 654   addsd(xmm1, xmm5);
       // Result = high sum + accumulated low terms.
 655   addsd(xmm0, xmm1);
 656   jmp(B1_5);
 657 
       // Special inputs. Restore the original top 16 bits in eax and classify:
       // sign bit set -> PACKET_2; exponent field zero -> PACKET_3; otherwise
       // the input is +Inf or NaN and falls through to PACKET_4.
 658   bind(L_2TAG_PACKET_0_0_2);
 659   movq(xmm0, Address(rsp, 0));
 660   movq(xmm1, Address(rsp, 0));
 661   addl(eax, 16);
 662   cmpl(eax, 32768);
 663   jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
 664   cmpl(eax, 16);
 665   jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
 666 
       // +Inf or NaN: return x + x.
 667   bind(L_2TAG_PACKET_4_0_2);
 668   addsd(xmm0, xmm0);
 669   jmp(B1_5);
 670 
       // Sign set and exponent all ones. The first branch still uses the flags of
       // the compare that jumped here: strictly above means a NaN; on equality a
       // non-zero low dword is also a NaN; what remains is -Inf.
 671   bind(L_2TAG_PACKET_5_0_2);
 672   jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
 673   cmpl(edx, 0);
 674   jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
 675   jmp(L_2TAG_PACKET_6_0_2);
 676 
       // Exponent field zero with sign clear: +0 or a positive subnormal.
 677   bind(L_2TAG_PACKET_3_0_2);
 678   xorpd(xmm1, xmm1);
 679   addsd(xmm1, xmm0);
 680   movdl(edx, xmm1);
 681   psrlq(xmm1, 32);
 682   movdl(ecx, xmm1);
 683   orl(edx, ecx);
 684   cmpl(edx, 0);
       // All bits zero: log(+0), handled at PACKET_7.
 685   jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
       // Subnormal: multiply by the double whose top word is 0x47F0 (2^128),
       // redo the setup, and use 0x47F0 as the exponent offset so the scaling is
       // cancelled on the main path.
 686   xorpd(xmm1, xmm1);
 687   movl(eax, 18416);
 688   pinsrw(xmm1, eax, 3);
 689   mulsd(xmm0, xmm1);
 690   movdqu(xmm1, xmm0);
 691   pextrw(eax, xmm0, 3);
 692   por(xmm0, xmm2);
 693   psrlq(xmm0, 27);
 694   movl(ecx, 18416);
 695   psrld(xmm0, 2);
 696   rcpps(xmm0, xmm0);
 697   psllq(xmm1, 12);
 698   pshufd(xmm6, xmm5, 228);
 699   psrlq(xmm1, 12);
 700   jmp(L_2TAG_PACKET_1_0_2);
 701 
       // Sign bit set. ecx = high dword doubled (drops the sign), edx = low
       // dword. Exponent all ones -> PACKET_5; all remaining bits zero means -0
       // -> PACKET_7; anything else is a negative finite value.
 702   bind(L_2TAG_PACKET_2_0_2);
 703   movdl(edx, xmm1);
 704   psrlq(xmm1, 32);
 705   movdl(ecx, xmm1);
 706   addl(ecx, ecx);
 707   cmpl(ecx, -2097152);
 708   jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
 709   orl(edx, ecx);
 710   cmpl(edx, 0);
 711   jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
 712 
       // Negative input (including -Inf): compute 0 * Inf to produce a NaN and
       // record status 3.
 713   bind(L_2TAG_PACKET_6_0_2);
 714   xorpd(xmm1, xmm1);
 715   xorpd(xmm0, xmm0);
 716   movl(eax, 32752);
 717   pinsrw(xmm1, eax, 3);
 718   mulsd(xmm0, xmm1);
 719   movl(Address(rsp, 16), 3);
 720   jmp(L_2TAG_PACKET_8_0_2);
       // Zero input: compute -1.0 / 0.0 to produce -Inf and record status 2.
 721   bind(L_2TAG_PACKET_7_0_2);
 722   xorpd(xmm1, xmm1);
 723   xorpd(xmm0, xmm0);
 724   movl(eax, 49136);
 725   pinsrw(xmm0, eax, 3);
 726   divsd(xmm0, xmm1);
 727   movl(Address(rsp, 16), 2);
 728 
       // Error paths: spill the result to the frame and reload it.
 729   bind(L_2TAG_PACKET_8_0_2);
 730   movq(Address(rsp, 8), xmm0);
 731 
 732   bind(B1_3);
 733   movq(xmm0, Address(rsp, 8));
 734 
       // Common exit: release the 24-byte frame.
 735   bind(B1_5);
 736   addq(rsp, 24);
 737 }
 738 
 739 /******************************************************************************/
 740 //                     ALGORITHM DESCRIPTION  - POW()
 741 //                     ---------------------
 742 //
 743 //    Let x=2^k * mx, mx in [1,2)
 744 //
 745 //    log2(x) calculation:
 746 //
 747 //    Get B~1/mx based on the output of rcpps instruction (B0)
 748 //    B = int((B0*LH*2^9+0.5))/2^9
 749 //    LH is a short approximation for log2(e)
 750 //
 751 //    Reduced argument, scaled by LH:
 752 //                r=B*mx-LH (computed accurately in high and low parts)
 753 //
 754 //    log2(x) result:  k - log2(B) + p(r)
 755 //             p(r) is a degree 8 polynomial
 756 //             -log2(B) read from data table (high, low parts)
 757 //             log2(x) is formed from high and low parts
 758 //    For |x| in [1-1/32, 1+1/16), a slower but more accurate computation
 759 //    based on the same table design is performed.
 760 //
 761 //   Main path is taken if | floor(log2(|log2(|x|)|)) + floor(log2(|y|)) | < 8,
 762 //   to filter out all potential OF/UF cases.
 763 //   exp2(y*log2(x)) is computed using an 8-bit index table and a degree 5
 764 //   polynomial
 765 //
 766 // Special cases:
 767 //  pow(-0,y) = -INF and raises the divide-by-zero exception for y an odd
 768 //  integer < 0.
 769 //  pow(-0,y) = +INF and raises the divide-by-zero exception for y < 0 and
 770 //  not an odd integer.
 771 //  pow(-0,y) = -0 for y an odd integer > 0.
 772 //  pow(-0,y) = +0 for y > 0 and not an odd integer.
 773 //  pow(-1,-INF) = NaN.
 774 //  pow(+1,y) = NaN for any y, even a NaN.
 775 //  pow(x,-0) = 1 for any x, even a NaN.
 776 //  pow(x,y) = a NaN and raises the invalid exception for finite x < 0 and
 777 //  finite non-integer y.
 778 //  pow(x,-INF) = +INF for |x|<1.
 779 //  pow(x,-INF) = +0 for |x|>1.
 780 //  pow(x,+INF) = +0 for |x|<1.
 781 //  pow(x,+INF) = +INF for |x|>1.
 782 //  pow(-INF,y) = -0 for y an odd integer < 0.
 783 //  pow(-INF,y) = +0 for y < 0 and not an odd integer.
 784 //  pow(-INF,y) = -INF for y an odd integer > 0.
 785 //  pow(-INF,y) = +INF for y > 0 and not an odd integer.
 786 //  pow(+INF,y) = +0 for y < 0.
 787 //  pow(+INF,y) = +INF for y > 0.
 788 //
 789 /******************************************************************************/
 790 
 791 ALIGNED_(16) juint _HIGHSIGMASK[] =
 792 {
 793   0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
 794 };
 795 
 796 ALIGNED_(16) juint _LOG2_E[] =
 797 {
 798   0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
 799 };
 800 
 801 ALIGNED_(16) juint _HIGHMASK_Y[] =
 802 {
 803   0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL
 804 };
 805 
 806 ALIGNED_(16) juint _T_exp[] =
     // 1024 32-bit words, 16-byte aligned: 256 groups of four words, which
     // matches the "8-bit index table" named in the POW algorithm description.
     // NOTE(review): each group looks like two doubles stored low word first -
     // a leading value near 2^(j/256) (the first group's high word is
     // 0x3ff00000, i.e. 1.0) followed by a small correction term. Confirm
     // against the code that indexes this table before relying on that.
     // The element order must not change; consumers address it by offset.
 807 {
 808   0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3b700000UL, 0xfa5abcbfUL,
 809   0x3ff00b1aUL, 0xa7609f71UL, 0xbc84f6b2UL, 0xa9fb3335UL, 0x3ff0163dUL,
 810   0x9ab8cdb7UL, 0x3c9b6129UL, 0x143b0281UL, 0x3ff02168UL, 0x0fc54eb6UL,
 811   0xbc82bf31UL, 0x3e778061UL, 0x3ff02c9aUL, 0x535b085dUL, 0xbc719083UL,
 812   0x2e11bbccUL, 0x3ff037d4UL, 0xeeade11aUL, 0x3c656811UL, 0xe86e7f85UL,
 813   0x3ff04315UL, 0x1977c96eUL, 0xbc90a31cUL, 0x72f654b1UL, 0x3ff04e5fUL,
 814   0x3aa0d08cUL, 0x3c84c379UL, 0xd3158574UL, 0x3ff059b0UL, 0xa475b465UL,
 815   0x3c8d73e2UL, 0x0e3c1f89UL, 0x3ff0650aUL, 0x5799c397UL, 0xbc95cb7bUL,
 816   0x29ddf6deUL, 0x3ff0706bUL, 0xe2b13c27UL, 0xbc8c91dfUL, 0x2b72a836UL,
 817   0x3ff07bd4UL, 0x54458700UL, 0x3c832334UL, 0x18759bc8UL, 0x3ff08745UL,
 818   0x4bb284ffUL, 0x3c6186beUL, 0xf66607e0UL, 0x3ff092bdUL, 0x800a3fd1UL,
 819   0xbc968063UL, 0xcac6f383UL, 0x3ff09e3eUL, 0x18316136UL, 0x3c914878UL,
 820   0x9b1f3919UL, 0x3ff0a9c7UL, 0x873d1d38UL, 0x3c85d16cUL, 0x6cf9890fUL,
 821   0x3ff0b558UL, 0x4adc610bUL, 0x3c98a62eUL, 0x45e46c85UL, 0x3ff0c0f1UL,
 822   0x06d21cefUL, 0x3c94f989UL, 0x2b7247f7UL, 0x3ff0cc92UL, 0x16e24f71UL,
 823   0x3c901edcUL, 0x23395decUL, 0x3ff0d83bUL, 0xe43f316aUL, 0xbc9bc14dUL,
 824   0x32d3d1a2UL, 0x3ff0e3ecUL, 0x27c57b52UL, 0x3c403a17UL, 0x5fdfa9c5UL,
 825   0x3ff0efa5UL, 0xbc54021bUL, 0xbc949db9UL, 0xaffed31bUL, 0x3ff0fb66UL,
 826   0xc44ebd7bUL, 0xbc6b9bedUL, 0x28d7233eUL, 0x3ff10730UL, 0x1692fdd5UL,
 827   0x3c8d46ebUL, 0xd0125b51UL, 0x3ff11301UL, 0x39449b3aUL, 0xbc96c510UL,
 828   0xab5e2ab6UL, 0x3ff11edbUL, 0xf703fb72UL, 0xbc9ca454UL, 0xc06c31ccUL,
 829   0x3ff12abdUL, 0xb36ca5c7UL, 0xbc51b514UL, 0x14f204abUL, 0x3ff136a8UL,
 830   0xba48dcf0UL, 0xbc67108fUL, 0xaea92de0UL, 0x3ff1429aUL, 0x9af1369eUL,
 831   0xbc932fbfUL, 0x934f312eUL, 0x3ff14e95UL, 0x39bf44abUL, 0xbc8b91e8UL,
 832   0xc8a58e51UL, 0x3ff15a98UL, 0xb9eeab0aUL, 0x3c82406aUL, 0x5471c3c2UL,
 833   0x3ff166a4UL, 0x82ea1a32UL, 0x3c58f23bUL, 0x3c7d517bUL, 0x3ff172b8UL,
 834   0xb9d78a76UL, 0xbc819041UL, 0x8695bbc0UL, 0x3ff17ed4UL, 0xe2ac5a64UL,
 835   0x3c709e3fUL, 0x388c8deaUL, 0x3ff18af9UL, 0xd1970f6cUL, 0xbc911023UL,
 836   0x58375d2fUL, 0x3ff19726UL, 0x85f17e08UL, 0x3c94aaddUL, 0xeb6fcb75UL,
 837   0x3ff1a35bUL, 0x7b4968e4UL, 0x3c8e5b4cUL, 0xf8138a1cUL, 0x3ff1af99UL,
 838   0xa4b69280UL, 0x3c97bf85UL, 0x84045cd4UL, 0x3ff1bbe0UL, 0x352ef607UL,
 839   0xbc995386UL, 0x95281c6bUL, 0x3ff1c82fUL, 0x8010f8c9UL, 0x3c900977UL,
 840   0x3168b9aaUL, 0x3ff1d487UL, 0x00a2643cUL, 0x3c9e016eUL, 0x5eb44027UL,
 841   0x3ff1e0e7UL, 0x088cb6deUL, 0xbc96fdd8UL, 0x22fcd91dUL, 0x3ff1ed50UL,
 842   0x027bb78cUL, 0xbc91df98UL, 0x8438ce4dUL, 0x3ff1f9c1UL, 0xa097af5cUL,
 843   0xbc9bf524UL, 0x88628cd6UL, 0x3ff2063bUL, 0x814a8495UL, 0x3c8dc775UL,
 844   0x3578a819UL, 0x3ff212beUL, 0x2cfcaac9UL, 0x3c93592dUL, 0x917ddc96UL,
 845   0x3ff21f49UL, 0x9494a5eeUL, 0x3c82a97eUL, 0xa27912d1UL, 0x3ff22bddUL,
 846   0x5577d69fUL, 0x3c8d34fbUL, 0x6e756238UL, 0x3ff2387aUL, 0xb6c70573UL,
 847   0x3c99b07eUL, 0xfb82140aUL, 0x3ff2451fUL, 0x911ca996UL, 0x3c8acfccUL,
 848   0x4fb2a63fUL, 0x3ff251ceUL, 0xbef4f4a4UL, 0x3c8ac155UL, 0x711ece75UL,
 849   0x3ff25e85UL, 0x4ac31b2cUL, 0x3c93e1a2UL, 0x65e27cddUL, 0x3ff26b45UL,
 850   0x9940e9d9UL, 0x3c82bd33UL, 0x341ddf29UL, 0x3ff2780eUL, 0x05f9e76cUL,
 851   0x3c9e067cUL, 0xe1f56381UL, 0x3ff284dfUL, 0x8c3f0d7eUL, 0xbc9a4c3aUL,
 852   0x7591bb70UL, 0x3ff291baUL, 0x28401cbdUL, 0xbc82cc72UL, 0xf51fdee1UL,
 853   0x3ff29e9dUL, 0xafad1255UL, 0x3c8612e8UL, 0x66d10f13UL, 0x3ff2ab8aUL,
 854   0x191690a7UL, 0xbc995743UL, 0xd0dad990UL, 0x3ff2b87fUL, 0xd6381aa4UL,
 855   0xbc410adcUL, 0x39771b2fUL, 0x3ff2c57eUL, 0xa6eb5124UL, 0xbc950145UL,
 856   0xa6e4030bUL, 0x3ff2d285UL, 0x54db41d5UL, 0x3c900247UL, 0x1f641589UL,
 857   0x3ff2df96UL, 0xfbbce198UL, 0x3c9d16cfUL, 0xa93e2f56UL, 0x3ff2ecafUL,
 858   0x45d52383UL, 0x3c71ca0fUL, 0x4abd886bUL, 0x3ff2f9d2UL, 0x532bda93UL,
 859   0xbc653c55UL, 0x0a31b715UL, 0x3ff306feUL, 0xd23182e4UL, 0x3c86f46aUL,
 860   0xedeeb2fdUL, 0x3ff31432UL, 0xf3f3fcd1UL, 0x3c8959a3UL, 0xfc4cd831UL,
 861   0x3ff32170UL, 0x8e18047cUL, 0x3c8a9ce7UL, 0x3ba8ea32UL, 0x3ff32eb8UL,
 862   0x3cb4f318UL, 0xbc9c45e8UL, 0xb26416ffUL, 0x3ff33c08UL, 0x843659a6UL,
 863   0x3c932721UL, 0x66e3fa2dUL, 0x3ff34962UL, 0x930881a4UL, 0xbc835a75UL,
 864   0x5f929ff1UL, 0x3ff356c5UL, 0x5c4e4628UL, 0xbc8b5ceeUL, 0xa2de883bUL,
 865   0x3ff36431UL, 0xa06cb85eUL, 0xbc8c3144UL, 0x373aa9cbUL, 0x3ff371a7UL,
 866   0xbf42eae2UL, 0xbc963aeaUL, 0x231e754aUL, 0x3ff37f26UL, 0x9eceb23cUL,
 867   0xbc99f5caUL, 0x6d05d866UL, 0x3ff38caeUL, 0x3c9904bdUL, 0xbc9e958dUL,
 868   0x1b7140efUL, 0x3ff39a40UL, 0xfc8e2934UL, 0xbc99a9a5UL, 0x34e59ff7UL,
 869   0x3ff3a7dbUL, 0xd661f5e3UL, 0xbc75e436UL, 0xbfec6cf4UL, 0x3ff3b57fUL,
 870   0xe26fff18UL, 0x3c954c66UL, 0xc313a8e5UL, 0x3ff3c32dUL, 0x375d29c3UL,
 871   0xbc9efff8UL, 0x44ede173UL, 0x3ff3d0e5UL, 0x8c284c71UL, 0x3c7fe8d0UL,
 872   0x4c123422UL, 0x3ff3dea6UL, 0x11f09ebcUL, 0x3c8ada09UL, 0xdf1c5175UL,
 873   0x3ff3ec70UL, 0x7b8c9bcaUL, 0xbc8af663UL, 0x04ac801cUL, 0x3ff3fa45UL,
 874   0xf956f9f3UL, 0xbc97d023UL, 0xc367a024UL, 0x3ff40822UL, 0xb6f4d048UL,
 875   0x3c8bddf8UL, 0x21f72e2aUL, 0x3ff4160aUL, 0x1c309278UL, 0xbc5ef369UL,
 876   0x2709468aUL, 0x3ff423fbUL, 0xc0b314ddUL, 0xbc98462dUL, 0xd950a897UL,
 877   0x3ff431f5UL, 0xe35f7999UL, 0xbc81c7ddUL, 0x3f84b9d4UL, 0x3ff43ffaUL,
 878   0x9704c003UL, 0x3c8880beUL, 0x6061892dUL, 0x3ff44e08UL, 0x04ef80d0UL,
 879   0x3c489b7aUL, 0x42a7d232UL, 0x3ff45c20UL, 0x82fb1f8eUL, 0xbc686419UL,
 880   0xed1d0057UL, 0x3ff46a41UL, 0xd1648a76UL, 0x3c9c944bUL, 0x668b3237UL,
 881   0x3ff4786dUL, 0xed445733UL, 0xbc9c20f0UL, 0xb5c13cd0UL, 0x3ff486a2UL,
 882   0xb69062f0UL, 0x3c73c1a3UL, 0xe192aed2UL, 0x3ff494e1UL, 0x5e499ea0UL,
 883   0xbc83b289UL, 0xf0d7d3deUL, 0x3ff4a32aUL, 0xf3d1be56UL, 0x3c99cb62UL,
 884   0xea6db7d7UL, 0x3ff4b17dUL, 0x7f2897f0UL, 0xbc8125b8UL, 0xd5362a27UL,
 885   0x3ff4bfdaUL, 0xafec42e2UL, 0x3c7d4397UL, 0xb817c114UL, 0x3ff4ce41UL,
 886   0x690abd5dUL, 0x3c905e29UL, 0x99fddd0dUL, 0x3ff4dcb2UL, 0xbc6a7833UL,
 887   0x3c98ecdbUL, 0x81d8abffUL, 0x3ff4eb2dUL, 0x2e5d7a52UL, 0xbc95257dUL,
 888   0x769d2ca7UL, 0x3ff4f9b2UL, 0xd25957e3UL, 0xbc94b309UL, 0x7f4531eeUL,
 889   0x3ff50841UL, 0x49b7465fUL, 0x3c7a249bUL, 0xa2cf6642UL, 0x3ff516daUL,
 890   0x69bd93efUL, 0xbc8f7685UL, 0xe83f4eefUL, 0x3ff5257dUL, 0x43efef71UL,
 891   0xbc7c998dUL, 0x569d4f82UL, 0x3ff5342bUL, 0x1db13cadUL, 0xbc807abeUL,
 892   0xf4f6ad27UL, 0x3ff542e2UL, 0x192d5f7eUL, 0x3c87926dUL, 0xca5d920fUL,
 893   0x3ff551a4UL, 0xefede59bUL, 0xbc8d689cUL, 0xdde910d2UL, 0x3ff56070UL,
 894   0x168eebf0UL, 0xbc90fb6eUL, 0x36b527daUL, 0x3ff56f47UL, 0x011d93adUL,
 895   0x3c99bb2cUL, 0xdbe2c4cfUL, 0x3ff57e27UL, 0x8a57b9c4UL, 0xbc90b98cUL,
 896   0xd497c7fdUL, 0x3ff58d12UL, 0x5b9a1de8UL, 0x3c8295e1UL, 0x27ff07ccUL,
 897   0x3ff59c08UL, 0xe467e60fUL, 0xbc97e2ceUL, 0xdd485429UL, 0x3ff5ab07UL,
 898   0x054647adUL, 0x3c96324cUL, 0xfba87a03UL, 0x3ff5ba11UL, 0x4c233e1aUL,
 899   0xbc9b77a1UL, 0x8a5946b7UL, 0x3ff5c926UL, 0x816986a2UL, 0x3c3c4b1bUL,
 900   0x90998b93UL, 0x3ff5d845UL, 0xa8b45643UL, 0xbc9cd6a7UL, 0x15ad2148UL,
 901   0x3ff5e76fUL, 0x3080e65eUL, 0x3c9ba6f9UL, 0x20dceb71UL, 0x3ff5f6a3UL,
 902   0xe3cdcf92UL, 0xbc89eaddUL, 0xb976dc09UL, 0x3ff605e1UL, 0x9b56de47UL,
 903   0xbc93e242UL, 0xe6cdf6f4UL, 0x3ff6152aUL, 0x4ab84c27UL, 0x3c9e4b3eUL,
 904   0xb03a5585UL, 0x3ff6247eUL, 0x7e40b497UL, 0xbc9383c1UL, 0x1d1929fdUL,
 905   0x3ff633ddUL, 0xbeb964e5UL, 0x3c984710UL, 0x34ccc320UL, 0x3ff64346UL,
 906   0x759d8933UL, 0xbc8c483cUL, 0xfebc8fb7UL, 0x3ff652b9UL, 0xc9a73e09UL,
 907   0xbc9ae3d5UL, 0x82552225UL, 0x3ff66238UL, 0x87591c34UL, 0xbc9bb609UL,
 908   0xc70833f6UL, 0x3ff671c1UL, 0x586c6134UL, 0xbc8e8732UL, 0xd44ca973UL,
 909   0x3ff68155UL, 0x44f73e65UL, 0x3c6038aeUL, 0xb19e9538UL, 0x3ff690f4UL,
 910   0x9aeb445dUL, 0x3c8804bdUL, 0x667f3bcdUL, 0x3ff6a09eUL, 0x13b26456UL,
 911   0xbc9bdd34UL, 0xfa75173eUL, 0x3ff6b052UL, 0x2c9a9d0eUL, 0x3c7a38f5UL,
 912   0x750bdabfUL, 0x3ff6c012UL, 0x67ff0b0dUL, 0xbc728956UL, 0xddd47645UL,
 913   0x3ff6cfdcUL, 0xb6f17309UL, 0x3c9c7aa9UL, 0x3c651a2fUL, 0x3ff6dfb2UL,
 914   0x683c88abUL, 0xbc6bbe3aUL, 0x98593ae5UL, 0x3ff6ef92UL, 0x9e1ac8b2UL,
 915   0xbc90b974UL, 0xf9519484UL, 0x3ff6ff7dUL, 0x25860ef6UL, 0xbc883c0fUL,
 916   0x66f42e87UL, 0x3ff70f74UL, 0xd45aa65fUL, 0x3c59d644UL, 0xe8ec5f74UL,
 917   0x3ff71f75UL, 0x86887a99UL, 0xbc816e47UL, 0x86ead08aUL, 0x3ff72f82UL,
 918   0x2cd62c72UL, 0xbc920aa0UL, 0x48a58174UL, 0x3ff73f9aUL, 0x6c65d53cUL,
 919   0xbc90a8d9UL, 0x35d7cbfdUL, 0x3ff74fbdUL, 0x618a6e1cUL, 0x3c9047fdUL,
 920   0x564267c9UL, 0x3ff75febUL, 0x57316dd3UL, 0xbc902459UL, 0xb1ab6e09UL,
 921   0x3ff77024UL, 0x169147f8UL, 0x3c9b7877UL, 0x4fde5d3fUL, 0x3ff78069UL,
 922   0x0a02162dUL, 0x3c9866b8UL, 0x38ac1cf6UL, 0x3ff790b9UL, 0x62aadd3eUL,
 923   0x3c9349a8UL, 0x73eb0187UL, 0x3ff7a114UL, 0xee04992fUL, 0xbc841577UL,
 924   0x0976cfdbUL, 0x3ff7b17bUL, 0x8468dc88UL, 0xbc9bebb5UL, 0x0130c132UL,
 925   0x3ff7c1edUL, 0xd1164dd6UL, 0x3c9f124cUL, 0x62ff86f0UL, 0x3ff7d26aUL,
 926   0xfb72b8b4UL, 0x3c91bddbUL, 0x36cf4e62UL, 0x3ff7e2f3UL, 0xba15797eUL,
 927   0x3c705d02UL, 0x8491c491UL, 0x3ff7f387UL, 0xcf9311aeUL, 0xbc807f11UL,
 928   0x543e1a12UL, 0x3ff80427UL, 0x626d972bUL, 0xbc927c86UL, 0xadd106d9UL,
 929   0x3ff814d2UL, 0x0d151d4dUL, 0x3c946437UL, 0x994cce13UL, 0x3ff82589UL,
 930   0xd41532d8UL, 0xbc9d4c1dUL, 0x1eb941f7UL, 0x3ff8364cUL, 0x31df2bd5UL,
 931   0x3c999b9aUL, 0x4623c7adUL, 0x3ff8471aUL, 0xa341cdfbUL, 0xbc88d684UL,
 932   0x179f5b21UL, 0x3ff857f4UL, 0xf8b216d0UL, 0xbc5ba748UL, 0x9b4492edUL,
 933   0x3ff868d9UL, 0x9bd4f6baUL, 0xbc9fc6f8UL, 0xd931a436UL, 0x3ff879caUL,
 934   0xd2db47bdUL, 0x3c85d2d7UL, 0xd98a6699UL, 0x3ff88ac7UL, 0xf37cb53aUL,
 935   0x3c9994c2UL, 0xa478580fUL, 0x3ff89bd0UL, 0x4475202aUL, 0x3c9d5395UL,
 936   0x422aa0dbUL, 0x3ff8ace5UL, 0x56864b27UL, 0x3c96e9f1UL, 0xbad61778UL,
 937   0x3ff8be05UL, 0xfc43446eUL, 0x3c9ecb5eUL, 0x16b5448cUL, 0x3ff8cf32UL,
 938   0x32e9e3aaUL, 0xbc70d55eUL, 0x5e0866d9UL, 0x3ff8e06aUL, 0x6fc9b2e6UL,
 939   0xbc97114aUL, 0x99157736UL, 0x3ff8f1aeUL, 0xa2e3976cUL, 0x3c85cc13UL,
 940   0xd0282c8aUL, 0x3ff902feUL, 0x85fe3fd2UL, 0x3c9592caUL, 0x0b91ffc6UL,
 941   0x3ff9145bUL, 0x2e582524UL, 0xbc9dd679UL, 0x53aa2fe2UL, 0x3ff925c3UL,
 942   0xa639db7fUL, 0xbc83455fUL, 0xb0cdc5e5UL, 0x3ff93737UL, 0x81b57ebcUL,
 943   0xbc675fc7UL, 0x2b5f98e5UL, 0x3ff948b8UL, 0x797d2d99UL, 0xbc8dc3d6UL,
 944   0xcbc8520fUL, 0x3ff95a44UL, 0x96a5f039UL, 0xbc764b7cUL, 0x9a7670b3UL,
 945   0x3ff96bddUL, 0x7f19c896UL, 0xbc5ba596UL, 0x9fde4e50UL, 0x3ff97d82UL,
 946   0x7c1b85d1UL, 0xbc9d185bUL, 0xe47a22a2UL, 0x3ff98f33UL, 0xa24c78ecUL,
 947   0x3c7cabdaUL, 0x70ca07baUL, 0x3ff9a0f1UL, 0x91cee632UL, 0xbc9173bdUL,
 948   0x4d53fe0dUL, 0x3ff9b2bbUL, 0x4df6d518UL, 0xbc9dd84eUL, 0x82a3f090UL,
 949   0x3ff9c491UL, 0xb071f2beUL, 0x3c7c7c46UL, 0x194bb8d5UL, 0x3ff9d674UL,
 950   0xa3dd8233UL, 0xbc9516beUL, 0x19e32323UL, 0x3ff9e863UL, 0x78e64c6eUL,
 951   0x3c7824caUL, 0x8d07f29eUL, 0x3ff9fa5eUL, 0xaaf1faceUL, 0xbc84a9ceUL,
 952   0x7b5de565UL, 0x3ffa0c66UL, 0x5d1cd533UL, 0xbc935949UL, 0xed8eb8bbUL,
 953   0x3ffa1e7aUL, 0xee8be70eUL, 0x3c9c6618UL, 0xec4a2d33UL, 0x3ffa309bUL,
 954   0x7ddc36abUL, 0x3c96305cUL, 0x80460ad8UL, 0x3ffa42c9UL, 0x589fb120UL,
 955   0xbc9aa780UL, 0xb23e255dUL, 0x3ffa5503UL, 0xdb8d41e1UL, 0xbc9d2f6eUL,
 956   0x8af46052UL, 0x3ffa674aUL, 0x30670366UL, 0x3c650f56UL, 0x1330b358UL,
 957   0x3ffa799eUL, 0xcac563c7UL, 0x3c9bcb7eUL, 0x53c12e59UL, 0x3ffa8bfeUL,
 958   0xb2ba15a9UL, 0xbc94f867UL, 0x5579fdbfUL, 0x3ffa9e6bUL, 0x0ef7fd31UL,
 959   0x3c90fac9UL, 0x21356ebaUL, 0x3ffab0e5UL, 0xdae94545UL, 0x3c889c31UL,
 960   0xbfd3f37aUL, 0x3ffac36bUL, 0xcae76cd0UL, 0xbc8f9234UL, 0x3a3c2774UL,
 961   0x3ffad5ffUL, 0xb6b1b8e5UL, 0x3c97ef3bUL, 0x995ad3adUL, 0x3ffae89fUL,
 962   0x345dcc81UL, 0x3c97a1cdUL, 0xe622f2ffUL, 0x3ffafb4cUL, 0x0f315ecdUL,
 963   0xbc94b2fcUL, 0x298db666UL, 0x3ffb0e07UL, 0x4c80e425UL, 0xbc9bdef5UL,
 964   0x6c9a8952UL, 0x3ffb20ceUL, 0x4a0756ccUL, 0x3c94dd02UL, 0xb84f15fbUL,
 965   0x3ffb33a2UL, 0x3084d708UL, 0xbc62805eUL, 0x15b749b1UL, 0x3ffb4684UL,
 966   0xe9df7c90UL, 0xbc7f763dUL, 0x8de5593aUL, 0x3ffb5972UL, 0xbbba6de3UL,
 967   0xbc9c71dfUL, 0x29f1c52aUL, 0x3ffb6c6eUL, 0x52883f6eUL, 0x3c92a8f3UL,
 968   0xf2fb5e47UL, 0x3ffb7f76UL, 0x7e54ac3bUL, 0xbc75584fUL, 0xf22749e4UL,
 969   0x3ffb928cUL, 0x54cb65c6UL, 0xbc9b7216UL, 0x30a1064aUL, 0x3ffba5b0UL,
 970   0x0e54292eUL, 0xbc9efcd3UL, 0xb79a6f1fUL, 0x3ffbb8e0UL, 0xc9696205UL,
 971   0xbc3f52d1UL, 0x904bc1d2UL, 0x3ffbcc1eUL, 0x7a2d9e84UL, 0x3c823dd0UL,
 972   0xc3f3a207UL, 0x3ffbdf69UL, 0x60ea5b53UL, 0xbc3c2623UL, 0x5bd71e09UL,
 973   0x3ffbf2c2UL, 0x3f6b9c73UL, 0xbc9efdcaUL, 0x6141b33dUL, 0x3ffc0628UL,
 974   0xa1fbca34UL, 0xbc8d8a5aUL, 0xdd85529cUL, 0x3ffc199bUL, 0x895048ddUL,
 975   0x3c811065UL, 0xd9fa652cUL, 0x3ffc2d1cUL, 0x17c8a5d7UL, 0xbc96e516UL,
 976   0x5fffd07aUL, 0x3ffc40abUL, 0xe083c60aUL, 0x3c9b4537UL, 0x78fafb22UL,
 977   0x3ffc5447UL, 0x2493b5afUL, 0x3c912f07UL, 0x2e57d14bUL, 0x3ffc67f1UL,
 978   0xff483cadUL, 0x3c92884dUL, 0x8988c933UL, 0x3ffc7ba8UL, 0xbe255559UL,
 979   0xbc8e76bbUL, 0x9406e7b5UL, 0x3ffc8f6dUL, 0x48805c44UL, 0x3c71acbcUL,
 980   0x5751c4dbUL, 0x3ffca340UL, 0xd10d08f5UL, 0xbc87f2beUL, 0xdcef9069UL,
 981   0x3ffcb720UL, 0xd1e949dbUL, 0x3c7503cbUL, 0x2e6d1675UL, 0x3ffccb0fUL,
 982   0x86009092UL, 0xbc7d220fUL, 0x555dc3faUL, 0x3ffcdf0bUL, 0x53829d72UL,
 983   0xbc8dd83bUL, 0x5b5bab74UL, 0x3ffcf315UL, 0xb86dff57UL, 0xbc9a08e9UL,
 984   0x4a07897cUL, 0x3ffd072dUL, 0x43797a9cUL, 0xbc9cbc37UL, 0x2b08c968UL,
 985   0x3ffd1b53UL, 0x219a36eeUL, 0x3c955636UL, 0x080d89f2UL, 0x3ffd2f87UL,
 986   0x719d8578UL, 0xbc9d487bUL, 0xeacaa1d6UL, 0x3ffd43c8UL, 0xbf5a1614UL,
 987   0x3c93db53UL, 0xdcfba487UL, 0x3ffd5818UL, 0xd75b3707UL, 0x3c82ed02UL,
 988   0xe862e6d3UL, 0x3ffd6c76UL, 0x4a8165a0UL, 0x3c5fe87aUL, 0x16c98398UL,
 989   0x3ffd80e3UL, 0x8beddfe8UL, 0xbc911ec1UL, 0x71ff6075UL, 0x3ffd955dUL,
 990   0xbb9af6beUL, 0x3c9a052dUL, 0x03db3285UL, 0x3ffda9e6UL, 0x696db532UL,
 991   0x3c9c2300UL, 0xd63a8315UL, 0x3ffdbe7cUL, 0x926b8be4UL, 0xbc9b76f1UL,
 992   0xf301b460UL, 0x3ffdd321UL, 0x78f018c3UL, 0x3c92da57UL, 0x641c0658UL,
 993   0x3ffde7d5UL, 0x8e79ba8fUL, 0xbc9ca552UL, 0x337b9b5fUL, 0x3ffdfc97UL,
 994   0x4f184b5cUL, 0xbc91a5cdUL, 0x6b197d17UL, 0x3ffe1167UL, 0xbd5c7f44UL,
 995   0xbc72b529UL, 0x14f5a129UL, 0x3ffe2646UL, 0x817a1496UL, 0xbc97b627UL,
 996   0x3b16ee12UL, 0x3ffe3b33UL, 0x31fdc68bUL, 0xbc99f4a4UL, 0xe78b3ff6UL,
 997   0x3ffe502eUL, 0x80a9cc8fUL, 0x3c839e89UL, 0x24676d76UL, 0x3ffe6539UL,
 998   0x7522b735UL, 0xbc863ff8UL, 0xfbc74c83UL, 0x3ffe7a51UL, 0xca0c8de2UL,
 999   0x3c92d522UL, 0x77cdb740UL, 0x3ffe8f79UL, 0x80b054b1UL, 0xbc910894UL,
1000   0xa2a490daUL, 0x3ffea4afUL, 0x179c2893UL, 0xbc9e9c23UL, 0x867cca6eUL,
1001   0x3ffeb9f4UL, 0x2293e4f2UL, 0x3c94832fUL, 0x2d8e67f1UL, 0x3ffecf48UL,
1002   0xb411ad8cUL, 0xbc9c93f3UL, 0xa2188510UL, 0x3ffee4aaUL, 0xa487568dUL,
1003   0x3c91c68dUL, 0xee615a27UL, 0x3ffefa1bUL, 0x86a4b6b0UL, 0x3c9dc7f4UL,
1004   0x1cb6412aUL, 0x3fff0f9cUL, 0x65181d45UL, 0xbc932200UL, 0x376bba97UL,
1005   0x3fff252bUL, 0xbf0d8e43UL, 0x3c93a1a5UL, 0x48dd7274UL, 0x3fff3ac9UL,
1006   0x3ed837deUL, 0xbc795a5aUL, 0x5b6e4540UL, 0x3fff5076UL, 0x2dd8a18bUL,
1007   0x3c99d3e1UL, 0x798844f8UL, 0x3fff6632UL, 0x3539343eUL, 0x3c9fa37bUL,
1008   0xad9cbe14UL, 0x3fff7bfdUL, 0xd006350aUL, 0xbc9dbb12UL, 0x02243c89UL,
1009   0x3fff91d8UL, 0xa779f689UL, 0xbc612ea8UL, 0x819e90d8UL, 0x3fffa7c1UL,
1010   0xf3a5931eUL, 0x3c874853UL, 0x3692d514UL, 0x3fffbdbaUL, 0x15098eb6UL,
1011   0xbc796773UL, 0x2b8f71f1UL, 0x3fffd3c2UL, 0x966579e7UL, 0x3c62eb74UL,
1012   0x6b2a23d9UL, 0x3fffe9d9UL, 0x7442fde3UL, 0x3c74a603UL
1013 };
1014 
1015 ALIGNED_(16) juint _e_coeff[] =
1016 {
1017   0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL, 0x6fba4e77UL,
1018   0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL, 0xfefa39efUL, 0x3fe62e42UL,
1019   0x00000000UL, 0x00000000UL
1020 };
1021 
1022 ALIGNED_(16) juint _coeff_h[] =
1023 {
1024   0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
1025 };
1026 
1027 ALIGNED_(16) juint _HIGHMASK_LOG_X[] =
1028 {
1029   0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL
1030 };
1031 
1032 ALIGNED_(8) juint _HALFMASK[] =
1033 {
1034   0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL
1035 };
1036 
1037 ALIGNED_(16) juint _coeff_pow[] =
1038 {
1039   0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL, 0x9f95985aUL,
1040   0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL, 0x518775e3UL, 0x3f9004f2UL,
1041   0xac8349bbUL, 0x3fa76c9bUL, 0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL,
1042   0xbf5dabe1UL, 0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL,
1043   0x486ececbUL, 0x3fc4635eUL, 0x412055ccUL, 0xbdd61bb2UL
1044 };
1045 
1046 ALIGNED_(16) juint _L_tbl_pow[] =
1047 {
1048   0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x20000000UL,
1049   0x3feff00aUL, 0x96621f95UL, 0x3e5b1856UL, 0xe0000000UL, 0x3fefe019UL,
1050   0xe5916f9eUL, 0xbe325278UL, 0x00000000UL, 0x3fefd02fUL, 0x859a1062UL,
1051   0x3e595fb7UL, 0xc0000000UL, 0x3fefc049UL, 0xb245f18fUL, 0xbe529c38UL,
1052   0xe0000000UL, 0x3fefb069UL, 0xad2880a7UL, 0xbe501230UL, 0x60000000UL,
1053   0x3fefa08fUL, 0xc8e72420UL, 0x3e597bd1UL, 0x80000000UL, 0x3fef90baUL,
1054   0xc30c4500UL, 0xbe5d6c75UL, 0xe0000000UL, 0x3fef80eaUL, 0x02c63f43UL,
1055   0x3e2e1318UL, 0xc0000000UL, 0x3fef7120UL, 0xb3d4ccccUL, 0xbe44c52aUL,
1056   0x00000000UL, 0x3fef615cUL, 0xdbd91397UL, 0xbe4e7d6cUL, 0xa0000000UL,
1057   0x3fef519cUL, 0x65c5cd68UL, 0xbe522dc8UL, 0xa0000000UL, 0x3fef41e2UL,
1058   0x46d1306cUL, 0xbe5a840eUL, 0xe0000000UL, 0x3fef322dUL, 0xd2980e94UL,
1059   0x3e5071afUL, 0xa0000000UL, 0x3fef227eUL, 0x773abadeUL, 0xbe5891e5UL,
1060   0xa0000000UL, 0x3fef12d4UL, 0xdc6bf46bUL, 0xbe5cccbeUL, 0xe0000000UL,
1061   0x3fef032fUL, 0xbc7247faUL, 0xbe2bab83UL, 0x80000000UL, 0x3feef390UL,
1062   0xbcaa1e46UL, 0xbe53bb3bUL, 0x60000000UL, 0x3feee3f6UL, 0x5f6c682dUL,
1063   0xbe54c619UL, 0x80000000UL, 0x3feed461UL, 0x5141e368UL, 0xbe4b6d86UL,
1064   0xe0000000UL, 0x3feec4d1UL, 0xec678f76UL, 0xbe369af6UL, 0x80000000UL,
1065   0x3feeb547UL, 0x41301f55UL, 0xbe2d4312UL, 0x60000000UL, 0x3feea5c2UL,
1066   0x676da6bdUL, 0xbe4d8dd0UL, 0x60000000UL, 0x3fee9642UL, 0x57a891c4UL,
1067   0x3e51f991UL, 0xa0000000UL, 0x3fee86c7UL, 0xe4eb491eUL, 0x3e579bf9UL,
1068   0x20000000UL, 0x3fee7752UL, 0xfddc4a2cUL, 0xbe3356e6UL, 0xc0000000UL,
1069   0x3fee67e1UL, 0xd75b5bf1UL, 0xbe449531UL, 0x80000000UL, 0x3fee5876UL,
1070   0xbd423b8eUL, 0x3df54fe4UL, 0x60000000UL, 0x3fee4910UL, 0x330e51b9UL,
1071   0x3e54289cUL, 0x80000000UL, 0x3fee39afUL, 0x8651a95fUL, 0xbe55aad6UL,
1072   0xa0000000UL, 0x3fee2a53UL, 0x5e98c708UL, 0xbe2fc4a9UL, 0xe0000000UL,
1073   0x3fee1afcUL, 0x0989328dUL, 0x3e23958cUL, 0x40000000UL, 0x3fee0babUL,
1074   0xee642abdUL, 0xbe425dd8UL, 0xa0000000UL, 0x3fedfc5eUL, 0xc394d236UL,
1075   0x3e526362UL, 0x20000000UL, 0x3feded17UL, 0xe104aa8eUL, 0x3e4ce247UL,
1076   0xc0000000UL, 0x3fedddd4UL, 0x265a9be4UL, 0xbe5bb77aUL, 0x40000000UL,
1077   0x3fedce97UL, 0x0ecac52fUL, 0x3e4a7cb1UL, 0xe0000000UL, 0x3fedbf5eUL,
1078   0x124cb3b8UL, 0x3e257024UL, 0x80000000UL, 0x3fedb02bUL, 0xe6d4febeUL,
1079   0xbe2033eeUL, 0x20000000UL, 0x3feda0fdUL, 0x39cca00eUL, 0xbe3ddabcUL,
1080   0xc0000000UL, 0x3fed91d3UL, 0xef8a552aUL, 0xbe543390UL, 0x40000000UL,
1081   0x3fed82afUL, 0xb8e85204UL, 0x3e513850UL, 0xe0000000UL, 0x3fed738fUL,
1082   0x3d59fe08UL, 0xbe5db728UL, 0x40000000UL, 0x3fed6475UL, 0x3aa7ead1UL,
1083   0x3e58804bUL, 0xc0000000UL, 0x3fed555fUL, 0xf8a35ba9UL, 0xbe5298b0UL,
1084   0x00000000UL, 0x3fed464fUL, 0x9a88dd15UL, 0x3e5a8cdbUL, 0x40000000UL,
1085   0x3fed3743UL, 0xb0b0a190UL, 0x3e598635UL, 0x80000000UL, 0x3fed283cUL,
1086   0xe2113295UL, 0xbe5c1119UL, 0x80000000UL, 0x3fed193aUL, 0xafbf1728UL,
1087   0xbe492e9cUL, 0x60000000UL, 0x3fed0a3dUL, 0xe4a4ccf3UL, 0x3e19b90eUL,
1088   0x20000000UL, 0x3fecfb45UL, 0xba3cbeb8UL, 0x3e406b50UL, 0xc0000000UL,
1089   0x3fecec51UL, 0x110f7dddUL, 0x3e0d6806UL, 0x40000000UL, 0x3fecdd63UL,
1090   0x7dd7d508UL, 0xbe5a8943UL, 0x80000000UL, 0x3fecce79UL, 0x9b60f271UL,
1091   0xbe50676aUL, 0x80000000UL, 0x3fecbf94UL, 0x0b9ad660UL, 0x3e59174fUL,
1092   0x60000000UL, 0x3fecb0b4UL, 0x00823d9cUL, 0x3e5bbf72UL, 0x20000000UL,
1093   0x3feca1d9UL, 0x38a6ec89UL, 0xbe4d38f9UL, 0x80000000UL, 0x3fec9302UL,
1094   0x3a0b7d8eUL, 0x3e53dbfdUL, 0xc0000000UL, 0x3fec8430UL, 0xc6826b34UL,
1095   0xbe27c5c9UL, 0xc0000000UL, 0x3fec7563UL, 0x0c706381UL, 0xbe593653UL,
1096   0x60000000UL, 0x3fec669bUL, 0x7df34ec7UL, 0x3e461ab5UL, 0xe0000000UL,
1097   0x3fec57d7UL, 0x40e5e7e8UL, 0xbe5c3daeUL, 0x00000000UL, 0x3fec4919UL,
1098   0x5602770fUL, 0xbe55219dUL, 0xc0000000UL, 0x3fec3a5eUL, 0xec7911ebUL,
1099   0x3e5a5d25UL, 0x60000000UL, 0x3fec2ba9UL, 0xb39ea225UL, 0xbe53c00bUL,
1100   0x80000000UL, 0x3fec1cf8UL, 0x967a212eUL, 0x3e5a8ddfUL, 0x60000000UL,
1101   0x3fec0e4cUL, 0x580798bdUL, 0x3e5f53abUL, 0x00000000UL, 0x3febffa5UL,
1102   0xb8282df6UL, 0xbe46b874UL, 0x20000000UL, 0x3febf102UL, 0xe33a6729UL,
1103   0x3e54963fUL, 0x00000000UL, 0x3febe264UL, 0x3b53e88aUL, 0xbe3adce1UL,
1104   0x60000000UL, 0x3febd3caUL, 0xc2585084UL, 0x3e5cde9fUL, 0x80000000UL,
1105   0x3febc535UL, 0xa335c5eeUL, 0xbe39fd9cUL, 0x20000000UL, 0x3febb6a5UL,
1106   0x7325b04dUL, 0x3e42ba15UL, 0x60000000UL, 0x3feba819UL, 0x1564540fUL,
1107   0x3e3a9f35UL, 0x40000000UL, 0x3feb9992UL, 0x83fff592UL, 0xbe5465ceUL,
1108   0xa0000000UL, 0x3feb8b0fUL, 0xb9da63d3UL, 0xbe4b1a0aUL, 0x80000000UL,
1109   0x3feb7c91UL, 0x6d6f1ea4UL, 0x3e557657UL, 0x00000000UL, 0x3feb6e18UL,
1110   0x5e80a1bfUL, 0x3e4ddbb6UL, 0x00000000UL, 0x3feb5fa3UL, 0x1c9eacb5UL,
1111   0x3e592877UL, 0xa0000000UL, 0x3feb5132UL, 0x6d40beb3UL, 0xbe51858cUL,
1112   0xa0000000UL, 0x3feb42c6UL, 0xd740c67bUL, 0x3e427ad2UL, 0x40000000UL,
1113   0x3feb345fUL, 0xa3e0cceeUL, 0xbe5c2fc4UL, 0x40000000UL, 0x3feb25fcUL,
1114   0x8e752b50UL, 0xbe3da3c2UL, 0xc0000000UL, 0x3feb179dUL, 0xa892e7deUL,
1115   0x3e1fb481UL, 0xc0000000UL, 0x3feb0943UL, 0x21ed71e9UL, 0xbe365206UL,
1116   0x20000000UL, 0x3feafaeeUL, 0x0e1380a3UL, 0x3e5c5b7bUL, 0x20000000UL,
1117   0x3feaec9dUL, 0x3c3d640eUL, 0xbe5dbbd0UL, 0x60000000UL, 0x3feade50UL,
1118   0x8f97a715UL, 0x3e3a8ec5UL, 0x20000000UL, 0x3fead008UL, 0x23ab2839UL,
1119   0x3e2fe98aUL, 0x40000000UL, 0x3feac1c4UL, 0xf4bbd50fUL, 0x3e54d8f6UL,
1120   0xe0000000UL, 0x3feab384UL, 0x14757c4dUL, 0xbe48774cUL, 0xc0000000UL,
1121   0x3feaa549UL, 0x7c7b0eeaUL, 0x3e5b51bbUL, 0x20000000UL, 0x3fea9713UL,
1122   0xf56f7013UL, 0x3e386200UL, 0xe0000000UL, 0x3fea88e0UL, 0xbe428ebeUL,
1123   0xbe514af5UL, 0xe0000000UL, 0x3fea7ab2UL, 0x8d0e4496UL, 0x3e4f9165UL,
1124   0x60000000UL, 0x3fea6c89UL, 0xdbacc5d5UL, 0xbe5c063bUL, 0x20000000UL,
1125   0x3fea5e64UL, 0x3f19d970UL, 0xbe5a0c8cUL, 0x20000000UL, 0x3fea5043UL,
1126   0x09ea3e6bUL, 0x3e5065dcUL, 0x80000000UL, 0x3fea4226UL, 0x78df246cUL,
1127   0x3e5e05f6UL, 0x40000000UL, 0x3fea340eUL, 0x4057d4a0UL, 0x3e431b2bUL,
1128   0x40000000UL, 0x3fea25faUL, 0x82867bb5UL, 0x3e4b76beUL, 0xa0000000UL,
1129   0x3fea17eaUL, 0x9436f40aUL, 0xbe5aad39UL, 0x20000000UL, 0x3fea09dfUL,
1130   0x4b5253b3UL, 0x3e46380bUL, 0x00000000UL, 0x3fe9fbd8UL, 0x8fc52466UL,
1131   0xbe386f9bUL, 0x20000000UL, 0x3fe9edd5UL, 0x22d3f344UL, 0xbe538347UL,
1132   0x60000000UL, 0x3fe9dfd6UL, 0x1ac33522UL, 0x3e5dbc53UL, 0x00000000UL,
1133   0x3fe9d1dcUL, 0xeabdff1dUL, 0x3e40fc0cUL, 0xe0000000UL, 0x3fe9c3e5UL,
1134   0xafd30e73UL, 0xbe585e63UL, 0xe0000000UL, 0x3fe9b5f3UL, 0xa52f226aUL,
1135   0xbe43e8f9UL, 0x20000000UL, 0x3fe9a806UL, 0xecb8698dUL, 0xbe515b36UL,
1136   0x80000000UL, 0x3fe99a1cUL, 0xf2b4e89dUL, 0x3e48b62bUL, 0x20000000UL,
1137   0x3fe98c37UL, 0x7c9a88fbUL, 0x3e44414cUL, 0x00000000UL, 0x3fe97e56UL,
1138   0xda015741UL, 0xbe5d13baUL, 0xe0000000UL, 0x3fe97078UL, 0x5fdace06UL,
1139   0x3e51b947UL, 0x00000000UL, 0x3fe962a0UL, 0x956ca094UL, 0x3e518785UL,
1140   0x40000000UL, 0x3fe954cbUL, 0x01164c1dUL, 0x3e5d5b57UL, 0xc0000000UL,
1141   0x3fe946faUL, 0xe63b3767UL, 0xbe4f84e7UL, 0x40000000UL, 0x3fe9392eUL,
1142   0xe57cc2a9UL, 0x3e34eda3UL, 0xe0000000UL, 0x3fe92b65UL, 0x8c75b544UL,
1143   0x3e5766a0UL, 0xc0000000UL, 0x3fe91da1UL, 0x37d1d087UL, 0xbe5e2ab1UL,
1144   0x80000000UL, 0x3fe90fe1UL, 0xa953dc20UL, 0x3e5fa1f3UL, 0x80000000UL,
1145   0x3fe90225UL, 0xdbd3f369UL, 0x3e47d6dbUL, 0xa0000000UL, 0x3fe8f46dUL,
1146   0x1c9be989UL, 0xbe5e2b0aUL, 0xa0000000UL, 0x3fe8e6b9UL, 0x3c93d76aUL,
1147   0x3e5c8618UL, 0xe0000000UL, 0x3fe8d909UL, 0x2182fc9aUL, 0xbe41aa9eUL,
1148   0x20000000UL, 0x3fe8cb5eUL, 0xe6b3539dUL, 0xbe530d19UL, 0x60000000UL,
1149   0x3fe8bdb6UL, 0x49e58cc3UL, 0xbe3bb374UL, 0xa0000000UL, 0x3fe8b012UL,
1150   0xa7cfeb8fUL, 0x3e56c412UL, 0x00000000UL, 0x3fe8a273UL, 0x8d52bc19UL,
1151   0x3e1429b8UL, 0x60000000UL, 0x3fe894d7UL, 0x4dc32c6cUL, 0xbe48604cUL,
1152   0xc0000000UL, 0x3fe8873fUL, 0x0c868e56UL, 0xbe564ee5UL, 0x00000000UL,
1153   0x3fe879acUL, 0x56aee828UL, 0x3e5e2fd8UL, 0x60000000UL, 0x3fe86c1cUL,
1154   0x7ceab8ecUL, 0x3e493365UL, 0xc0000000UL, 0x3fe85e90UL, 0x78d4dadcUL,
1155   0xbe4f7f25UL, 0x00000000UL, 0x3fe85109UL, 0x0ccd8280UL, 0x3e31e7a2UL,
1156   0x40000000UL, 0x3fe84385UL, 0x34ba4e15UL, 0x3e328077UL, 0x80000000UL,
1157   0x3fe83605UL, 0xa670975aUL, 0xbe53eee5UL, 0xa0000000UL, 0x3fe82889UL,
1158   0xf61b77b2UL, 0xbe43a20aUL, 0xa0000000UL, 0x3fe81b11UL, 0x13e6643bUL,
1159   0x3e5e5fe5UL, 0xc0000000UL, 0x3fe80d9dUL, 0x82cc94e8UL, 0xbe5ff1f9UL,
1160   0xa0000000UL, 0x3fe8002dUL, 0x8a0c9c5dUL, 0xbe42b0e7UL, 0x60000000UL,
1161   0x3fe7f2c1UL, 0x22a16f01UL, 0x3e5d9ea0UL, 0x20000000UL, 0x3fe7e559UL,
1162   0xc38cd451UL, 0x3e506963UL, 0xc0000000UL, 0x3fe7d7f4UL, 0x9902bc71UL,
1163   0x3e4503d7UL, 0x40000000UL, 0x3fe7ca94UL, 0xdef2a3c0UL, 0x3e3d98edUL,
1164   0xa0000000UL, 0x3fe7bd37UL, 0xed49abb0UL, 0x3e24c1ffUL, 0xe0000000UL,
1165   0x3fe7afdeUL, 0xe3b0be70UL, 0xbe40c467UL, 0x00000000UL, 0x3fe7a28aUL,
1166   0xaf9f193cUL, 0xbe5dff6cUL, 0xe0000000UL, 0x3fe79538UL, 0xb74cf6b6UL,
1167   0xbe258ed0UL, 0xa0000000UL, 0x3fe787ebUL, 0x1d9127c7UL, 0x3e345fb0UL,
1168   0x40000000UL, 0x3fe77aa2UL, 0x1028c21dUL, 0xbe4619bdUL, 0xa0000000UL,
1169   0x3fe76d5cUL, 0x7cb0b5e4UL, 0x3e40f1a2UL, 0xe0000000UL, 0x3fe7601aUL,
1170   0x2b1bc4adUL, 0xbe32e8bbUL, 0xe0000000UL, 0x3fe752dcUL, 0x6839f64eUL,
1171   0x3e41f57bUL, 0xc0000000UL, 0x3fe745a2UL, 0xc4121f7eUL, 0xbe52c40aUL,
1172   0x60000000UL, 0x3fe7386cUL, 0xd6852d72UL, 0xbe5c4e6bUL, 0xc0000000UL,
1173   0x3fe72b39UL, 0x91d690f7UL, 0xbe57f88fUL, 0xe0000000UL, 0x3fe71e0aUL,
1174   0x627a2159UL, 0xbe4425d5UL, 0xc0000000UL, 0x3fe710dfUL, 0x50a54033UL,
1175   0x3e422b7eUL, 0x60000000UL, 0x3fe703b8UL, 0x3b0b5f91UL, 0x3e5d3857UL,
1176   0xe0000000UL, 0x3fe6f694UL, 0x84d628a2UL, 0xbe51f090UL, 0x00000000UL,
1177   0x3fe6e975UL, 0x306d8894UL, 0xbe414d83UL, 0xe0000000UL, 0x3fe6dc58UL,
1178   0x30bf24aaUL, 0xbe4650caUL, 0x80000000UL, 0x3fe6cf40UL, 0xd4628d69UL,
1179   0xbe5db007UL, 0xc0000000UL, 0x3fe6c22bUL, 0xa2aae57bUL, 0xbe31d279UL,
1180   0xc0000000UL, 0x3fe6b51aUL, 0x860edf7eUL, 0xbe2d4c4aUL, 0x80000000UL,
1181   0x3fe6a80dUL, 0xf3559341UL, 0xbe5f7e98UL, 0xe0000000UL, 0x3fe69b03UL,
1182   0xa885899eUL, 0xbe5c2011UL, 0xe0000000UL, 0x3fe68dfdUL, 0x2bdc6d37UL,
1183   0x3e224a82UL, 0xa0000000UL, 0x3fe680fbUL, 0xc12ad1b9UL, 0xbe40cf56UL,
1184   0x00000000UL, 0x3fe673fdUL, 0x1bcdf659UL, 0xbdf52f2dUL, 0x00000000UL,
1185   0x3fe66702UL, 0x5df10408UL, 0x3e5663e0UL, 0xc0000000UL, 0x3fe65a0aUL,
1186   0xa4070568UL, 0xbe40b12fUL, 0x00000000UL, 0x3fe64d17UL, 0x71c54c47UL,
1187   0x3e5f5e8bUL, 0x00000000UL, 0x3fe64027UL, 0xbd4b7e83UL, 0x3e42ead6UL,
1188   0xa0000000UL, 0x3fe6333aUL, 0x61598bd2UL, 0xbe4c48d4UL, 0xc0000000UL,
1189   0x3fe62651UL, 0x6f538d61UL, 0x3e548401UL, 0xa0000000UL, 0x3fe6196cUL,
1190   0x14344120UL, 0xbe529af6UL, 0x00000000UL, 0x3fe60c8bUL, 0x5982c587UL,
1191   0xbe3e1e4fUL, 0x00000000UL, 0x3fe5ffadUL, 0xfe51d4eaUL, 0xbe4c897aUL,
1192   0x80000000UL, 0x3fe5f2d2UL, 0xfd46ebe1UL, 0x3e552e00UL, 0xa0000000UL,
1193   0x3fe5e5fbUL, 0xa4695699UL, 0x3e5ed471UL, 0x60000000UL, 0x3fe5d928UL,
1194   0x80d118aeUL, 0x3e456b61UL, 0xa0000000UL, 0x3fe5cc58UL, 0x304c330bUL,
1195   0x3e54dc29UL, 0x80000000UL, 0x3fe5bf8cUL, 0x0af2dedfUL, 0xbe3aa9bdUL,
1196   0xe0000000UL, 0x3fe5b2c3UL, 0x15fc9258UL, 0xbe479a37UL, 0xc0000000UL,
1197   0x3fe5a5feUL, 0x9292c7eaUL, 0x3e188650UL, 0x20000000UL, 0x3fe5993dUL,
1198   0x33b4d380UL, 0x3e5d6d93UL, 0x20000000UL, 0x3fe58c7fUL, 0x02fd16c7UL,
1199   0x3e2fe961UL, 0xa0000000UL, 0x3fe57fc4UL, 0x4a05edb6UL, 0xbe4d55b4UL,
1200   0xa0000000UL, 0x3fe5730dUL, 0x3d443abbUL, 0xbe5e6954UL, 0x00000000UL,
1201   0x3fe5665aUL, 0x024acfeaUL, 0x3e50e61bUL, 0x00000000UL, 0x3fe559aaUL,
1202   0xcc9edd09UL, 0xbe325403UL, 0x60000000UL, 0x3fe54cfdUL, 0x1fe26950UL,
1203   0x3e5d500eUL, 0x60000000UL, 0x3fe54054UL, 0x6c5ae164UL, 0xbe4a79b4UL,
1204   0xc0000000UL, 0x3fe533aeUL, 0x154b0287UL, 0xbe401571UL, 0xa0000000UL,
1205   0x3fe5270cUL, 0x0673f401UL, 0xbe56e56bUL, 0xe0000000UL, 0x3fe51a6dUL,
1206   0x751b639cUL, 0x3e235269UL, 0xa0000000UL, 0x3fe50dd2UL, 0x7c7b2bedUL,
1207   0x3ddec887UL, 0xc0000000UL, 0x3fe5013aUL, 0xafab4e17UL, 0x3e5e7575UL,
1208   0x60000000UL, 0x3fe4f4a6UL, 0x2e308668UL, 0x3e59aed6UL, 0x80000000UL,
1209   0x3fe4e815UL, 0xf33e2a76UL, 0xbe51f184UL, 0xe0000000UL, 0x3fe4db87UL,
1210   0x839f3e3eUL, 0x3e57db01UL, 0xc0000000UL, 0x3fe4cefdUL, 0xa9eda7bbUL,
1211   0x3e535e0fUL, 0x00000000UL, 0x3fe4c277UL, 0x2a8f66a5UL, 0x3e5ce451UL,
1212   0xc0000000UL, 0x3fe4b5f3UL, 0x05192456UL, 0xbe4e8518UL, 0xc0000000UL,
1213   0x3fe4a973UL, 0x4aa7cd1dUL, 0x3e46784aUL, 0x40000000UL, 0x3fe49cf7UL,
1214   0x8e23025eUL, 0xbe5749f2UL, 0x00000000UL, 0x3fe4907eUL, 0x18d30215UL,
1215   0x3e360f39UL, 0x20000000UL, 0x3fe48408UL, 0x63dcf2f3UL, 0x3e5e00feUL,
1216   0xc0000000UL, 0x3fe47795UL, 0x46182d09UL, 0xbe5173d9UL, 0xa0000000UL,
1217   0x3fe46b26UL, 0x8f0e62aaUL, 0xbe48f281UL, 0xe0000000UL, 0x3fe45ebaUL,
1218   0x5775c40cUL, 0xbe56aad4UL, 0x60000000UL, 0x3fe45252UL, 0x0fe25f69UL,
1219   0x3e48bd71UL, 0x40000000UL, 0x3fe445edUL, 0xe9989ec5UL, 0x3e590d97UL,
1220   0x80000000UL, 0x3fe4398bUL, 0xb3d9ffe3UL, 0x3e479dbcUL, 0x20000000UL,
1221   0x3fe42d2dUL, 0x388e4d2eUL, 0xbe5eed80UL, 0xe0000000UL, 0x3fe420d1UL,
1222   0x6f797c18UL, 0x3e554b4cUL, 0x20000000UL, 0x3fe4147aUL, 0x31048bb4UL,
1223   0xbe5b1112UL, 0x80000000UL, 0x3fe40825UL, 0x2efba4f9UL, 0x3e48ebc7UL,
1224   0x40000000UL, 0x3fe3fbd4UL, 0x50201119UL, 0x3e40b701UL, 0x40000000UL,
1225   0x3fe3ef86UL, 0x0a4db32cUL, 0x3e551de8UL, 0xa0000000UL, 0x3fe3e33bUL,
1226   0x0c9c148bUL, 0xbe50c1f6UL, 0x20000000UL, 0x3fe3d6f4UL, 0xc9129447UL,
1227   0x3e533fa0UL, 0x00000000UL, 0x3fe3cab0UL, 0xaae5b5a0UL, 0xbe22b68eUL,
1228   0x20000000UL, 0x3fe3be6fUL, 0x02305e8aUL, 0xbe54fc08UL, 0x60000000UL,
1229   0x3fe3b231UL, 0x7f908258UL, 0x3e57dc05UL, 0x00000000UL, 0x3fe3a5f7UL,
1230   0x1a09af78UL, 0x3e08038bUL, 0xe0000000UL, 0x3fe399bfUL, 0x490643c1UL,
1231   0xbe5dbe42UL, 0xe0000000UL, 0x3fe38d8bUL, 0x5e8ad724UL, 0xbe3c2b72UL,
1232   0x20000000UL, 0x3fe3815bUL, 0xc67196b6UL, 0x3e1713cfUL, 0xa0000000UL,
1233   0x3fe3752dUL, 0x6182e429UL, 0xbe3ec14cUL, 0x40000000UL, 0x3fe36903UL,
1234   0xab6eb1aeUL, 0x3e5a2cc5UL, 0x40000000UL, 0x3fe35cdcUL, 0xfe5dc064UL,
1235   0xbe5c5878UL, 0x40000000UL, 0x3fe350b8UL, 0x0ba6b9e4UL, 0x3e51619bUL,
1236   0x80000000UL, 0x3fe34497UL, 0x857761aaUL, 0x3e5fff53UL, 0x00000000UL,
1237   0x3fe3387aUL, 0xf872d68cUL, 0x3e484f4dUL, 0xa0000000UL, 0x3fe32c5fUL,
1238   0x087e97c2UL, 0x3e52842eUL, 0x80000000UL, 0x3fe32048UL, 0x73d6d0c0UL,
1239   0xbe503edfUL, 0x80000000UL, 0x3fe31434UL, 0x0c1456a1UL, 0xbe5f72adUL,
1240   0xa0000000UL, 0x3fe30823UL, 0x83a1a4d5UL, 0xbe5e65ccUL, 0xe0000000UL,
1241   0x3fe2fc15UL, 0x855a7390UL, 0xbe506438UL, 0x40000000UL, 0x3fe2f00bUL,
1242   0xa2898287UL, 0x3e3d22a2UL, 0xe0000000UL, 0x3fe2e403UL, 0x8b56f66fUL,
1243   0xbe5aa5fdUL, 0x80000000UL, 0x3fe2d7ffUL, 0x52db119aUL, 0x3e3a2e3dUL,
1244   0x60000000UL, 0x3fe2cbfeUL, 0xe2ddd4c0UL, 0xbe586469UL, 0x40000000UL,
1245   0x3fe2c000UL, 0x6b01bf10UL, 0x3e352b9dUL, 0x40000000UL, 0x3fe2b405UL,
1246   0xb07a1cdfUL, 0x3e5c5cdaUL, 0x80000000UL, 0x3fe2a80dUL, 0xc7b5f868UL,
1247   0xbe5668b3UL, 0xc0000000UL, 0x3fe29c18UL, 0x185edf62UL, 0xbe563d66UL,
1248   0x00000000UL, 0x3fe29027UL, 0xf729e1ccUL, 0x3e59a9a0UL, 0x80000000UL,
1249   0x3fe28438UL, 0x6433c727UL, 0xbe43cc89UL, 0x00000000UL, 0x3fe2784dUL,
1250   0x41782631UL, 0xbe30750cUL, 0xa0000000UL, 0x3fe26c64UL, 0x914911b7UL,
1251   0xbe58290eUL, 0x40000000UL, 0x3fe2607fUL, 0x3dcc73e1UL, 0xbe4269cdUL,
1252   0x00000000UL, 0x3fe2549dUL, 0x2751bf70UL, 0xbe5a6998UL, 0xc0000000UL,
1253   0x3fe248bdUL, 0x4248b9fbUL, 0xbe4ddb00UL, 0x80000000UL, 0x3fe23ce1UL,
1254   0xf35cf82fUL, 0x3e561b71UL, 0x60000000UL, 0x3fe23108UL, 0x8e481a2dUL,
1255   0x3e518fb9UL, 0x60000000UL, 0x3fe22532UL, 0x5ab96edcUL, 0xbe5fafc5UL,
1256   0x40000000UL, 0x3fe2195fUL, 0x80943911UL, 0xbe07f819UL, 0x40000000UL,
1257   0x3fe20d8fUL, 0x386f2d6cUL, 0xbe54ba8bUL, 0x40000000UL, 0x3fe201c2UL,
1258   0xf29664acUL, 0xbe5eb815UL, 0x20000000UL, 0x3fe1f5f8UL, 0x64f03390UL,
1259   0x3e5e320cUL, 0x20000000UL, 0x3fe1ea31UL, 0x747ff696UL, 0x3e5ef0a5UL,
1260   0x40000000UL, 0x3fe1de6dUL, 0x3e9ceb51UL, 0xbe5f8d27UL, 0x20000000UL,
1261   0x3fe1d2acUL, 0x4ae0b55eUL, 0x3e5faa21UL, 0x20000000UL, 0x3fe1c6eeUL,
1262   0x28569a5eUL, 0x3e598a4fUL, 0x20000000UL, 0x3fe1bb33UL, 0x54b33e07UL,
1263   0x3e46130aUL, 0x20000000UL, 0x3fe1af7bUL, 0x024f1078UL, 0xbe4dbf93UL,
1264   0x00000000UL, 0x3fe1a3c6UL, 0xb0783bfaUL, 0x3e419248UL, 0xe0000000UL,
1265   0x3fe19813UL, 0x2f02b836UL, 0x3e4e02b7UL, 0xc0000000UL, 0x3fe18c64UL,
1266   0x28dec9d4UL, 0x3e09064fUL, 0x80000000UL, 0x3fe180b8UL, 0x45cbf406UL,
1267   0x3e5b1f46UL, 0x40000000UL, 0x3fe1750fUL, 0x03d9964cUL, 0x3e5b0a79UL,
1268   0x00000000UL, 0x3fe16969UL, 0x8b5b882bUL, 0xbe238086UL, 0xa0000000UL,
1269   0x3fe15dc5UL, 0x73bad6f8UL, 0xbdf1fca4UL, 0x20000000UL, 0x3fe15225UL,
1270   0x5385769cUL, 0x3e5e8d76UL, 0xa0000000UL, 0x3fe14687UL, 0x1676dc6bUL,
1271   0x3e571d08UL, 0x20000000UL, 0x3fe13aedUL, 0xa8c41c7fUL, 0xbe598a25UL,
1272   0x60000000UL, 0x3fe12f55UL, 0xc4e1aaf0UL, 0x3e435277UL, 0xa0000000UL,
1273   0x3fe123c0UL, 0x403638e1UL, 0xbe21aa7cUL, 0xc0000000UL, 0x3fe1182eUL,
1274   0x557a092bUL, 0xbdd0116bUL, 0xc0000000UL, 0x3fe10c9fUL, 0x7d779f66UL,
1275   0x3e4a61baUL, 0xc0000000UL, 0x3fe10113UL, 0x2b09c645UL, 0xbe5d586eUL,
1276   0x20000000UL, 0x3fe0ea04UL, 0xea2cad46UL, 0x3e5aa97cUL, 0x20000000UL,
1277   0x3fe0d300UL, 0x23190e54UL, 0x3e50f1a7UL, 0xa0000000UL, 0x3fe0bc07UL,
1278   0x1379a5a6UL, 0xbe51619dUL, 0x60000000UL, 0x3fe0a51aUL, 0x926a3d4aUL,
1279   0x3e5cf019UL, 0xa0000000UL, 0x3fe08e38UL, 0xa8c24358UL, 0x3e35241eUL,
1280   0x20000000UL, 0x3fe07762UL, 0x24317e7aUL, 0x3e512cfaUL, 0x00000000UL,
1281   0x3fe06097UL, 0xfd9cf274UL, 0xbe55bef3UL, 0x00000000UL, 0x3fe049d7UL,
1282   0x3689b49dUL, 0xbe36d26dUL, 0x40000000UL, 0x3fe03322UL, 0xf72ef6c4UL,
1283   0xbe54cd08UL, 0xa0000000UL, 0x3fe01c78UL, 0x23702d2dUL, 0xbe5900bfUL,
1284   0x00000000UL, 0x3fe005daUL, 0x3f59c14cUL, 0x3e57d80bUL, 0x40000000UL,
1285   0x3fdfde8dUL, 0xad67766dUL, 0xbe57fad4UL, 0x40000000UL, 0x3fdfb17cUL,
1286   0x644f4ae7UL, 0x3e1ee43bUL, 0x40000000UL, 0x3fdf8481UL, 0x903234d2UL,
1287   0x3e501a86UL, 0x40000000UL, 0x3fdf579cUL, 0xafe9e509UL, 0xbe267c3eUL,
1288   0x00000000UL, 0x3fdf2acdUL, 0xb7dfda0bUL, 0xbe48149bUL, 0x40000000UL,
1289   0x3fdefe13UL, 0x3b94305eUL, 0x3e5f4ea7UL, 0x80000000UL, 0x3fded16fUL,
1290   0x5d95da61UL, 0xbe55c198UL, 0x00000000UL, 0x3fdea4e1UL, 0x406960c9UL,
1291   0xbdd99a19UL, 0x00000000UL, 0x3fde7868UL, 0xd22f3539UL, 0x3e470c78UL,
1292   0x80000000UL, 0x3fde4c04UL, 0x83eec535UL, 0xbe3e1232UL, 0x40000000UL,
1293   0x3fde1fb6UL, 0x3dfbffcbUL, 0xbe4b7d71UL, 0x40000000UL, 0x3fddf37dUL,
1294   0x7e1be4e0UL, 0xbe5b8f8fUL, 0x40000000UL, 0x3fddc759UL, 0x46dae887UL,
1295   0xbe350458UL, 0x80000000UL, 0x3fdd9b4aUL, 0xed6ecc49UL, 0xbe5f0045UL,
1296   0x80000000UL, 0x3fdd6f50UL, 0x2e9e883cUL, 0x3e2915daUL, 0x80000000UL,
1297   0x3fdd436bUL, 0xf0bccb32UL, 0x3e4a68c9UL, 0x80000000UL, 0x3fdd179bUL,
1298   0x9bbfc779UL, 0xbe54a26aUL, 0x00000000UL, 0x3fdcebe0UL, 0x7cea33abUL,
1299   0x3e43c6b7UL, 0x40000000UL, 0x3fdcc039UL, 0xe740fd06UL, 0x3e5526c2UL,
1300   0x40000000UL, 0x3fdc94a7UL, 0x9eadeb1aUL, 0xbe396d8dUL, 0xc0000000UL,
1301   0x3fdc6929UL, 0xf0a8f95aUL, 0xbe5c0ab2UL, 0x80000000UL, 0x3fdc3dc0UL,
1302   0x6ee2693bUL, 0x3e0992e6UL, 0xc0000000UL, 0x3fdc126bUL, 0x5ac6b581UL,
1303   0xbe2834b6UL, 0x40000000UL, 0x3fdbe72bUL, 0x8cc226ffUL, 0x3e3596a6UL,
1304   0x00000000UL, 0x3fdbbbffUL, 0xf92a74bbUL, 0x3e3c5813UL, 0x00000000UL,
1305   0x3fdb90e7UL, 0x479664c0UL, 0xbe50d644UL, 0x00000000UL, 0x3fdb65e3UL,
1306   0x5004975bUL, 0xbe55258fUL, 0x00000000UL, 0x3fdb3af3UL, 0xe4b23194UL,
1307   0xbe588407UL, 0xc0000000UL, 0x3fdb1016UL, 0xe65d4d0aUL, 0x3e527c26UL,
1308   0x80000000UL, 0x3fdae54eUL, 0x814fddd6UL, 0x3e5962a2UL, 0x40000000UL,
1309   0x3fdaba9aUL, 0xe19d0913UL, 0xbe562f4eUL, 0x80000000UL, 0x3fda8ff9UL,
1310   0x43cfd006UL, 0xbe4cfdebUL, 0x40000000UL, 0x3fda656cUL, 0x686f0a4eUL,
1311   0x3e5e47a8UL, 0xc0000000UL, 0x3fda3af2UL, 0x7200d410UL, 0x3e5e1199UL,
1312   0xc0000000UL, 0x3fda108cUL, 0xabd2266eUL, 0x3e5ee4d1UL, 0x40000000UL,
1313   0x3fd9e63aUL, 0x396f8f2cUL, 0x3e4dbffbUL, 0x00000000UL, 0x3fd9bbfbUL,
1314   0xe32b25ddUL, 0x3e5c3a54UL, 0x40000000UL, 0x3fd991cfUL, 0x431e4035UL,
1315   0xbe457925UL, 0x80000000UL, 0x3fd967b6UL, 0x7bed3dd3UL, 0x3e40c61dUL,
1316   0x00000000UL, 0x3fd93db1UL, 0xd7449365UL, 0x3e306419UL, 0x80000000UL,
1317   0x3fd913beUL, 0x1746e791UL, 0x3e56fcfcUL, 0x40000000UL, 0x3fd8e9dfUL,
1318   0xf3a9028bUL, 0xbe5041b9UL, 0xc0000000UL, 0x3fd8c012UL, 0x56840c50UL,
1319   0xbe26e20aUL, 0x40000000UL, 0x3fd89659UL, 0x19763102UL, 0xbe51f466UL,
1320   0x80000000UL, 0x3fd86cb2UL, 0x7032de7cUL, 0xbe4d298aUL, 0x80000000UL,
1321   0x3fd8431eUL, 0xdeb39fabUL, 0xbe4361ebUL, 0x40000000UL, 0x3fd8199dUL,
1322   0x5d01cbe0UL, 0xbe5425b3UL, 0x80000000UL, 0x3fd7f02eUL, 0x3ce99aa9UL,
1323   0x3e146fa8UL, 0x80000000UL, 0x3fd7c6d2UL, 0xd1a262b9UL, 0xbe5a1a69UL,
1324   0xc0000000UL, 0x3fd79d88UL, 0x8606c236UL, 0x3e423a08UL, 0x80000000UL,
1325   0x3fd77451UL, 0x8fd1e1b7UL, 0x3e5a6a63UL, 0xc0000000UL, 0x3fd74b2cUL,
1326   0xe491456aUL, 0x3e42c1caUL, 0x40000000UL, 0x3fd7221aUL, 0x4499a6d7UL,
1327   0x3e36a69aUL, 0x00000000UL, 0x3fd6f91aUL, 0x5237df94UL, 0xbe0f8f02UL,
1328   0x00000000UL, 0x3fd6d02cUL, 0xb6482c6eUL, 0xbe5abcf7UL, 0x00000000UL,
1329   0x3fd6a750UL, 0x1919fd61UL, 0xbe57ade2UL, 0x00000000UL, 0x3fd67e86UL,
1330   0xaa7a994dUL, 0xbe3f3fbdUL, 0x00000000UL, 0x3fd655ceUL, 0x67db014cUL,
1331   0x3e33c550UL, 0x00000000UL, 0x3fd62d28UL, 0xa82856b7UL, 0xbe1409d1UL,
1332   0xc0000000UL, 0x3fd60493UL, 0x1e6a300dUL, 0x3e55d899UL, 0x80000000UL,
1333   0x3fd5dc11UL, 0x1222bd5cUL, 0xbe35bfc0UL, 0xc0000000UL, 0x3fd5b3a0UL,
1334   0x6e8dc2d3UL, 0x3e5d4d79UL, 0x00000000UL, 0x3fd58b42UL, 0xe0e4ace6UL,
1335   0xbe517303UL, 0x80000000UL, 0x3fd562f4UL, 0xb306e0a8UL, 0x3e5edf0fUL,
1336   0xc0000000UL, 0x3fd53ab8UL, 0x6574bc54UL, 0x3e5ee859UL, 0x80000000UL,
1337   0x3fd5128eUL, 0xea902207UL, 0x3e5f6188UL, 0xc0000000UL, 0x3fd4ea75UL,
1338   0x9f911d79UL, 0x3e511735UL, 0x80000000UL, 0x3fd4c26eUL, 0xf9c77397UL,
1339   0xbe5b1643UL, 0x40000000UL, 0x3fd49a78UL, 0x15fc9258UL, 0x3e479a37UL,
1340   0x80000000UL, 0x3fd47293UL, 0xd5a04dd9UL, 0xbe426e56UL, 0xc0000000UL,
1341   0x3fd44abfUL, 0xe04042f5UL, 0x3e56f7c6UL, 0x40000000UL, 0x3fd422fdUL,
1342   0x1d8bf2c8UL, 0x3e5d8810UL, 0x00000000UL, 0x3fd3fb4cUL, 0x88a8ddeeUL,
1343   0xbe311454UL, 0xc0000000UL, 0x3fd3d3abUL, 0x3e3b5e47UL, 0xbe5d1b72UL,
1344   0x40000000UL, 0x3fd3ac1cUL, 0xc2ab5d59UL, 0x3e31b02bUL, 0xc0000000UL,
1345   0x3fd3849dUL, 0xd4e34b9eUL, 0x3e51cb2fUL, 0x40000000UL, 0x3fd35d30UL,
1346   0x177204fbUL, 0xbe2b8cd7UL, 0x80000000UL, 0x3fd335d3UL, 0xfcd38c82UL,
1347   0xbe4356e1UL, 0x80000000UL, 0x3fd30e87UL, 0x64f54accUL, 0xbe4e6224UL,
1348   0x00000000UL, 0x3fd2e74cUL, 0xaa7975d9UL, 0x3e5dc0feUL, 0x80000000UL,
1349   0x3fd2c021UL, 0x516dab3fUL, 0xbe50ffa3UL, 0x40000000UL, 0x3fd29907UL,
1350   0x2bfb7313UL, 0x3e5674a2UL, 0xc0000000UL, 0x3fd271fdUL, 0x0549fc99UL,
1351   0x3e385d29UL, 0xc0000000UL, 0x3fd24b04UL, 0x55b63073UL, 0xbe500c6dUL,
1352   0x00000000UL, 0x3fd2241cUL, 0x3f91953aUL, 0x3e389977UL, 0xc0000000UL,
1353   0x3fd1fd43UL, 0xa1543f71UL, 0xbe3487abUL, 0xc0000000UL, 0x3fd1d67bUL,
1354   0x4ec8867cUL, 0x3df6a2dcUL, 0x00000000UL, 0x3fd1afc4UL, 0x4328e3bbUL,
1355   0x3e41d9c0UL, 0x80000000UL, 0x3fd1891cUL, 0x2e1cda84UL, 0x3e3bdd87UL,
1356   0x40000000UL, 0x3fd16285UL, 0x4b5331aeUL, 0xbe53128eUL, 0x00000000UL,
1357   0x3fd13bfeUL, 0xb9aec164UL, 0xbe52ac98UL, 0xc0000000UL, 0x3fd11586UL,
1358   0xd91e1316UL, 0xbe350630UL, 0x80000000UL, 0x3fd0ef1fUL, 0x7cacc12cUL,
1359   0x3e3f5219UL, 0x40000000UL, 0x3fd0c8c8UL, 0xbce277b7UL, 0x3e3d30c0UL,
1360   0x00000000UL, 0x3fd0a281UL, 0x2a63447dUL, 0xbe541377UL, 0x80000000UL,
1361   0x3fd07c49UL, 0xfac483b5UL, 0xbe5772ecUL, 0xc0000000UL, 0x3fd05621UL,
1362   0x36b8a570UL, 0xbe4fd4bdUL, 0xc0000000UL, 0x3fd03009UL, 0xbae505f7UL,
1363   0xbe450388UL, 0x80000000UL, 0x3fd00a01UL, 0x3e35aeadUL, 0xbe5430fcUL,
1364   0x80000000UL, 0x3fcfc811UL, 0x707475acUL, 0x3e38806eUL, 0x80000000UL,
1365   0x3fcf7c3fUL, 0xc91817fcUL, 0xbe40cceaUL, 0x80000000UL, 0x3fcf308cUL,
1366   0xae05d5e9UL, 0xbe4919b8UL, 0x80000000UL, 0x3fcee4f8UL, 0xae6cc9e6UL,
1367   0xbe530b94UL, 0x00000000UL, 0x3fce9983UL, 0x1efe3e8eUL, 0x3e57747eUL,
1368   0x00000000UL, 0x3fce4e2dUL, 0xda78d9bfUL, 0xbe59a608UL, 0x00000000UL,
1369   0x3fce02f5UL, 0x8abe2c2eUL, 0x3e4a35adUL, 0x00000000UL, 0x3fcdb7dcUL,
1370   0x1495450dUL, 0xbe0872ccUL, 0x80000000UL, 0x3fcd6ce1UL, 0x86ee0ba0UL,
1371   0xbe4f59a0UL, 0x00000000UL, 0x3fcd2205UL, 0xe81ca888UL, 0x3e5402c3UL,
1372   0x00000000UL, 0x3fccd747UL, 0x3b4424b9UL, 0x3e5dfdc3UL, 0x80000000UL,
1373   0x3fcc8ca7UL, 0xd305b56cUL, 0x3e202da6UL, 0x00000000UL, 0x3fcc4226UL,
1374   0x399a6910UL, 0xbe482a1cUL, 0x80000000UL, 0x3fcbf7c2UL, 0x747f7938UL,
1375   0xbe587372UL, 0x80000000UL, 0x3fcbad7cUL, 0x6fc246a0UL, 0x3e50d83dUL,
1376   0x00000000UL, 0x3fcb6355UL, 0xee9e9be5UL, 0xbe5c35bdUL, 0x80000000UL,
1377   0x3fcb194aUL, 0x8416c0bcUL, 0x3e546d4fUL, 0x00000000UL, 0x3fcacf5eUL,
1378   0x49f7f08fUL, 0x3e56da76UL, 0x00000000UL, 0x3fca858fUL, 0x5dc30de2UL,
1379   0x3e5f390cUL, 0x00000000UL, 0x3fca3bdeUL, 0x950583b6UL, 0xbe5e4169UL,
1380   0x80000000UL, 0x3fc9f249UL, 0x33631553UL, 0x3e52aeb1UL, 0x00000000UL,
1381   0x3fc9a8d3UL, 0xde8795a6UL, 0xbe59a504UL, 0x00000000UL, 0x3fc95f79UL,
1382   0x076bf41eUL, 0x3e5122feUL, 0x80000000UL, 0x3fc9163cUL, 0x2914c8e7UL,
1383   0x3e3dd064UL, 0x00000000UL, 0x3fc8cd1dUL, 0x3a30eca3UL, 0xbe21b4aaUL,
1384   0x80000000UL, 0x3fc8841aUL, 0xb2a96650UL, 0xbe575444UL, 0x80000000UL,
1385   0x3fc83b34UL, 0x2376c0cbUL, 0xbe2a74c7UL, 0x80000000UL, 0x3fc7f26bUL,
1386   0xd8a0b653UL, 0xbe5181b6UL, 0x00000000UL, 0x3fc7a9bfUL, 0x32257882UL,
1387   0xbe4a78b4UL, 0x00000000UL, 0x3fc7612fUL, 0x1eee8bd9UL, 0xbe1bfe9dUL,
1388   0x80000000UL, 0x3fc718bbUL, 0x0c603cc4UL, 0x3e36fdc9UL, 0x80000000UL,
1389   0x3fc6d064UL, 0x3728b8cfUL, 0xbe1e542eUL, 0x80000000UL, 0x3fc68829UL,
1390   0xc79a4067UL, 0x3e5c380fUL, 0x00000000UL, 0x3fc6400bUL, 0xf69eac69UL,
1391   0x3e550a84UL, 0x80000000UL, 0x3fc5f808UL, 0xb7a780a4UL, 0x3e5d9224UL,
1392   0x80000000UL, 0x3fc5b022UL, 0xad9dfb1eUL, 0xbe55242fUL, 0x00000000UL,
1393   0x3fc56858UL, 0x659b18beUL, 0xbe4bfda3UL, 0x80000000UL, 0x3fc520a9UL,
1394   0x66ee3631UL, 0xbe57d769UL, 0x80000000UL, 0x3fc4d916UL, 0x1ec62819UL,
1395   0x3e2427f7UL, 0x80000000UL, 0x3fc4919fUL, 0xdec25369UL, 0xbe435431UL,
1396   0x00000000UL, 0x3fc44a44UL, 0xa8acfc4bUL, 0xbe3c62e8UL, 0x00000000UL,
1397   0x3fc40304UL, 0xcf1d3eabUL, 0xbdfba29fUL, 0x80000000UL, 0x3fc3bbdfUL,
1398   0x79aba3eaUL, 0xbdf1b7c8UL, 0x80000000UL, 0x3fc374d6UL, 0xb8d186daUL,
1399   0xbe5130cfUL, 0x80000000UL, 0x3fc32de8UL, 0x9d74f152UL, 0x3e2285b6UL,
1400   0x00000000UL, 0x3fc2e716UL, 0x50ae7ca9UL, 0xbe503920UL, 0x80000000UL,
1401   0x3fc2a05eUL, 0x6caed92eUL, 0xbe533924UL, 0x00000000UL, 0x3fc259c2UL,
1402   0x9cb5034eUL, 0xbe510e31UL, 0x80000000UL, 0x3fc21340UL, 0x12c4d378UL,
1403   0xbe540b43UL, 0x80000000UL, 0x3fc1ccd9UL, 0xcc418706UL, 0x3e59887aUL,
1404   0x00000000UL, 0x3fc1868eUL, 0x921f4106UL, 0xbe528e67UL, 0x80000000UL,
1405   0x3fc1405cUL, 0x3969441eUL, 0x3e5d8051UL, 0x00000000UL, 0x3fc0fa46UL,
1406   0xd941ef5bUL, 0x3e5f9079UL, 0x80000000UL, 0x3fc0b44aUL, 0x5a3e81b2UL,
1407   0xbe567691UL, 0x00000000UL, 0x3fc06e69UL, 0x9d66afe7UL, 0xbe4d43fbUL,
1408   0x00000000UL, 0x3fc028a2UL, 0x0a92a162UL, 0xbe52f394UL, 0x00000000UL,
1409   0x3fbfc5eaUL, 0x209897e5UL, 0x3e529e37UL, 0x00000000UL, 0x3fbf3ac5UL,
1410   0x8458bd7bUL, 0x3e582831UL, 0x00000000UL, 0x3fbeafd5UL, 0xb8d8b4b8UL,
1411   0xbe486b4aUL, 0x00000000UL, 0x3fbe2518UL, 0xe0a3b7b6UL, 0x3e5bafd2UL,
1412   0x00000000UL, 0x3fbd9a90UL, 0x2bf2710eUL, 0x3e383b2bUL, 0x00000000UL,
1413   0x3fbd103cUL, 0x73eb6ab7UL, 0xbe56d78dUL, 0x00000000UL, 0x3fbc861bUL,
1414   0x32ceaff5UL, 0xbe32dc5aUL, 0x00000000UL, 0x3fbbfc2eUL, 0xbee04cb7UL,
1415   0xbe4a71a4UL, 0x00000000UL, 0x3fbb7274UL, 0x35ae9577UL, 0x3e38142fUL,
1416   0x00000000UL, 0x3fbae8eeUL, 0xcbaddab4UL, 0xbe5490f0UL, 0x00000000UL,
1417   0x3fba5f9aUL, 0x95ce1114UL, 0x3e597c71UL, 0x00000000UL, 0x3fb9d67aUL,
1418   0x6d7c0f78UL, 0x3e3abc2dUL, 0x00000000UL, 0x3fb94d8dUL, 0x2841a782UL,
1419   0xbe566cbcUL, 0x00000000UL, 0x3fb8c4d2UL, 0x6ed429c6UL, 0xbe3cfff9UL,
1420   0x00000000UL, 0x3fb83c4aUL, 0xe4a49fbbUL, 0xbe552964UL, 0x00000000UL,
1421   0x3fb7b3f4UL, 0x2193d81eUL, 0xbe42fa72UL, 0x00000000UL, 0x3fb72bd0UL,
1422   0xdd70c122UL, 0x3e527a8cUL, 0x00000000UL, 0x3fb6a3dfUL, 0x03108a54UL,
1423   0xbe450393UL, 0x00000000UL, 0x3fb61c1fUL, 0x30ff7954UL, 0x3e565840UL,
1424   0x00000000UL, 0x3fb59492UL, 0xdedd460cUL, 0xbe5422b5UL, 0x00000000UL,
1425   0x3fb50d36UL, 0x950f9f45UL, 0xbe5313f6UL, 0x00000000UL, 0x3fb4860bUL,
1426   0x582cdcb1UL, 0x3e506d39UL, 0x00000000UL, 0x3fb3ff12UL, 0x7216d3a6UL,
1427   0x3e4aa719UL, 0x00000000UL, 0x3fb3784aUL, 0x57a423fdUL, 0x3e5a9b9fUL,
1428   0x00000000UL, 0x3fb2f1b4UL, 0x7a138b41UL, 0xbe50b418UL, 0x00000000UL,
1429   0x3fb26b4eUL, 0x2fbfd7eaUL, 0x3e23a53eUL, 0x00000000UL, 0x3fb1e519UL,
1430   0x18913ccbUL, 0x3e465fc1UL, 0x00000000UL, 0x3fb15f15UL, 0x7ea24e21UL,
1431   0x3e042843UL, 0x00000000UL, 0x3fb0d941UL, 0x7c6d9c77UL, 0x3e59f61eUL,
1432   0x00000000UL, 0x3fb0539eUL, 0x114efd44UL, 0x3e4ccab7UL, 0x00000000UL,
1433   0x3faf9c56UL, 0x1777f657UL, 0x3e552f65UL, 0x00000000UL, 0x3fae91d2UL,
1434   0xc317b86aUL, 0xbe5a61e0UL, 0x00000000UL, 0x3fad87acUL, 0xb7664efbUL,
1435   0xbe41f64eUL, 0x00000000UL, 0x3fac7de6UL, 0x5d3d03a9UL, 0x3e0807a0UL,
1436   0x00000000UL, 0x3fab7480UL, 0x743c38ebUL, 0xbe3726e1UL, 0x00000000UL,
1437   0x3faa6b78UL, 0x06a253f1UL, 0x3e5ad636UL, 0x00000000UL, 0x3fa962d0UL,
1438   0xa35f541bUL, 0x3e5a187aUL, 0x00000000UL, 0x3fa85a88UL, 0x4b86e446UL,
1439   0xbe508150UL, 0x00000000UL, 0x3fa7529cUL, 0x2589cacfUL, 0x3e52938aUL,
1440   0x00000000UL, 0x3fa64b10UL, 0xaf6b11f2UL, 0xbe3454cdUL, 0x00000000UL,
1441   0x3fa543e2UL, 0x97506fefUL, 0xbe5fdec5UL, 0x00000000UL, 0x3fa43d10UL,
1442   0xe75f7dd9UL, 0xbe388dd3UL, 0x00000000UL, 0x3fa3369cUL, 0xa4139632UL,
1443   0xbdea5177UL, 0x00000000UL, 0x3fa23086UL, 0x352d6f1eUL, 0xbe565ad6UL,
1444   0x00000000UL, 0x3fa12accUL, 0x77449eb7UL, 0xbe50d5c7UL, 0x00000000UL,
1445   0x3fa0256eUL, 0x7478da78UL, 0x3e404724UL, 0x00000000UL, 0x3f9e40dcUL,
1446   0xf59cef7fUL, 0xbe539d0aUL, 0x00000000UL, 0x3f9c3790UL, 0x1511d43cUL,
1447   0x3e53c2c8UL, 0x00000000UL, 0x3f9a2f00UL, 0x9b8bff3cUL, 0xbe43b3e1UL,
1448   0x00000000UL, 0x3f982724UL, 0xad1e22a5UL, 0x3e46f0bdUL, 0x00000000UL,
1449   0x3f962000UL, 0x130d9356UL, 0x3e475ba0UL, 0x00000000UL, 0x3f941994UL,
1450   0x8f86f883UL, 0xbe513d0bUL, 0x00000000UL, 0x3f9213dcUL, 0x914d0dc8UL,
1451   0xbe534335UL, 0x00000000UL, 0x3f900ed8UL, 0x2d73e5e7UL, 0xbe22ba75UL,
1452   0x00000000UL, 0x3f8c1510UL, 0xc5b7d70eUL, 0x3e599c5dUL, 0x00000000UL,
1453   0x3f880de0UL, 0x8a27857eUL, 0xbe3d28c8UL, 0x00000000UL, 0x3f840810UL,
1454   0xda767328UL, 0x3e531b3dUL, 0x00000000UL, 0x3f8003b0UL, 0x77bacaf3UL,
1455   0xbe5f04e3UL, 0x00000000UL, 0x3f780150UL, 0xdf4b0720UL, 0x3e5a8bffUL,
1456   0x00000000UL, 0x3f6ffc40UL, 0x34c48e71UL, 0xbe3fcd99UL, 0x00000000UL,
1457   0x3f5ff6c0UL, 0x1ad218afUL, 0xbe4c78a7UL, 0x00000000UL, 0x00000000UL,
1458   0x00000000UL, 0x80000000UL
1459 };
1460 
// Constant pair for the pow() stub: two 64-bit bit patterns, each stored as
// a (low 32 bits, high 32 bits) pair of juint words:
//   words 0-1: 0x3fe62e42fefa39ef  -> +ln(2) when read as an IEEE-754 double
//   words 2-3: 0xbfe62e42fefa39ef  -> -ln(2) when read as an IEEE-754 double
// The low-word-first pairing assumes the data is loaded as little-endian
// 64-bit values. ALIGNED_(8) requests 8-byte alignment for the array.
// fast_pow takes this array's address into its local `log2`.
1461 ALIGNED_(8) juint _log2_pow[] =
1462 {
1463   0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL, 0xbfe62e42UL
1464 };
1465 
1466 // Registers:
1467 // input: xmm0, xmm1
1468 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
1469 //          rax, rdx, rcx, r8, r11
1470 
1471 // Code generated by Intel C compiler for LIBM library
1472 
1473 void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
1474   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
1475   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
1476   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
1477   Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, L_2TAG_PACKET_14_0_2, L_2TAG_PACKET_15_0_2;
1478   Label L_2TAG_PACKET_16_0_2, L_2TAG_PACKET_17_0_2, L_2TAG_PACKET_18_0_2, L_2TAG_PACKET_19_0_2;
1479   Label L_2TAG_PACKET_20_0_2, L_2TAG_PACKET_21_0_2, L_2TAG_PACKET_22_0_2, L_2TAG_PACKET_23_0_2;
1480   Label L_2TAG_PACKET_24_0_2, L_2TAG_PACKET_25_0_2, L_2TAG_PACKET_26_0_2, L_2TAG_PACKET_27_0_2;
1481   Label L_2TAG_PACKET_28_0_2, L_2TAG_PACKET_29_0_2, L_2TAG_PACKET_30_0_2, L_2TAG_PACKET_31_0_2;
1482   Label L_2TAG_PACKET_32_0_2, L_2TAG_PACKET_33_0_2, L_2TAG_PACKET_34_0_2, L_2TAG_PACKET_35_0_2;
1483   Label L_2TAG_PACKET_36_0_2, L_2TAG_PACKET_37_0_2, L_2TAG_PACKET_38_0_2, L_2TAG_PACKET_39_0_2;
1484   Label L_2TAG_PACKET_40_0_2, L_2TAG_PACKET_41_0_2, L_2TAG_PACKET_42_0_2, L_2TAG_PACKET_43_0_2;
1485   Label L_2TAG_PACKET_44_0_2, L_2TAG_PACKET_45_0_2, L_2TAG_PACKET_46_0_2, L_2TAG_PACKET_47_0_2;
1486   Label L_2TAG_PACKET_48_0_2, L_2TAG_PACKET_49_0_2, L_2TAG_PACKET_50_0_2, L_2TAG_PACKET_51_0_2;
1487   Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2;
1488   Label L_2TAG_PACKET_56_0_2;
1489   Label B1_2, B1_3, B1_5, start;
1490 
1491   assert_different_registers(tmp1, tmp2, eax, ecx, edx);
1492   jmp(start);
1493   address HIGHSIGMASK = (address)_HIGHSIGMASK;
1494   address LOG2_E = (address)_LOG2_E;
1495   address coeff = (address)_coeff_pow;
1496   address L_tbl = (address)_L_tbl_pow;
1497   address HIGHMASK_Y = (address)_HIGHMASK_Y;
1498   address T_exp = (address)_T_exp;
1499   address e_coeff = (address)_e_coeff;
1500   address coeff_h = (address)_coeff_h;
1501   address HIGHMASK_LOG_X = (address)_HIGHMASK_LOG_X;
1502   address HALFMASK = (address)_HALFMASK;
1503   address log2 = (address)_log2_pow;
1504 
1505 
1506   bind(start);
1507   subq(rsp, 40);
1508   movsd(Address(rsp, 8), xmm0);
1509   movsd(Address(rsp, 16), xmm1);
1510 
1511   bind(B1_2);
1512   pextrw(eax, xmm0, 3);
1513   xorpd(xmm2, xmm2);
1514   mov64(tmp2, 0x3ff0000000000000);
1515   movdq(xmm2, tmp2);
1516   movl(tmp1, 1069088768);
1517   movdq(xmm7, tmp1);
1518   xorpd(xmm1, xmm1);
1519   mov64(tmp3, 0x77f0000000000000);
1520   movdq(xmm1, tmp3);
1521   movdqu(xmm3, xmm0);
1522   movl(edx, 32752);
1523   andl(edx, eax);
1524   subl(edx, 16368);
1525   movl(ecx, edx);
1526   sarl(edx, 31);
1527   addl(ecx, edx);
1528   xorl(ecx, edx);
1529   por(xmm0, xmm2);
1530   movdqu(xmm6, ExternalAddress(HIGHSIGMASK));    //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
1531   psrlq(xmm0, 27);
1532   movq(xmm2, ExternalAddress(LOG2_E));    //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
1533   psrld(xmm0, 2);
1534   addl(ecx, 16);
1535   bsrl(ecx, ecx);
1536   rcpps(xmm0, xmm0);
1537   psllq(xmm3, 12);
1538   movl(tmp4, 8192);
1539   movdq(xmm4, tmp4);
1540   psrlq(xmm3, 12);
1541   subl(eax, 16);
1542   cmpl(eax, 32736);
1543   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
1544   movl(tmp1, 0);
1545 
1546   bind(L_2TAG_PACKET_1_0_2);
1547   mulss(xmm0, xmm7);
1548   movl(edx, -1);
1549   subl(ecx, 4);
1550   shll(edx);
1551   shlq(edx, 32);
1552   movdq(xmm5, edx);
1553   por(xmm3, xmm1);
1554   subl(eax, 16351);
1555   cmpl(eax, 1);
1556   jcc(Assembler::belowEqual, L_2TAG_PACKET_2_0_2);
1557   paddd(xmm0, xmm4);
1558   pand(xmm5, xmm3);
1559   movdl(edx, xmm0);
1560   psllq(xmm0, 29);
1561 
1562   bind(L_2TAG_PACKET_3_0_2);
1563   subsd(xmm3, xmm5);
1564   pand(xmm0, xmm6);
1565   subl(eax, 1);
1566   sarl(eax, 4);
1567   cvtsi2sdl(xmm7, eax);
1568   mulpd(xmm5, xmm0);
1569 
1570   bind(L_2TAG_PACKET_4_0_2);
1571   mulsd(xmm3, xmm0);
1572   movdqu(xmm1, ExternalAddress(coeff));    //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
1573   lea(tmp4, ExternalAddress(L_tbl));
1574   subsd(xmm5, xmm2);
1575   movdqu(xmm4, ExternalAddress(16 + coeff));    //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
1576   movl(ecx, eax);
1577   sarl(eax, 31);
1578   addl(ecx, eax);
1579   xorl(eax, ecx);
1580   addl(eax, 1);
1581   bsrl(eax, eax);
1582   unpcklpd(xmm5, xmm3);
1583   movdqu(xmm6, ExternalAddress(32 + coeff));    //0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL
1584   addsd(xmm3, xmm5);
1585   andl(edx, 16760832);
1586   shrl(edx, 10);
1587   addpd(xmm5, Address(tmp4, edx, Address::times_1, -3648));
1588   movdqu(xmm0, ExternalAddress(48 + coeff));    //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
1589   pshufd(xmm2, xmm3, 68);
1590   mulsd(xmm3, xmm3);
1591   mulpd(xmm1, xmm2);
1592   mulpd(xmm4, xmm2);
1593   addsd(xmm5, xmm7);
1594   mulsd(xmm2, xmm3);
1595   addpd(xmm6, xmm1);
1596   mulsd(xmm3, xmm3);
1597   addpd(xmm0, xmm4);
1598   movq(xmm1, Address(rsp, 16));
1599   movw(ecx, Address(rsp, 22));
1600   pshufd(xmm7, xmm5, 238);
1601   movq(xmm4, ExternalAddress(HIGHMASK_Y));    //0x00000000UL, 0xfffffff8UL, 0x00000000UL, 0xffffffffUL
1602   mulpd(xmm6, xmm2);
1603   pshufd(xmm3, xmm3, 68);
1604   mulpd(xmm0, xmm2);
1605   shll(eax, 4);
1606   subl(eax, 15872);
1607   andl(ecx, 32752);
1608   addl(eax, ecx);
1609   mulpd(xmm3, xmm6);
1610   cmpl(eax, 624);
1611   jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
1612   xorpd(xmm6, xmm6);
1613   movl(edx, 17080);
1614   pinsrw(xmm6, edx, 3);
1615   movdqu(xmm2, xmm1);
1616   pand(xmm4, xmm1);
1617   subsd(xmm1, xmm4);
1618   mulsd(xmm4, xmm5);
1619   addsd(xmm0, xmm7);
1620   mulsd(xmm1, xmm5);
1621   movdqu(xmm7, xmm6);
1622   addsd(xmm6, xmm4);
1623   lea(tmp4, ExternalAddress(T_exp));
1624   addpd(xmm3, xmm0);
1625   movdl(edx, xmm6);
1626   subsd(xmm6, xmm7);
1627   pshufd(xmm0, xmm3, 238);
1628   subsd(xmm4, xmm6);
1629   addsd(xmm0, xmm3);
1630   movl(ecx, edx);
1631   andl(edx, 255);
1632   addl(edx, edx);
1633   movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0));
1634   addsd(xmm4, xmm1);
1635   mulsd(xmm2, xmm0);
1636   movdqu(xmm7, ExternalAddress(e_coeff));    //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
1637   movdqu(xmm3, ExternalAddress(16 + e_coeff));    //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
1638   shll(ecx, 12);
1639   xorl(ecx, tmp1);
1640   andl(rcx, -1048576);
1641   movdq(xmm6, rcx);
1642   addsd(xmm2, xmm4);
1643   mov64(tmp2, 0x3fe62e42fefa39ef);
1644   movdq(xmm1, tmp2);
1645   pshufd(xmm0, xmm2, 68);
1646   pshufd(xmm4, xmm2, 68);
1647   mulsd(xmm1, xmm2);
1648   pshufd(xmm6, xmm6, 17);
1649   mulpd(xmm0, xmm0);
1650   mulpd(xmm7, xmm4);
1651   paddd(xmm5, xmm6);
1652   mulsd(xmm1, xmm5);
1653   pshufd(xmm6, xmm5, 238);
1654   mulsd(xmm0, xmm0);
1655   addpd(xmm3, xmm7);
1656   addsd(xmm1, xmm6);
1657   mulpd(xmm0, xmm3);
1658   pshufd(xmm3, xmm0, 238);
1659   mulsd(xmm0, xmm5);
1660   mulsd(xmm3, xmm5);
1661   addsd(xmm0, xmm1);
1662   addsd(xmm0, xmm3);
1663   addsd(xmm0, xmm5);
1664   jmp(B1_5);
1665 
1666   bind(L_2TAG_PACKET_0_0_2);
1667   addl(eax, 16);
1668   movl(edx, 32752);
1669   andl(edx, eax);
1670   cmpl(edx, 32752);
1671   jcc(Assembler::equal, L_2TAG_PACKET_6_0_2);
1672   testl(eax, 32768);
1673   jcc(Assembler::notEqual, L_2TAG_PACKET_7_0_2);
1674 
1675   bind(L_2TAG_PACKET_8_0_2);
1676   movq(xmm0, Address(rsp, 8));
1677   movq(xmm3, Address(rsp, 8));
1678   movdl(edx, xmm3);
1679   psrlq(xmm3, 32);
1680   movdl(ecx, xmm3);
1681   orl(edx, ecx);
1682   cmpl(edx, 0);
1683   jcc(Assembler::equal, L_2TAG_PACKET_9_0_2);
1684   xorpd(xmm3, xmm3);
1685   movl(eax, 18416);
1686   pinsrw(xmm3, eax, 3);
1687   mulsd(xmm0, xmm3);
1688   xorpd(xmm2, xmm2);
1689   movl(eax, 16368);
1690   pinsrw(xmm2, eax, 3);
1691   movdqu(xmm3, xmm0);
1692   pextrw(eax, xmm0, 3);
1693   por(xmm0, xmm2);
1694   movl(ecx, 18416);
1695   psrlq(xmm0, 27);
1696   movq(xmm2, ExternalAddress(LOG2_E));    //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
1697   psrld(xmm0, 2);
1698   rcpps(xmm0, xmm0);
1699   psllq(xmm3, 12);
1700   movdqu(xmm6, ExternalAddress(HIGHSIGMASK));    //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
1701   psrlq(xmm3, 12);
1702   mulss(xmm0, xmm7);
1703   movl(edx, -1024);
1704   movdl(xmm5, edx);
1705   por(xmm3, xmm1);
1706   paddd(xmm0, xmm4);
1707   psllq(xmm5, 32);
1708   movdl(edx, xmm0);
1709   psllq(xmm0, 29);
1710   pand(xmm5, xmm3);
1711   movl(tmp1, 0);
1712   pand(xmm0, xmm6);
1713   subsd(xmm3, xmm5);
1714   andl(eax, 32752);
1715   subl(eax, 18416);
1716   sarl(eax, 4);
1717   cvtsi2sdl(xmm7, eax);
1718   mulpd(xmm5, xmm0);
1719   jmp(L_2TAG_PACKET_4_0_2);
1720 
1721   bind(L_2TAG_PACKET_10_0_2);
1722   movq(xmm0, Address(rsp, 8));
1723   movq(xmm3, Address(rsp, 8));
1724   movdl(edx, xmm3);
1725   psrlq(xmm3, 32);
1726   movdl(ecx, xmm3);
1727   orl(edx, ecx);
1728   cmpl(edx, 0);
1729   jcc(Assembler::equal, L_2TAG_PACKET_9_0_2);
1730   xorpd(xmm3, xmm3);
1731   movl(eax, 18416);
1732   pinsrw(xmm3, eax, 3);
1733   mulsd(xmm0, xmm3);
1734   xorpd(xmm2, xmm2);
1735   movl(eax, 16368);
1736   pinsrw(xmm2, eax, 3);
1737   movdqu(xmm3, xmm0);
1738   pextrw(eax, xmm0, 3);
1739   por(xmm0, xmm2);
1740   movl(ecx, 18416);
1741   psrlq(xmm0, 27);
1742   movq(xmm2, ExternalAddress(LOG2_E));    //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
1743   psrld(xmm0, 2);
1744   rcpps(xmm0, xmm0);
1745   psllq(xmm3, 12);
1746   movdqu(xmm6, ExternalAddress(HIGHSIGMASK));    //0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL
1747   psrlq(xmm3, 12);
1748   mulss(xmm0, xmm7);
1749   movl(edx, -1024);
1750   movdl(xmm5, edx);
1751   por(xmm3, xmm1);
1752   paddd(xmm0, xmm4);
1753   psllq(xmm5, 32);
1754   movdl(edx, xmm0);
1755   psllq(xmm0, 29);
1756   pand(xmm5, xmm3);
1757   movl(tmp1, INT_MIN);
1758   pand(xmm0, xmm6);
1759   subsd(xmm3, xmm5);
1760   andl(eax, 32752);
1761   subl(eax, 18416);
1762   sarl(eax, 4);
1763   cvtsi2sdl(xmm7, eax);
1764   mulpd(xmm5, xmm0);
1765   jmp(L_2TAG_PACKET_4_0_2);
1766 
1767   bind(L_2TAG_PACKET_5_0_2);
1768   cmpl(eax, 0);
1769   jcc(Assembler::less, L_2TAG_PACKET_11_0_2);
1770   cmpl(eax, 752);
1771   jcc(Assembler::aboveEqual, L_2TAG_PACKET_12_0_2);
1772   addsd(xmm0, xmm7);
1773   movq(xmm2, ExternalAddress(HALFMASK));    //0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL
1774   addpd(xmm3, xmm0);
1775   xorpd(xmm6, xmm6);
1776   movl(eax, 17080);
1777   pinsrw(xmm6, eax, 3);
1778   pshufd(xmm0, xmm3, 238);
1779   addsd(xmm0, xmm3);
1780   movdqu(xmm3, xmm5);
1781   addsd(xmm5, xmm0);
1782   movdqu(xmm4, xmm2);
1783   subsd(xmm3, xmm5);
1784   movdqu(xmm7, xmm5);
1785   pand(xmm5, xmm2);
1786   movdqu(xmm2, xmm1);
1787   pand(xmm4, xmm1);
1788   subsd(xmm7, xmm5);
1789   addsd(xmm0, xmm3);
1790   subsd(xmm1, xmm4);
1791   mulsd(xmm4, xmm5);
1792   addsd(xmm0, xmm7);
1793   mulsd(xmm2, xmm0);
1794   movdqu(xmm7, xmm6);
1795   mulsd(xmm1, xmm5);
1796   addsd(xmm6, xmm4);
1797   movdl(eax, xmm6);
1798   subsd(xmm6, xmm7);
1799   lea(tmp4, ExternalAddress(T_exp));
1800   addsd(xmm2, xmm1);
1801   movdqu(xmm7, ExternalAddress(e_coeff));    //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
1802   movdqu(xmm3, ExternalAddress(16 + e_coeff));    //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
1803   subsd(xmm4, xmm6);
1804   pextrw(edx, xmm6, 3);
1805   movl(ecx, eax);
1806   andl(eax, 255);
1807   addl(eax, eax);
1808   movdqu(xmm5, Address(tmp4, rax, Address::times_8, 0));
1809   addsd(xmm2, xmm4);
1810   sarl(ecx, 8);
1811   movl(eax, ecx);
1812   sarl(ecx, 1);
1813   subl(eax, ecx);
1814   shll(ecx, 20);
1815   xorl(ecx, tmp1);
1816   movdl(xmm6, ecx);
1817   movq(xmm1, ExternalAddress(32 + e_coeff));    //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
1818   andl(edx, 32767);
1819   cmpl(edx, 16529);
1820   jcc(Assembler::above, L_2TAG_PACKET_12_0_2);
1821   pshufd(xmm0, xmm2, 68);
1822   pshufd(xmm4, xmm2, 68);
1823   mulpd(xmm0, xmm0);
1824   mulpd(xmm7, xmm4);
1825   pshufd(xmm6, xmm6, 17);
1826   mulsd(xmm1, xmm2);
1827   mulsd(xmm0, xmm0);
1828   paddd(xmm5, xmm6);
1829   addpd(xmm3, xmm7);
1830   mulsd(xmm1, xmm5);
1831   pshufd(xmm6, xmm5, 238);
1832   mulpd(xmm0, xmm3);
1833   addsd(xmm1, xmm6);
1834   pshufd(xmm3, xmm0, 238);
1835   mulsd(xmm0, xmm5);
1836   mulsd(xmm3, xmm5);
1837   shll(eax, 4);
1838   xorpd(xmm4, xmm4);
1839   addl(eax, 16368);
1840   pinsrw(xmm4, eax, 3);
1841   addsd(xmm0, xmm1);
1842   addsd(xmm0, xmm3);
1843   movdqu(xmm1, xmm0);
1844   addsd(xmm0, xmm5);
1845   mulsd(xmm0, xmm4);
1846   pextrw(eax, xmm0, 3);
1847   andl(eax, 32752);
1848   jcc(Assembler::equal, L_2TAG_PACKET_13_0_2);
1849   cmpl(eax, 32752);
1850   jcc(Assembler::equal, L_2TAG_PACKET_14_0_2);
1851   jmp(B1_5);
1852 
1853   bind(L_2TAG_PACKET_6_0_2);
1854   movq(xmm1, Address(rsp, 16));
1855   movq(xmm0, Address(rsp, 8));
1856   movdqu(xmm2, xmm0);
1857   movdl(eax, xmm2);
1858   psrlq(xmm2, 20);
1859   movdl(edx, xmm2);
1860   orl(eax, edx);
1861   jcc(Assembler::equal, L_2TAG_PACKET_15_0_2);
1862   movdl(eax, xmm1);
1863   psrlq(xmm1, 32);
1864   movdl(edx, xmm1);
1865   movl(ecx, edx);
1866   addl(edx, edx);
1867   orl(eax, edx);
1868   jcc(Assembler::equal, L_2TAG_PACKET_16_0_2);
1869   addsd(xmm0, xmm0);
1870   jmp(B1_5);
1871 
1872   bind(L_2TAG_PACKET_16_0_2);
1873   xorpd(xmm0, xmm0);
1874   movl(eax, 16368);
1875   pinsrw(xmm0, eax, 3);
1876   movl(Address(rsp, 0), 29);
1877   jmp(L_2TAG_PACKET_17_0_2);
1878 
1879   bind(L_2TAG_PACKET_18_0_2);
1880   movq(xmm0, Address(rsp, 16));
1881   addpd(xmm0, xmm0);
1882   jmp(B1_5);
1883 
1884   bind(L_2TAG_PACKET_15_0_2);
1885   movdl(eax, xmm1);
1886   movdqu(xmm2, xmm1);
1887   psrlq(xmm1, 32);
1888   movdl(edx, xmm1);
1889   movl(ecx, edx);
1890   addl(edx, edx);
1891   orl(eax, edx);
1892   jcc(Assembler::equal, L_2TAG_PACKET_19_0_2);
1893   pextrw(eax, xmm2, 3);
1894   andl(eax, 32752);
1895   cmpl(eax, 32752);
1896   jcc(Assembler::notEqual, L_2TAG_PACKET_20_0_2);
1897   movdl(eax, xmm2);
1898   psrlq(xmm2, 20);
1899   movdl(edx, xmm2);
1900   orl(eax, edx);
1901   jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2);
1902 
1903   bind(L_2TAG_PACKET_20_0_2);
1904   pextrw(eax, xmm0, 3);
1905   testl(eax, 32768);
1906   jcc(Assembler::notEqual, L_2TAG_PACKET_21_0_2);
1907   testl(ecx, INT_MIN);
1908   jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2);
1909   jmp(B1_5);
1910 
1911   bind(L_2TAG_PACKET_23_0_2);
1912   movq(xmm1, Address(rsp, 16));
1913   movdl(eax, xmm1);
1914   testl(eax, 1);
1915   jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2);
1916   testl(eax, 2);
1917   jcc(Assembler::notEqual, L_2TAG_PACKET_25_0_2);
1918   jmp(L_2TAG_PACKET_24_0_2);
1919 
1920   bind(L_2TAG_PACKET_21_0_2);
1921   shrl(ecx, 20);
1922   andl(ecx, 2047);
1923   cmpl(ecx, 1075);
1924   jcc(Assembler::above, L_2TAG_PACKET_24_0_2);
1925   jcc(Assembler::equal, L_2TAG_PACKET_26_0_2);
1926   cmpl(ecx, 1074);
1927   jcc(Assembler::above, L_2TAG_PACKET_23_0_2);
1928   cmpl(ecx, 1023);
1929   jcc(Assembler::below, L_2TAG_PACKET_24_0_2);
1930   movq(xmm1, Address(rsp, 16));
1931   movl(eax, 17208);
1932   xorpd(xmm3, xmm3);
1933   pinsrw(xmm3, eax, 3);
1934   movdqu(xmm4, xmm3);
1935   addsd(xmm3, xmm1);
1936   subsd(xmm4, xmm3);
1937   addsd(xmm1, xmm4);
1938   pextrw(eax, xmm1, 3);
1939   andl(eax, 32752);
1940   jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2);
1941   movdl(eax, xmm3);
1942   andl(eax, 1);
1943   jcc(Assembler::equal, L_2TAG_PACKET_24_0_2);
1944 
1945   bind(L_2TAG_PACKET_25_0_2);
1946   movq(xmm1, Address(rsp, 16));
1947   pextrw(eax, xmm1, 3);
1948   andl(eax, 32768);
1949   jcc(Assembler::notEqual, L_2TAG_PACKET_27_0_2);
1950   jmp(B1_5);
1951 
1952   bind(L_2TAG_PACKET_27_0_2);
1953   xorpd(xmm0, xmm0);
1954   movl(eax, 32768);
1955   pinsrw(xmm0, eax, 3);
1956   jmp(B1_5);
1957 
1958   bind(L_2TAG_PACKET_24_0_2);
1959   movq(xmm1, Address(rsp, 16));
1960   pextrw(eax, xmm1, 3);
1961   andl(eax, 32768);
1962   jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2);
1963   xorpd(xmm0, xmm0);
1964   movl(eax, 32752);
1965   pinsrw(xmm0, eax, 3);
1966   jmp(B1_5);
1967 
1968   bind(L_2TAG_PACKET_26_0_2);
1969   movq(xmm1, Address(rsp, 16));
1970   movdl(eax, xmm1);
1971   andl(eax, 1);
1972   jcc(Assembler::equal, L_2TAG_PACKET_24_0_2);
1973   jmp(L_2TAG_PACKET_25_0_2);
1974 
1975   bind(L_2TAG_PACKET_28_0_2);
1976   movdl(eax, xmm1);
1977   psrlq(xmm1, 20);
1978   movdl(edx, xmm1);
1979   orl(eax, edx);
1980   jcc(Assembler::equal, L_2TAG_PACKET_29_0_2);
1981   movq(xmm0, Address(rsp, 16));
1982   addsd(xmm0, xmm0);
1983   jmp(B1_5);
1984 
1985   bind(L_2TAG_PACKET_29_0_2);
1986   movq(xmm0, Address(rsp, 8));
1987   pextrw(eax, xmm0, 3);
1988   cmpl(eax, 49136);
1989   jcc(Assembler::notEqual, L_2TAG_PACKET_30_0_2);
1990   movdl(ecx, xmm0);
1991   psrlq(xmm0, 20);
1992   movdl(edx, xmm0);
1993   orl(ecx, edx);
1994   jcc(Assembler::notEqual, L_2TAG_PACKET_30_0_2);
1995   xorpd(xmm0, xmm0);
1996   movl(eax, 32760);
1997   pinsrw(xmm0, eax, 3);
1998   jmp(B1_5);
1999 
2000   bind(L_2TAG_PACKET_30_0_2);
2001   movq(xmm1, Address(rsp, 16));
2002   andl(eax, 32752);
2003   subl(eax, 16368);
2004   pextrw(edx, xmm1, 3);
2005   xorpd(xmm0, xmm0);
2006   xorl(eax, edx);
2007   andl(eax, 32768);
2008   jcc(Assembler::equal, L_2TAG_PACKET_31_0_2);
2009   jmp(B1_5);
2010 
2011   bind(L_2TAG_PACKET_31_0_2);
2012   movl(ecx, 32752);
2013   pinsrw(xmm0, ecx, 3);
2014   jmp(B1_5);
2015 
2016   bind(L_2TAG_PACKET_32_0_2);
2017   movdl(eax, xmm1);
2018   cmpl(edx, 17184);
2019   jcc(Assembler::above, L_2TAG_PACKET_33_0_2);
2020   testl(eax, 1);
2021   jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2);
2022   testl(eax, 2);
2023   jcc(Assembler::equal, L_2TAG_PACKET_35_0_2);
2024   jmp(L_2TAG_PACKET_36_0_2);
2025 
2026   bind(L_2TAG_PACKET_33_0_2);
2027   testl(eax, 1);
2028   jcc(Assembler::equal, L_2TAG_PACKET_35_0_2);
2029   jmp(L_2TAG_PACKET_36_0_2);
2030 
2031   bind(L_2TAG_PACKET_7_0_2);
2032   movq(xmm2, Address(rsp, 8));
2033   movdl(eax, xmm2);
2034   psrlq(xmm2, 31);
2035   movdl(ecx, xmm2);
2036   orl(eax, ecx);
2037   jcc(Assembler::equal, L_2TAG_PACKET_9_0_2);
2038   movq(xmm1, Address(rsp, 16));
2039   pextrw(edx, xmm1, 3);
2040   movdl(eax, xmm1);
2041   movdqu(xmm2, xmm1);
2042   psrlq(xmm2, 32);
2043   movdl(ecx, xmm2);
2044   addl(ecx, ecx);
2045   orl(ecx, eax);
2046   jcc(Assembler::equal, L_2TAG_PACKET_37_0_2);
2047   andl(edx, 32752);
2048   cmpl(edx, 32752);
2049   jcc(Assembler::equal, L_2TAG_PACKET_28_0_2);
2050   cmpl(edx, 17200);
2051   jcc(Assembler::above, L_2TAG_PACKET_35_0_2);
2052   cmpl(edx, 17184);
2053   jcc(Assembler::aboveEqual, L_2TAG_PACKET_32_0_2);
2054   cmpl(edx, 16368);
2055   jcc(Assembler::below, L_2TAG_PACKET_34_0_2);
2056   movl(eax, 17208);
2057   xorpd(xmm2, xmm2);
2058   pinsrw(xmm2, eax, 3);
2059   movdqu(xmm4, xmm2);
2060   addsd(xmm2, xmm1);
2061   subsd(xmm4, xmm2);
2062   addsd(xmm1, xmm4);
2063   pextrw(eax, xmm1, 3);
2064   andl(eax, 32767);
2065   jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2);
2066   movdl(eax, xmm2);
2067   andl(eax, 1);
2068   jcc(Assembler::equal, L_2TAG_PACKET_35_0_2);
2069 
2070   bind(L_2TAG_PACKET_36_0_2);
2071   xorpd(xmm1, xmm1);
2072   movl(edx, 30704);
2073   pinsrw(xmm1, edx, 3);
2074   movq(xmm2, ExternalAddress(LOG2_E));    //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
2075   movq(xmm4, Address(rsp, 8));
2076   pextrw(eax, xmm4, 3);
2077   movl(edx, 8192);
2078   movdl(xmm4, edx);
2079   andl(eax, 32767);
2080   subl(eax, 16);
2081   jcc(Assembler::less, L_2TAG_PACKET_10_0_2);
2082   movl(edx, eax);
2083   andl(edx, 32752);
2084   subl(edx, 16368);
2085   movl(ecx, edx);
2086   sarl(edx, 31);
2087   addl(ecx, edx);
2088   xorl(ecx, edx);
2089   addl(ecx, 16);
2090   bsrl(ecx, ecx);
2091   movl(tmp1, INT_MIN);
2092   jmp(L_2TAG_PACKET_1_0_2);
2093 
2094   bind(L_2TAG_PACKET_34_0_2);
2095   xorpd(xmm1, xmm1);
2096   movl(eax, 32752);
2097   pinsrw(xmm1, eax, 3);
2098   xorpd(xmm0, xmm0);
2099   mulsd(xmm0, xmm1);
2100   movl(Address(rsp, 0), 28);
2101   jmp(L_2TAG_PACKET_17_0_2);
2102 
2103   bind(L_2TAG_PACKET_35_0_2);
2104   xorpd(xmm1, xmm1);
2105   movl(edx, 30704);
2106   pinsrw(xmm1, edx, 3);
2107   movq(xmm2, ExternalAddress(LOG2_E));    //0x00000000UL, 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL
2108   movq(xmm4, Address(rsp, 8));
2109   pextrw(eax, xmm4, 3);
2110   movl(edx, 8192);
2111   movdl(xmm4, edx);
2112   andl(eax, 32767);
2113   subl(eax, 16);
2114   jcc(Assembler::less, L_2TAG_PACKET_8_0_2);
2115   movl(edx, eax);
2116   andl(edx, 32752);
2117   subl(edx, 16368);
2118   movl(ecx, edx);
2119   sarl(edx, 31);
2120   addl(ecx, edx);
2121   xorl(ecx, edx);
2122   addl(ecx, 16);
2123   bsrl(ecx, ecx);
2124   movl(tmp1, 0);
2125   jmp(L_2TAG_PACKET_1_0_2);
2126 
2127   bind(L_2TAG_PACKET_19_0_2);
2128   xorpd(xmm0, xmm0);
2129   movl(eax, 16368);
2130   pinsrw(xmm0, eax, 3);
2131   jmp(B1_5);
2132 
2133   bind(L_2TAG_PACKET_22_0_2);
2134   xorpd(xmm0, xmm0);
2135   jmp(B1_5);
2136 
2137   bind(L_2TAG_PACKET_11_0_2);
2138   addl(eax, 384);
2139   cmpl(eax, 0);
2140   jcc(Assembler::less, L_2TAG_PACKET_38_0_2);
2141   mulsd(xmm5, xmm1);
2142   addsd(xmm0, xmm7);
2143   shrl(tmp1, 31);
2144   addpd(xmm3, xmm0);
2145   pshufd(xmm0, xmm3, 238);
2146   addsd(xmm3, xmm0);
2147   lea(tmp4, ExternalAddress(log2));    //0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL, 0xbfe62e42UL
2148   movq(xmm4, Address(tmp4, tmp1, Address::times_8, 0));
2149   mulsd(xmm1, xmm3);
2150   xorpd(xmm0, xmm0);
2151   movl(eax, 16368);
2152   shll(tmp1, 15);
2153   orl(eax, tmp1);
2154   pinsrw(xmm0, eax, 3);
2155   addsd(xmm5, xmm1);
2156   mulsd(xmm5, xmm4);
2157   addsd(xmm0, xmm5);
2158   jmp(B1_5);
2159 
2160   bind(L_2TAG_PACKET_38_0_2);
2161 
2162   bind(L_2TAG_PACKET_37_0_2);
2163   xorpd(xmm0, xmm0);
2164   movl(eax, 16368);
2165   pinsrw(xmm0, eax, 3);
2166   jmp(B1_5);
2167 
2168   bind(L_2TAG_PACKET_39_0_2);
2169   xorpd(xmm0, xmm0);
2170   movl(eax, 16368);
2171   pinsrw(xmm0, eax, 3);
2172   movl(Address(rsp, 0), 26);
2173   jmp(L_2TAG_PACKET_17_0_2);
2174 
2175   bind(L_2TAG_PACKET_9_0_2);
2176   movq(xmm1, Address(rsp, 16));
2177   movdqu(xmm2, xmm1);
2178   pextrw(eax, xmm1, 3);
2179   andl(eax, 32752);
2180   cmpl(eax, 32752);
2181   jcc(Assembler::notEqual, L_2TAG_PACKET_40_0_2);
2182   movdl(eax, xmm2);
2183   psrlq(xmm2, 20);
2184   movdl(edx, xmm2);
2185   orl(eax, edx);
2186   jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2);
2187 
2188   bind(L_2TAG_PACKET_40_0_2);
2189   movdl(eax, xmm1);
2190   psrlq(xmm1, 32);
2191   movdl(edx, xmm1);
2192   movl(ecx, edx);
2193   addl(edx, edx);
2194   orl(eax, edx);
2195   jcc(Assembler::equal, L_2TAG_PACKET_39_0_2);
2196   shrl(edx, 21);
2197   cmpl(edx, 1075);
2198   jcc(Assembler::above, L_2TAG_PACKET_41_0_2);
2199   jcc(Assembler::equal, L_2TAG_PACKET_42_0_2);
2200   cmpl(edx, 1023);
2201   jcc(Assembler::below, L_2TAG_PACKET_41_0_2);
2202   movq(xmm1, Address(rsp, 16));
2203   movl(eax, 17208);
2204   xorpd(xmm3, xmm3);
2205   pinsrw(xmm3, eax, 3);
2206   movdqu(xmm4, xmm3);
2207   addsd(xmm3, xmm1);
2208   subsd(xmm4, xmm3);
2209   addsd(xmm1, xmm4);
2210   pextrw(eax, xmm1, 3);
2211   andl(eax, 32752);
2212   jcc(Assembler::notEqual, L_2TAG_PACKET_41_0_2);
2213   movdl(eax, xmm3);
2214   andl(eax, 1);
2215   jcc(Assembler::equal, L_2TAG_PACKET_41_0_2);
2216 
2217   bind(L_2TAG_PACKET_43_0_2);
2218   movq(xmm0, Address(rsp, 8));
2219   testl(ecx, INT_MIN);
2220   jcc(Assembler::notEqual, L_2TAG_PACKET_44_0_2);
2221   jmp(B1_5);
2222 
2223   bind(L_2TAG_PACKET_42_0_2);
2224   movq(xmm1, Address(rsp, 16));
2225   movdl(eax, xmm1);
2226   testl(eax, 1);
2227   jcc(Assembler::notEqual, L_2TAG_PACKET_43_0_2);
2228 
2229   bind(L_2TAG_PACKET_41_0_2);
2230   testl(ecx, INT_MIN);
2231   jcc(Assembler::equal, L_2TAG_PACKET_22_0_2);
2232   xorpd(xmm0, xmm0);
2233 
2234   bind(L_2TAG_PACKET_44_0_2);
2235   movl(eax, 16368);
2236   xorpd(xmm1, xmm1);
2237   pinsrw(xmm1, eax, 3);
2238   divsd(xmm1, xmm0);
2239   movdqu(xmm0, xmm1);
2240   movl(Address(rsp, 0), 27);
2241   jmp(L_2TAG_PACKET_17_0_2);
2242 
2243   bind(L_2TAG_PACKET_12_0_2);
2244   movq(xmm2, Address(rsp, 8));
2245   movq(xmm6, Address(rsp, 16));
2246   pextrw(eax, xmm2, 3);
2247   pextrw(edx, xmm6, 3);
2248   movl(ecx, 32752);
2249   andl(ecx, edx);
2250   cmpl(ecx, 32752);
2251   jcc(Assembler::equal, L_2TAG_PACKET_45_0_2);
2252   andl(eax, 32752);
2253   subl(eax, 16368);
2254   xorl(edx, eax);
2255   testl(edx, 32768);
2256   jcc(Assembler::notEqual, L_2TAG_PACKET_46_0_2);
2257 
2258   bind(L_2TAG_PACKET_47_0_2);
2259   movl(eax, 32736);
2260   pinsrw(xmm0, eax, 3);
2261   shrl(tmp1, 16);
2262   orl(eax, tmp1);
2263   pinsrw(xmm1, eax, 3);
2264   mulsd(xmm0, xmm1);
2265 
2266   bind(L_2TAG_PACKET_14_0_2);
2267   movl(Address(rsp, 0), 24);
2268   jmp(L_2TAG_PACKET_17_0_2);
2269 
2270   bind(L_2TAG_PACKET_46_0_2);
2271   movl(eax, 16);
2272   pinsrw(xmm0, eax, 3);
2273   mulsd(xmm0, xmm0);
2274   testl(tmp1, INT_MIN);
2275   jcc(Assembler::equal, L_2TAG_PACKET_48_0_2);
2276   mov64(tmp2, 0x8000000000000000);
2277   movdq(xmm2, tmp2);
2278   xorpd(xmm0, xmm2);
2279 
2280   bind(L_2TAG_PACKET_48_0_2);
2281   movl(Address(rsp, 0), 25);
2282   jmp(L_2TAG_PACKET_17_0_2);
2283 
2284   bind(L_2TAG_PACKET_13_0_2);
2285   pextrw(ecx, xmm5, 3);
2286   pextrw(edx, xmm4, 3);
2287   movl(eax, -1);
2288   andl(ecx, 32752);
2289   subl(ecx, 16368);
2290   andl(edx, 32752);
2291   addl(edx, ecx);
2292   movl(ecx, -31);
2293   sarl(edx, 4);
2294   subl(ecx, edx);
2295   jcc(Assembler::lessEqual, L_2TAG_PACKET_49_0_2);
2296   cmpl(ecx, 20);
2297   jcc(Assembler::above, L_2TAG_PACKET_50_0_2);
2298   shll(eax);
2299 
2300   bind(L_2TAG_PACKET_49_0_2);
2301   movdl(xmm0, eax);
2302   psllq(xmm0, 32);
2303   pand(xmm0, xmm5);
2304   subsd(xmm5, xmm0);
2305   addsd(xmm5, xmm1);
2306   mulsd(xmm0, xmm4);
2307   mulsd(xmm5, xmm4);
2308   addsd(xmm0, xmm5);
2309 
2310   bind(L_2TAG_PACKET_50_0_2);
2311   jmp(L_2TAG_PACKET_48_0_2);
2312 
2313   bind(L_2TAG_PACKET_2_0_2);
2314   movw(ecx, Address(rsp, 22));
2315   movl(edx, INT_MIN);
2316   movdl(xmm1, rdx);
2317   xorpd(xmm7, xmm7);
2318   paddd(xmm0, xmm4);
2319   movdl(edx, xmm0);
2320   psllq(xmm0, 29);
2321   paddq(xmm1, xmm3);
2322   pand(xmm5, xmm1);
2323   andl(ecx, 32752);
2324   cmpl(ecx, 16560);
2325   jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
2326   pand(xmm0, xmm6);
2327   subsd(xmm3, xmm5);
2328   addl(eax, 16351);
2329   shrl(eax, 4);
2330   subl(eax, 1022);
2331   cvtsi2sdl(xmm7, eax);
2332   mulpd(xmm5, xmm0);
2333   lea(r11, ExternalAddress(L_tbl));
2334   movq(xmm4, ExternalAddress(coeff_h));    //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
2335   mulsd(xmm3, xmm0);
2336   movq(xmm6, ExternalAddress(coeff_h));    //0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL
2337   subsd(xmm5, xmm2);
2338   movq(xmm1, ExternalAddress(8 + coeff_h));    //0x00000000UL, 0xbf5dabe1UL
2339   pshufd(xmm2, xmm3, 68);
2340   unpcklpd(xmm5, xmm3);
2341   addsd(xmm3, xmm5);
2342   movq(xmm0, ExternalAddress(8 + coeff_h));    //0x00000000UL, 0xbf5dabe1UL
2343   andl(edx, 16760832);
2344   shrl(edx, 10);
2345   addpd(xmm7, Address(tmp4, edx, Address::times_1, -3648));
2346   mulsd(xmm4, xmm5);
2347   mulsd(xmm0, xmm5);
2348   mulsd(xmm6, xmm2);
2349   mulsd(xmm1, xmm2);
2350   movdqu(xmm2, xmm5);
2351   mulsd(xmm4, xmm5);
2352   addsd(xmm5, xmm0);
2353   movdqu(xmm0, xmm7);
2354   addsd(xmm2, xmm3);
2355   addsd(xmm7, xmm5);
2356   mulsd(xmm6, xmm2);
2357   subsd(xmm0, xmm7);
2358   movdqu(xmm2, xmm7);
2359   addsd(xmm7, xmm4);
2360   addsd(xmm0, xmm5);
2361   subsd(xmm2, xmm7);
2362   addsd(xmm4, xmm2);
2363   pshufd(xmm2, xmm5, 238);
2364   movdqu(xmm5, xmm7);
2365   addsd(xmm7, xmm2);
2366   addsd(xmm4, xmm0);
2367   movdqu(xmm0, ExternalAddress(coeff));    //0x6dc96112UL, 0xbf836578UL, 0xee241472UL, 0xbf9b0301UL
2368   subsd(xmm5, xmm7);
2369   addsd(xmm6, xmm4);
2370   movdqu(xmm4, xmm7);
2371   addsd(xmm5, xmm2);
2372   addsd(xmm7, xmm1);
2373   movdqu(xmm2, ExternalAddress(64 + coeff));    //0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL
2374   subsd(xmm4, xmm7);
2375   addsd(xmm6, xmm5);
2376   addsd(xmm4, xmm1);
2377   pshufd(xmm5, xmm7, 238);
2378   movapd(xmm1, xmm7);
2379   addsd(xmm7, xmm5);
2380   subsd(xmm1, xmm7);
2381   addsd(xmm1, xmm5);
2382   movdqu(xmm5, ExternalAddress(80 + coeff));    //0x9f95985aUL, 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL
2383   pshufd(xmm3, xmm3, 68);
2384   addsd(xmm6, xmm4);
2385   addsd(xmm6, xmm1);
2386   movdqu(xmm1, ExternalAddress(32 + coeff));    //0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, 0xbfd619b6UL
2387   mulpd(xmm0, xmm3);
2388   mulpd(xmm2, xmm3);
2389   pshufd(xmm4, xmm3, 68);
2390   mulpd(xmm3, xmm3);
2391   addpd(xmm0, xmm1);
2392   addpd(xmm5, xmm2);
2393   mulsd(xmm4, xmm3);
2394   movq(xmm2, ExternalAddress(HIGHMASK_LOG_X));    //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xfffff800UL
2395   mulpd(xmm3, xmm3);
2396   movq(xmm1, Address(rsp, 16));
2397   movw(ecx, Address(rsp, 22));
2398   mulpd(xmm0, xmm4);
2399   pextrw(eax, xmm7, 3);
2400   mulpd(xmm5, xmm4);
2401   mulpd(xmm0, xmm3);
2402   movq(xmm4, ExternalAddress(8 + HIGHMASK_Y));    //0x00000000UL, 0xffffffffUL
2403   pand(xmm2, xmm7);
2404   addsd(xmm5, xmm6);
2405   subsd(xmm7, xmm2);
2406   addpd(xmm5, xmm0);
2407   andl(eax, 32752);
2408   subl(eax, 16368);
2409   andl(ecx, 32752);
2410   cmpl(ecx, 32752);
2411   jcc(Assembler::equal, L_2TAG_PACKET_45_0_2);
2412   addl(ecx, eax);
2413   cmpl(ecx, 16576);
2414   jcc(Assembler::aboveEqual, L_2TAG_PACKET_51_0_2);
2415   pshufd(xmm0, xmm5, 238);
2416   pand(xmm4, xmm1);
2417   movdqu(xmm3, xmm1);
2418   addsd(xmm5, xmm0);
2419   subsd(xmm1, xmm4);
2420   xorpd(xmm6, xmm6);
2421   movl(edx, 17080);
2422   pinsrw(xmm6, edx, 3);
2423   addsd(xmm7, xmm5);
2424   mulsd(xmm4, xmm2);
2425   mulsd(xmm1, xmm2);
2426   movdqu(xmm5, xmm6);
2427   mulsd(xmm3, xmm7);
2428   addsd(xmm6, xmm4);
2429   addsd(xmm1, xmm3);
2430   movdqu(xmm7, ExternalAddress(e_coeff));    //0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL
2431   movdl(edx, xmm6);
2432   subsd(xmm6, xmm5);
2433   lea(tmp4, ExternalAddress(T_exp));
2434   movdqu(xmm3, ExternalAddress(16 + e_coeff));    //0x6fba4e77UL, 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL
2435   movq(xmm2, ExternalAddress(32 + e_coeff));    //0xfefa39efUL, 0x3fe62e42UL, 0x00000000UL, 0x00000000UL
2436   subsd(xmm4, xmm6);
2437   movl(ecx, edx);
2438   andl(edx, 255);
2439   addl(edx, edx);
2440   movdqu(xmm5, Address(tmp4, edx, Address::times_8, 0));
2441   addsd(xmm4, xmm1);
2442   pextrw(edx, xmm6, 3);
2443   shrl(ecx, 8);
2444   movl(eax, ecx);
2445   shrl(ecx, 1);
2446   subl(eax, ecx);
2447   shll(ecx, 20);
2448   movdl(xmm6, ecx);
2449   pshufd(xmm0, xmm4, 68);
2450   pshufd(xmm1, xmm4, 68);
2451   mulpd(xmm0, xmm0);
2452   mulpd(xmm7, xmm1);
2453   pshufd(xmm6, xmm6, 17);
2454   mulsd(xmm2, xmm4);
2455   andl(edx, 32767);
2456   cmpl(edx, 16529);
2457   jcc(Assembler::above, L_2TAG_PACKET_12_0_2);
2458   mulsd(xmm0, xmm0);
2459   paddd(xmm5, xmm6);
2460   addpd(xmm3, xmm7);
2461   mulsd(xmm2, xmm5);
2462   pshufd(xmm6, xmm5, 238);
2463   mulpd(xmm0, xmm3);
2464   addsd(xmm2, xmm6);
2465   pshufd(xmm3, xmm0, 238);
2466   addl(eax, 1023);
2467   shll(eax, 20);
2468   orl(eax, tmp1);
2469   movdl(xmm4, eax);
2470   mulsd(xmm0, xmm5);
2471   mulsd(xmm3, xmm5);
2472   addsd(xmm0, xmm2);
2473   psllq(xmm4, 32);
2474   addsd(xmm0, xmm3);
2475   movdqu(xmm1, xmm0);
2476   addsd(xmm0, xmm5);
2477   mulsd(xmm0, xmm4);
2478   pextrw(eax, xmm0, 3);
2479   andl(eax, 32752);
2480   jcc(Assembler::equal, L_2TAG_PACKET_13_0_2);
2481   cmpl(eax, 32752);
2482   jcc(Assembler::equal, L_2TAG_PACKET_14_0_2);
2483 
2484   bind(L_2TAG_PACKET_52_0_2);
2485   jmp(B1_5);
2486 
2487   bind(L_2TAG_PACKET_45_0_2);
2488   movq(xmm0, Address(rsp, 8));
2489   xorpd(xmm2, xmm2);
2490   movl(eax, 49136);
2491   pinsrw(xmm2, eax, 3);
2492   addsd(xmm2, xmm0);
2493   pextrw(eax, xmm2, 3);
2494   cmpl(eax, 0);
2495   jcc(Assembler::notEqual, L_2TAG_PACKET_53_0_2);
2496   xorpd(xmm0, xmm0);
2497   movl(eax, 32760);
2498   pinsrw(xmm0, eax, 3);
2499   jmp(B1_5);
2500 
2501   bind(L_2TAG_PACKET_53_0_2);
2502   movq(xmm1, Address(rsp, 16));
2503   movdl(edx, xmm1);
2504   movdqu(xmm3, xmm1);
2505   psrlq(xmm3, 20);
2506   movdl(ecx, xmm3);
2507   orl(ecx, edx);
2508   jcc(Assembler::equal, L_2TAG_PACKET_54_0_2);
2509   addsd(xmm1, xmm1);
2510   movdqu(xmm0, xmm1);
2511   jmp(B1_5);
2512 
2513   bind(L_2TAG_PACKET_51_0_2);
2514   pextrw(eax, xmm1, 3);
2515   pextrw(ecx, xmm2, 3);
2516   xorl(eax, ecx);
2517   testl(eax, 32768);
2518   jcc(Assembler::equal, L_2TAG_PACKET_47_0_2);
2519   jmp(L_2TAG_PACKET_46_0_2);
2520 
2521   bind(L_2TAG_PACKET_54_0_2);
2522   pextrw(eax, xmm0, 3);
2523   andl(eax, 32752);
2524   pextrw(edx, xmm1, 3);
2525   xorpd(xmm0, xmm0);
2526   subl(eax, 16368);
2527   xorl(eax, edx);
2528   testl(eax, 32768);
2529   jcc(Assembler::equal, L_2TAG_PACKET_55_0_2);
2530   jmp(B1_5);
2531 
2532   bind(L_2TAG_PACKET_55_0_2);
2533   movl(edx, 32752);
2534   pinsrw(xmm0, edx, 3);
2535   jmp(B1_5);
2536 
2537   bind(L_2TAG_PACKET_17_0_2);
2538   movq(Address(rsp, 24), xmm0);
2539 
2540   bind(B1_3);
2541   movq(xmm0, Address(rsp, 24));
2542 
2543   bind(L_2TAG_PACKET_56_0_2);
2544 
2545   bind(B1_5);
2546   addq(rsp, 40);
2547 }
2548 
2549 /******************************************************************************/
2550 //                     ALGORITHM DESCRIPTION - SIN()
2551 //                     ---------------------
2552 //
2553 //     1. RANGE REDUCTION
2554 //
2555 //     We perform an initial range reduction from X to r with
2556 //
2557 //          X =~= N * pi/32 + r
2558 //
2559 //     so that |r| <= pi/64 + epsilon. We restrict inputs to those
2560 //     where |N| <= 932560. Beyond this, the range reduction is
2561 //     insufficiently accurate. For extremely small inputs,
2562 //     denormalization can occur internally, impacting performance.
2563 //     This means that the main path is actually only taken for
2564 //     2^-252 <= |X| < 90112.
2565 //
2566 //     To avoid branches, we perform the range reduction to full
2567 //     accuracy each time.
2568 //
2569 //          X - N * (P_1 + P_2 + P_3)
2570 //
2571 //     where P_1 and P_2 are 32-bit numbers (so multiplication by N
2572 //     is exact) and P_3 is a 53-bit number. Together, these
2573 //     approximate pi well enough for all cases in the restricted
2574 //     range.
2575 //
2576 //     The main reduction sequence is:
2577 //
2578 //             y = 32/pi * x
2579 //             N = integer(y)
2580 //     (computed by adding and subtracting off SHIFTER)
2581 //
2582 //             m_1 = N * P_1
2583 //             m_2 = N * P_2
2584 //             r_1 = x - m_1
2585 //             r = r_1 - m_2
2586 //     (this r can be used for most of the calculation)
2587 //
2588 //             c_1 = r_1 - r
2589 //             m_3 = N * P_3
2590 //             c_2 = c_1 - m_2
2591 //             c = c_2 - m_3
2592 //
2593 //     2. MAIN ALGORITHM
2594 //
2595 //     The algorithm uses a table lookup based on B = M * pi / 32
2596 //     where M = N mod 64. The stored values are:
2597 //       sigma             closest power of 2 to cos(B)
2598 //       C_hl              53-bit cos(B) - sigma
2599 //       S_hi + S_lo       2 * 53-bit sin(B)
2600 //
2601 //     The computation is organized as follows:
2602 //
2603 //          sin(B + r + c) = [sin(B) + sigma * r] +
2604 //                           r * (cos(B) - sigma) +
2605 //                           sin(B) * [cos(r + c) - 1] +
2606 //                           cos(B) * [sin(r + c) - r]
2607 //
2608 //     which is approximately:
2609 //
2610 //          [S_hi + sigma * r] +
2611 //          C_hl * r +
2612 //          S_lo + S_hi * [(cos(r) - 1) - r * c] +
2613 //          (C_hl + sigma) * [(sin(r) - r) + c]
2614 //
2615 //     and this is what is actually computed. We separate this sum
2616 //     into four parts:
2617 //
2618 //          hi + med + pols + corr
2619 //
2620 //     where
2621 //
2622 //          hi       = S_hi + sigma r
2623 //          med      = C_hl * r
2624 //          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
2625 //          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
2626 //
2627 //     3. POLYNOMIAL
2628 //
2629 //     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
2630 //     (sin(r) - r) can be rearranged freely, since it is quite
2631 //     small, so we exploit parallelism to the fullest.
2632 //
2633 //          psc4       =   SC_4 * r_1
2634 //          msc4       =   psc4 * r
2635 //          r2         =   r * r
2636 //          msc2       =   SC_2 * r2
2637 //          r4         =   r2 * r2
2638 //          psc3       =   SC_3 + msc4
2639 //          psc1       =   SC_1 + msc2
2640 //          msc3       =   r4 * psc3
2641 //          sincospols =   psc1 + msc3
2642 //          pols       =   sincospols *
2643 //                         <S_hi * r^2 | (C_hl + sigma) * r^3>
2644 //
2645 //     4. CORRECTION TERM
2646 //
2647 //     This is where the "c" component of the range reduction is
2648 //     taken into account; recall that just "r" is used for most of
2649 //     the calculation.
2650 //
2651 //          -c   = m_3 - c_2
2652 //          -d   = S_hi * r - (C_hl + sigma)
2653 //          corr = -c * -d + S_lo
2654 //
2655 //     5. COMPENSATED SUMMATIONS
2656 //
2657 //     The two successive compensated summations add up the high
2658 //     and medium parts, leaving just the low parts to add up at
2659 //     the end.
2660 //
2661 //          rs        =  sigma * r
2662 //          res_int   =  S_hi + rs
2663 //          k_0       =  S_hi - res_int
2664 //          k_2       =  k_0 + rs
2665 //          med       =  C_hl * r
2666 //          res_hi    =  res_int + med
2667 //          k_1       =  res_int - res_hi
2668 //          k_3       =  k_1 + med
2669 //
2670 //     6. FINAL SUMMATION
2671 //
2672 //     We now add up all the small parts:
2673 //
2674 //          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
2675 //
2676 //     Now the overall result is just:
2677 //
2678 //          res_hi + res_lo
2679 //
2680 //     7. SMALL ARGUMENTS
2681 //
2682 //     If |x| < SNN (SNN meaning the smallest normal number), we
2683 //     simply perform 0.1111111...1111 * x. For SNN <= |x|, we
2684 //     do 2^-55 * (2^55 * x - x).
2685 //
2686 // Special cases:
2687 //  sin(NaN) = quiet NaN, and raise invalid exception
2688 //  sin(INF) = NaN and raise invalid exception
2689 //  sin(+/-0) = +/-0
2690 //
2691 /******************************************************************************/
2692 
2693 ALIGNED_(16) juint _ONEHALF[] =    // 16-byte-aligned table of four 32-bit words forming two identical 64-bit patterns
2694 {
2695     0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL    // low word first: 0x3fe0000000000000 twice, i.e. 0.5 as an IEEE-754 double in each half
2696 };
2697 
2698 ALIGNED_(16) juint _P_2[] =    // 16-byte-aligned table of four 32-bit words forming two identical 64-bit patterns
2699 {
2700     0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL    // low word first: 0x3d90b4611a600000 twice; presumably the P_2 term of the SIN range reduction described in the algorithm comment -- TODO confirm against the code that loads it
2701 };
2702 
2703 ALIGNED_(16) juint _SC_4[] =    // 16-byte-aligned table of four 32-bit words forming two distinct 64-bit patterns
2704 {
2705     0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL    // low word first: 0x3ec71de3a556c734 (~2.7557e-6, about 1/9!) and 0x3efa01a01a01a01a (~2.4802e-5, about 1/8!) as doubles; presumably the SC_4 pair from section 3 of the SIN algorithm comment -- TODO confirm
2706 };
2707 
2708 ALIGNED_(16) juint _Ctable[] =
2709 {
2710     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
2711     0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
2712     0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
2713     0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
2714     0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
2715     0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
2716     0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
2717     0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
2718     0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
2719     0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
2720     0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
2721     0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
2722     0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
2723     0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
2724     0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
2725     0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
2726     0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
2727     0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
2728     0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
2729     0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
2730     0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
2731     0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
2732     0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
2733     0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
2734     0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
2735     0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
2736     0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
2737     0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
2738     0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
2739     0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
2740     0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
2741     0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
2742     0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
2743     0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
2744     0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
2745     0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
2746     0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
2747     0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
2748     0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
2749     0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
2750     0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
2751     0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
2752     0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
2753     0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
2754     0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
2755     0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
2756     0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
2757     0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
2758     0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
2759     0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
2760     0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
2761     0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
2762     0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
2763     0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
2764     0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
2765     0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
2766     0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
2767     0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
2768     0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
2769     0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
2770     0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
2771     0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
2772     0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
2773     0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
2774     0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
2775     0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
2776     0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
2777     0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
2778     0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
2779     0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
2780     0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
2781     0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
2782     0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
2783     0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
2784     0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
2785     0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
2786     0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
2787     0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
2788     0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
2789     0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
2790     0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
2791     0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
2792     0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
2793     0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
2794     0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
2795     0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
2796     0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
2797     0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
2798     0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
2799     0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
2800     0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
2801     0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
2802     0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
2803     0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
2804     0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
2805     0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
2806     0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
2807     0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
2808     0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
2809     0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
2810     0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
2811     0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
2812     0x00000000UL, 0x3ff00000UL
2813 };
2814 
2815 ALIGNED_(16) juint _SC_2[] =
2816 {
         // Two packed doubles, each written as (low word, high word):
         //   0x3f81111111111111 (~1/120) and 0x3fa5555555555555 (~1/24).
         // fast_sin/fast_cos load the pair with movdqu, multiply it by the packed
         // r^2 (mulpd) and then add SC_1.
2817     0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
2818 };
2819 
2820 ALIGNED_(16) juint _SC_3[] =
2821 {
         // Two packed doubles, each written as (low word, high word):
         //   0xbf2a01a01a01a01a (~-1/5040) and 0xbf56c16c16c16c17 (~-1/720).
         // Added to xmm5 with addpd in the polynomial evaluation.
2822     0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
2823 };
2824 
2825 ALIGNED_(16) juint _SC_1[] =
2826 {
         // Two packed doubles, each written as (low word, high word):
         //   0xbfc5555555555555 (~-1/6) and 0xbfe0000000000000 (-0.5).
         // Added to xmm6 with addpd in the polynomial evaluation.
2827     0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
2828 };
2829 
2830 ALIGNED_(16) juint _PI_INV_TABLE[] =
2831 {
         // 41 32-bit words.  The large-argument paths of fast_sin/fast_cos compute a
         // 4-byte-aligned byte offset from the exponent of x and read seven
         // consecutive words (offsets 0..24) starting there.
         // NOTE(review): after the two leading zero words the digits match the
         // hexadecimal expansion of 2/pi (0.A2F9836E4E44...), i.e. 1/pi up to a
         // power-of-two scale -- confirm the intended scaling.
2832     0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL,
2833     0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL,
2834     0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL,
2835     0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL,
2836     0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL,
2837     0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL,
2838     0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL,
2839     0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL,
2840     0xf0cfbc21UL
2841 };
2842 
2843 ALIGNED_(8) juint _PI_4[] =
2844 {
         // Two doubles, each written as (low word, high word):
         //   0x3fe921fb40000000  truncated high part of pi/4
         //   0x3e64442d18469899  low-order remainder (~3.77e-8)
         // The code loads them separately, from PI_4 and from PI_4 + 8.
2845     0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
2846 };
2847 
2848 ALIGNED_(8) juint _PI32INV[] =
2849 {
         // One double, (low word, high word): 0x40245f306dc9c883 ~= 10.1859 = 32/pi.
         // Multiplied by x to obtain y = 32/pi * x in the range reduction.
2850     0x6dc9c883UL, 0x40245f30UL
2851 };
2852 
2853 ALIGNED_(8) juint _SHIFTER[] =
2854 {
         // One double, (low word, high word): 0x4338000000000000 = 1.5 * 2^52.
         // fast_sin loads it into xmm2 at entry.
2855     0x00000000UL, 0x43380000UL
2856 };
2857 
2858 ALIGNED_(8) juint _SIGN_MASK[] =
2859 {
         // 64-bit mask with only bit 63 (the double sign bit) set; pand-ed with x to
         // extract its sign.
2860     0x00000000UL, 0x80000000UL
2861 };
2862 
2863 ALIGNED_(8) juint _P_3[] =
2864 {
         // One double, (low word, high word): 0x3b63198a2e037073.  Multiplied by N
         // (mulsd) in the range reduction; the algorithm notes describe P_3 as the
         // 53-bit third term of P_1 + P_2 + P_3.
2865     0x2e037073UL, 0x3b63198aUL
2866 };
2867 
2868 ALIGNED_(8) juint _ALL_ONES[] =
2869 {
         // One double, (low word, high word): 0x3fefffffffffffff = 1 - 2^-53, the
         // largest double below 1.0.  fast_sin multiplies x by it when the exponent
         // field of x is zero.
2870     0xffffffffUL, 0x3fefffffUL
2871 };
2872 
2873 ALIGNED_(8) juint _TWO_POW_55[] =
2874 {
         // One double, (low word, high word): 0x4360000000000000 = 2^55.
2875     0x00000000UL, 0x43600000UL
2876 };
2877 
2878 ALIGNED_(8) juint _TWO_POW_M55[] =
2879 {
         // One double, (low word, high word): 0x3c80000000000000 = 2^-55.
         // Both words carry the UL suffix, matching every other juint table here.
2880     0x00000000UL, 0x3c800000UL
2881 };
2882 
2883 ALIGNED_(8) juint _P_1[] =
2884 {
         // One double, (low word, high word): 0x3fb921fb54400000 ~= 0.0981748, the
         // leading bits of pi/32 (low 22 mantissa bits are zero).  First term of the
         // P_1 + P_2 + P_3 reduction constant.
2885     0x54400000UL, 0x3fb921fbUL
2886 };
2887 
2888 ALIGNED_(8) juint _NEG_ZERO[] =
2889 {
         // One double, (low word, high word): 0x8000000000000000 = -0.0 (same bit
         // pattern as _SIGN_MASK).  Multiplied with x on the Inf/NaN path.
2890     0x00000000UL, 0x80000000UL
2891 };
2892 
2893 void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ebx, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
       // Emits the instruction sequence that computes sin(x) for the double held in
       // xmm0 and leaves the result in xmm0.  rbx is pushed on entry and popped at
       // B1_4, with 16 bytes of stack scratch reserved in between.
       //
       // NOTE(review): the body addresses rax/rbx/rcx/rdx/rsi/rdi/r8-r11 through
       // their global names; tmp1..tmp4 appear only in assert_different_registers
       // below.  Presumably callers pass exactly those registers -- confirm at the
       // call site.
2894   Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
2895   Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
2896   Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
2897   Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1;
2898   Label L_2TAG_PACKET_12_0_1, B1_1, B1_2, B1_4, start;
2899 
2900   assert_different_registers(tmp1, tmp2, tmp3, tmp4, eax, ebx, ecx, edx);
       // Addresses of the constant tables referenced by the emitted code.
2901   address ONEHALF = (address)_ONEHALF;
2902   address P_2 = (address)_P_2;
2903   address SC_4 = (address)_SC_4;
2904   address Ctable = (address)_Ctable;
2905   address SC_2 = (address)_SC_2;
2906   address SC_3 = (address)_SC_3;
2907   address SC_1 = (address)_SC_1;
2908   address PI_INV_TABLE = (address)_PI_INV_TABLE;
2909   address PI_4 = (address)_PI_4;
2910   address PI32INV = (address)_PI32INV;
2911   address SHIFTER = (address)_SHIFTER;
2912   address SIGN_MASK = (address)_SIGN_MASK;
2913   address P_3 = (address)_P_3;
2914   address ALL_ONES = (address)_ALL_ONES;
2915   address TWO_POW_55 = (address)_TWO_POW_55;
2916   address TWO_POW_M55 = (address)_TWO_POW_M55;
2917   address P_1 = (address)_P_1;
2918   address NEG_ZERO = (address)_NEG_ZERO;
2919 
       // Spill x to the stack and classify it by the upper 32 bits of its encoding.
2920   bind(start);
2921   push(rbx);
2922   subq(rsp, 16);
2923   movsd(Address(rsp, 8), xmm0);
2924   movl(eax, Address(rsp, 12));
2925   movq(xmm1, ExternalAddress(PI32INV));    //0x6dc9c883UL, 0x40245f30UL
       // NOTE(review): xmm2 is reloaded (from Ctable, PI_4 or P_2) before it is read
       // on every path below, so this SHIFTER load looks dead -- confirm before
       // removing it.
2926   movq(xmm2, ExternalAddress(SHIFTER));    //0x00000000UL, 0x43380000UL
       // Keep exponent + top 4 mantissa bits (0x7fff0000), rebase by 0x30300000 and
       // use one unsigned compare against 0x10c50000: in range means
       // 2^-252 <= |x| < 90112, which stays on the main path.
2927   andl(eax, 2147418112);
2928   subl(eax, 808452096);
2929   cmpl(eax, 281346048);
2930   jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
       // Main path.  N = trunc(x * 32/pi + copysign(0.5, x)), kept in edx (integer)
       // and xmm1 (double).
2931   mulsd(xmm1, xmm0);
2932   movdqu(xmm5, ExternalAddress(ONEHALF));    //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
2933   movq(xmm4, ExternalAddress(SIGN_MASK));    //0x00000000UL, 0x80000000UL
2934   pand(xmm4, xmm0);
2935   por(xmm5, xmm4);
2936   addpd(xmm1, xmm5);
2937   cvttsd2sil(edx, xmm1);
2938   cvtsi2sdl(xmm1, edx);
2939   movdqu(xmm6, ExternalAddress(P_2));    //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
       // 0x3fb921fb54400000 is the same bit pattern as the _P_1 table entry.
2940   mov64(r8, 0x3fb921fb54400000);
2941   movdq(xmm3, r8);
2942   movdqu(xmm5, ExternalAddress(SC_4));    //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
2943   pshufd(xmm4, xmm0, 68);
2944   mulsd(xmm3, xmm1);
2945   movddup(xmm1, xmm1);
       // rax = address of the Ctable row for this N: Ctable + (N & 63) * 32 bytes.
2946   andl(edx, 63);
2947   shll(edx, 5);
2948   lea(rax, ExternalAddress(Ctable));
2949   addq(rax, rdx);
       // Reduction (x - N*P_1 - N*P_2, with N*P_3 kept for the correction term),
       // polynomial and summation.  The instruction order is significant.
2950   mulpd(xmm6, xmm1);
2951   mulsd(xmm1, ExternalAddress(P_3));    //0x2e037073UL, 0x3b63198aUL
2952   subsd(xmm4, xmm3);
2953   movq(xmm7, Address(rax, 8));
2954   subsd(xmm0, xmm3);
2955   movddup(xmm3, xmm4);
2956   subsd(xmm4, xmm6);
2957   pshufd(xmm0, xmm0, 68);
2958   movdqu(xmm2, Address(rax, 0));
2959   mulpd(xmm5, xmm0);
2960   subpd(xmm0, xmm6);
2961   mulsd(xmm7, xmm4);
2962   subsd(xmm3, xmm4);
2963   mulpd(xmm5, xmm0);
2964   mulpd(xmm0, xmm0);
2965   subsd(xmm3, xmm6);
2966   movdqu(xmm6, ExternalAddress(SC_2));    //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
2967   subsd(xmm1, xmm3);
2968   movq(xmm3, Address(rax, 24));
2969   addsd(xmm2, xmm3);
2970   subsd(xmm7, xmm2);
2971   mulsd(xmm2, xmm4);
2972   mulpd(xmm6, xmm0);
2973   mulsd(xmm3, xmm4);
2974   mulpd(xmm2, xmm0);
2975   mulpd(xmm0, xmm0);
2976   addpd(xmm5, ExternalAddress(SC_3));    //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
2977   mulsd(xmm4, Address(rax, 0));
2978   addpd(xmm6, ExternalAddress(SC_1));    //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
2979   mulpd(xmm5, xmm0);
2980   movdqu(xmm0, xmm3);
2981   addsd(xmm3, Address(rax, 8));
2982   mulpd(xmm1, xmm7);
2983   movdqu(xmm7, xmm4);
2984   addsd(xmm4, xmm3);
2985   addpd(xmm6, xmm5);
2986   movq(xmm5, Address(rax, 8));
2987   subsd(xmm5, xmm3);
2988   subsd(xmm3, xmm4);
2989   addsd(xmm1, Address(rax, 16));
2990   mulpd(xmm6, xmm2);
2991   addsd(xmm5, xmm0);
2992   addsd(xmm3, xmm7);
2993   addsd(xmm1, xmm5);
2994   addsd(xmm1, xmm3);
2995   addsd(xmm1, xmm6);
2996   unpckhpd(xmm6, xmm6);
2997   movdqu(xmm0, xmm4);
2998   addsd(xmm1, xmm6);
2999   addsd(xmm0, xmm1);
3000   jmp(B1_4);
3001 
       // Out-of-range inputs.  The flags are still those of the cmpl above: signed
       // "greater" selects large |x| (including Inf/NaN); otherwise |x| < 2^-252.
3002   bind(L_2TAG_PACKET_0_0_1);
3003   jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
       // (rebased high word) >> 20 equals 3325 (0xcfd) exactly when the biased
       // exponent field of x is zero (+-0 or denormal): scale x by (1 - 2^-53).
3004   shrl(eax, 20);
3005   cmpl(eax, 3325);
3006   jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1);
3007   mulsd(xmm0, ExternalAddress(ALL_ONES));    //0xffffffffUL, 0x3fefffffUL
3008   jmp(B1_4);
3009 
       // Tiny normal x: xmm0 (= x) is returned unchanged.  The xmm3 arithmetic never
       // feeds xmm0 -- presumably it exists only for its floating-point flag side
       // effects; confirm.
3010   bind(L_2TAG_PACKET_2_0_1);
3011   movq(xmm3, ExternalAddress(TWO_POW_55));    //0x00000000UL, 0x43600000UL
3012   mulsd(xmm3, xmm0);
3013   subsd(xmm3, xmm0);
3014   mulsd(xmm3, ExternalAddress(TWO_POW_M55));    //0x00000000UL, 0x3c800000UL
3015   jmp(B1_4);
3016 
       // Large |x|, Inf or NaN.  An all-ones exponent field goes to the Inf/NaN exit.
3017   bind(L_2TAG_PACKET_1_0_1);
3018   pextrw(eax, xmm0, 3);
3019   andl(eax, 32752);
3020   cmpl(eax, 32752);
3021   jcc(Assembler::equal, L_2TAG_PACKET_3_0_1);
       // Finite large x: derive a 4-byte-aligned byte offset from the exponent and
       // point rcx at that window of PI_INV_TABLE.
3022   pextrw(ecx, xmm0, 3);
3023   andl(ecx, 32752);
3024   subl(ecx, 16224);
3025   shrl(ecx, 7);
3026   andl(ecx, 65532);
3027   lea(r11, ExternalAddress(PI_INV_TABLE));
3028   addq(rcx, r11);
       // rdx = low 32 bits of x; rax = 0x100000 | mantissa bits 51..32 (implicit
       // leading one made explicit).  Both are multiplied against the table words
       // at rcx+0..24 in 32-bit limbs, with carries propagated by hand
       // (movl / shrq 32 / addq).
3029   movdq(rax, xmm0);
3030   movl(r10, Address(rcx, 20));
3031   movl(r8, Address(rcx, 24));
3032   movl(edx, eax);
3033   shrq(rax, 21);
3034   orl(eax, INT_MIN);
3035   shrl(eax, 11);
3036   movl(r9, r10);
3037   imulq(r10, rdx);
3038   imulq(r9, rax);
3039   imulq(r8, rax);
3040   movl(rsi, Address(rcx, 16));
3041   movl(rdi, Address(rcx, 12));
3042   movl(r11, r10);
3043   shrq(r10, 32);
3044   addq(r9, r10);
3045   addq(r11, r8);
3046   movl(r8, r11);
3047   shrq(r11, 32);
3048   addq(r9, r11);
3049   movl(r10, rsi);
3050   imulq(rsi, rdx);
3051   imulq(r10, rax);
3052   movl(r11, rdi);
3053   imulq(rdi, rdx);
3054   movl(ebx, rsi);
3055   shrq(rsi, 32);
3056   addq(r9, rbx);
3057   movl(ebx, r9);
3058   shrq(r9, 32);
3059   addq(r10, rsi);
3060   addq(r10, r9);
3061   shlq(rbx, 32);
3062   orq(r8, rbx);
3063   imulq(r11, rax);
3064   movl(r9, Address(rcx, 8));
3065   movl(rsi, Address(rcx, 4));
3066   movl(ebx, rdi);
3067   shrq(rdi, 32);
3068   addq(r10, rbx);
3069   movl(ebx, r10);
3070   shrq(r10, 32);
3071   addq(r11, rdi);
3072   addq(r11, r10);
3073   movq(rdi, r9);
3074   imulq(r9, rdx);
3075   imulq(rdi, rax);
3076   movl(r10, r9);
3077   shrq(r9, 32);
3078   addq(r11, r10);
3079   movl(r10, r11);
3080   shrq(r11, 32);
3081   addq(rdi, r9);
3082   addq(rdi, r11);
3083   movq(r9, rsi);
3084   imulq(rsi, rdx);
3085   imulq(r9, rax);
3086   shlq(r10, 32);
3087   orq(r10, rbx);
3088   movl(eax, Address(rcx, 0));
3089   movl(r11, rsi);
3090   shrq(rsi, 32);
3091   addq(rdi, r11);
3092   movl(r11, rdi);
3093   shrq(rdi, 32);
3094   addq(r9, rsi);
3095   addq(r9, rdi);
3096   imulq(rdx, rax);
       // Recover the table byte offset (rcx - table base, times 8, plus 19) and the
       // unbiased exponent of x (ebx) to form the shift counts; rsi keeps the sign
       // bit of x (bit 15 of its top 16-bit word).
3097   pextrw(ebx, xmm0, 3);
3098   lea(rdi, ExternalAddress(PI_INV_TABLE));
3099   subq(rcx, rdi);
3100   addl(ecx, ecx);
3101   addl(ecx, ecx);
3102   addl(ecx, ecx);
3103   addl(ecx, 19);
3104   movl(rsi, 32768);
3105   andl(rsi, ebx);
3106   shrl(ebx, 4);
3107   andl(ebx, 2047);
3108   subl(ebx, 1023);
3109   subl(ecx, ebx);
3110   addq(r9, rdx);
3111   movl(edx, ecx);
3112   addl(edx, 32);
3113   cmpl(ecx, 1);
3114   jcc(Assembler::less, L_2TAG_PACKET_4_0_1);
       // NOTE(review): the one-operand shift forms used from here on presumably
       // shift by cl -- confirm against the Assembler definitions.
3115   negl(ecx);
3116   addl(ecx, 29);
3117   shll(r9);
3118   movl(rdi, r9);
3119   andl(r9, 536870911);
3120   testl(r9, 268435456);
3121   jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1);
3122   shrl(r9);
3123   movl(ebx, 0);
3124   shlq(r9, 32);
3125   orq(r9, r11);
3126 
3127   bind(L_2TAG_PACKET_6_0_1);
3128 
3129   bind(L_2TAG_PACKET_7_0_1);
3130 
       // A zero top limb is handled by shifting the lower limbs up (PACKET_8).
3131   cmpq(r9, 0);
3132   jcc(Assembler::equal, L_2TAG_PACKET_8_0_1);
3133 
       // ecx = 29 - index of the highest set bit in r9; a positive count shifts the
       // r9:r10:r8 limbs left, otherwise PACKET_10 handles the zero/negative case.
3134   bind(L_2TAG_PACKET_9_0_1);
3135   bsrq(r11, r9);
3136   movl(ecx, 29);
3137   subl(ecx, r11);
3138   jcc(Assembler::lessEqual, L_2TAG_PACKET_10_0_1);
3139   shlq(r9);
3140   movq(rax, r10);
3141   shlq(r10);
3142   addl(edx, ecx);
3143   negl(ecx);
3144   addl(ecx, 64);
3145   shrq(rax);
3146   shrq(r8);
3147   orq(r9, rax);
3148   orq(r10, r8);
3149 
       // Convert the two integer limbs to double, build scale factors directly in
       // the exponent/sign word (pinsrw into word 3 of a zeroed register), and
       // multiply by pi/4 (PI_4 high and low parts).  The result is the reduced
       // argument in xmm0 with its low-order part in xmm6; eax receives rdi >> 29
       // adjusted by the sign mask in rsi.
3150   bind(L_2TAG_PACKET_11_0_1);
3151   cvtsi2sdq(xmm0, r9);
3152   shrq(r10, 1);
3153   cvtsi2sdq(xmm3, r10);
3154   xorpd(xmm4, xmm4);
3155   shll(edx, 4);
3156   negl(edx);
3157   addl(edx, 16368);
3158   orl(edx, rsi);
3159   xorl(edx, ebx);
3160   pinsrw(xmm4, edx, 3);
3161   movq(xmm2, ExternalAddress(PI_4));    //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
3162   movq(xmm6, ExternalAddress(8 + PI_4));    //0x18469899UL, 0x3e64442dUL
3163   xorpd(xmm5, xmm5);
3164   subl(edx, 1008);
3165   pinsrw(xmm5, edx, 3);
3166   mulsd(xmm0, xmm4);
3167   shll(rsi, 16);
3168   sarl(rsi, 31);
3169   mulsd(xmm3, xmm5);
3170   movdqu(xmm1, xmm0);
3171   mulsd(xmm0, xmm2);
3172   shrl(rdi, 29);
3173   addsd(xmm1, xmm3);
3174   mulsd(xmm3, xmm2);
3175   addl(rdi, rsi);
3176   xorl(rdi, rsi);
3177   mulsd(xmm6, xmm1);
3178   movl(eax, rdi);
3179   addsd(xmm6, xmm3);
3180   movdqu(xmm2, xmm0);
3181   addsd(xmm0, xmm6);
3182   subsd(xmm2, xmm0);
3183   addsd(xmm6, xmm2);
3184 
       // Second-stage reduction and evaluation: same scheme as the main path, with
       // eax * 8 (plus 1865216, a multiple of 64) added to the table index and the
       // low part xmm6 subtracted into the correction term.
3185   bind(L_2TAG_PACKET_12_0_1);
3186   movq(xmm1, ExternalAddress(PI32INV));    //0x6dc9c883UL, 0x40245f30UL
3187   mulsd(xmm1, xmm0);
3188   movq(xmm5, ExternalAddress(ONEHALF));    //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
3189   movq(xmm4, ExternalAddress(SIGN_MASK));    //0x00000000UL, 0x80000000UL
3190   pand(xmm4, xmm0);
3191   por(xmm5, xmm4);
3192   addpd(xmm1, xmm5);
3193   cvttsd2sil(edx, xmm1);
3194   cvtsi2sdl(xmm1, edx);
3195   movq(xmm3, ExternalAddress(P_1));    //0x54400000UL, 0x3fb921fbUL
3196   movdqu(xmm2, ExternalAddress(P_2));    //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
3197   mulsd(xmm3, xmm1);
3198   unpcklpd(xmm1, xmm1);
3199   shll(eax, 3);
3200   addl(edx, 1865216);
3201   movdqu(xmm4, xmm0);
3202   addl(edx, eax);
3203   andl(edx, 63);
3204   movdqu(xmm5, ExternalAddress(SC_4));    //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
3205   lea(rax, ExternalAddress(Ctable));
3206   shll(edx, 5);
3207   addq(rax, rdx);
3208   mulpd(xmm2, xmm1);
3209   subsd(xmm0, xmm3);
3210   mulsd(xmm1, ExternalAddress(P_3));    //0x2e037073UL, 0x3b63198aUL
3211   subsd(xmm4, xmm3);
3212   movq(xmm7, Address(rax, 8));
3213   unpcklpd(xmm0, xmm0);
3214   movdqu(xmm3, xmm4);
3215   subsd(xmm4, xmm2);
3216   mulpd(xmm5, xmm0);
3217   subpd(xmm0, xmm2);
3218   mulsd(xmm7, xmm4);
3219   subsd(xmm3, xmm4);
3220   mulpd(xmm5, xmm0);
3221   mulpd(xmm0, xmm0);
3222   subsd(xmm3, xmm2);
3223   movdqu(xmm2, Address(rax, 0));
3224   subsd(xmm1, xmm3);
3225   movq(xmm3, Address(rax, 24));
3226   addsd(xmm2, xmm3);
3227   subsd(xmm7, xmm2);
3228   subsd(xmm1, xmm6);
3229   movdqu(xmm6, ExternalAddress(SC_2));    //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
3230   mulsd(xmm2, xmm4);
3231   mulpd(xmm6, xmm0);
3232   mulsd(xmm3, xmm4);
3233   mulpd(xmm2, xmm0);
3234   mulpd(xmm0, xmm0);
3235   addpd(xmm5, ExternalAddress(SC_3));    //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
3236   mulsd(xmm4, Address(rax, 0));
3237   addpd(xmm6, ExternalAddress(SC_1));    //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
3238   mulpd(xmm5, xmm0);
3239   movdqu(xmm0, xmm3);
3240   addsd(xmm3, Address(rax, 8));
3241   mulpd(xmm1, xmm7);
3242   movdqu(xmm7, xmm4);
3243   addsd(xmm4, xmm3);
3244   addpd(xmm6, xmm5);
3245   movq(xmm5, Address(rax, 8));
3246   subsd(xmm5, xmm3);
3247   subsd(xmm3, xmm4);
3248   addsd(xmm1, Address(rax, 16));
3249   mulpd(xmm6, xmm2);
3250   addsd(xmm5, xmm0);
3251   addsd(xmm3, xmm7);
3252   addsd(xmm1, xmm5);
3253   addsd(xmm1, xmm3);
3254   addsd(xmm1, xmm6);
3255   unpckhpd(xmm6, xmm6);
3256   movdqu(xmm0, xmm4);
3257   addsd(xmm1, xmm6);
3258   addsd(xmm0, xmm1);
3259   jmp(B1_4);
3260 
       // r9 == 0: move the limbs up one position (r10 -> r9, r8 -> r10), adding 64
       // to edx each time.  If every limb is zero the reduced argument is 0 and the
       // evaluation is entered with xmm0 = xmm6 = 0.
3261   bind(L_2TAG_PACKET_8_0_1);
3262   addl(edx, 64);
3263   movq(r9, r10);
3264   movq(r10, r8);
3265   movl(r8, 0);
3266   cmpq(r9, 0);
3267   jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1);
3268   addl(edx, 64);
3269   movq(r9, r10);
3270   movq(r10, r8);
3271   cmpq(r9, 0);
3272   jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1);
3273   xorpd(xmm0, xmm0);
3274   xorpd(xmm6, xmm6);
3275   jmp(L_2TAG_PACKET_12_0_1);
3276 
       // Reached with the flags of "subl(ecx, r11)": zero needs no shift, a negative
       // count shifts the limbs right by -ecx instead.
3277   bind(L_2TAG_PACKET_10_0_1);
3278   jcc(Assembler::equal, L_2TAG_PACKET_11_0_1);
3279   negl(ecx);
3280   shrq(r10);
3281   movq(rax, r9);
3282   shrq(r9);
3283   subl(edx, ecx);
3284   negl(ecx);
3285   addl(ecx, 64);
3286   shlq(rax);
3287   orq(r10, rax);
3288   jmp(L_2TAG_PACKET_11_0_1);
3289 
       // ecx < 1 case from the compare above.
3290   bind(L_2TAG_PACKET_4_0_1);
3291   negl(ecx);
3292   shlq(r9, 32);
3293   orq(r9, r11);
3294   shlq(r9);
3295   movq(rdi, r9);
3296   testl(r9, INT_MIN);
3297   jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1);
3298   shrl(r9);
3299   movl(ebx, 0);
3300   shrq(rdi, 3);
3301   jmp(L_2TAG_PACKET_7_0_1);
3302 
       // Bit 28 of r9 was set: replace r9:r10:r8 by (rbx:0:0) - (r9:r10:r8) using a
       // sub/sbb chain, bump rdi, and set ebx = 32768, which is later xor-ed into
       // the sign/exponent word of the scale factor.
3303   bind(L_2TAG_PACKET_5_0_1);
3304   shrl(r9);
3305   movl(ebx, 536870912);
3306   shrl(ebx);
3307   shlq(r9, 32);
3308   orq(r9, r11);
3309   shlq(rbx, 32);
3310   addl(rdi, 536870912);
3311   movl(rcx, 0);
3312   movl(r11, 0);
3313   subq(rcx, r8);
3314   sbbq(r11, r10);
3315   sbbq(rbx, r9);
3316   movq(r8, rcx);
3317   movq(r10, r11);
3318   movq(r9, rbx);
3319   movl(ebx, 32768);
3320   jmp(L_2TAG_PACKET_6_0_1);
3321 
       // Same complement step for the PACKET_4 path (top bit of the low 32 bits of
       // r9 set).
3322   bind(L_2TAG_PACKET_13_0_1);
3323   shrl(r9);
3324   mov64(rbx, 0x100000000);
3325   shrq(rbx);
3326   movl(rcx, 0);
3327   movl(r11, 0);
3328   subq(rcx, r8);
3329   sbbq(r11, r10);
3330   sbbq(rbx, r9);
3331   movq(r8, rcx);
3332   movq(r10, r11);
3333   movq(r9, rbx);
3334   movl(ebx, 32768);
3335   shrq(rdi, 3);
3336   addl(rdi, 536870912);
3337   jmp(L_2TAG_PACKET_7_0_1);
3338 
       // Inf or NaN input: reload x and multiply by -0.0, which yields NaN in both
       // cases; the result is also stored to [rsp].
3339   bind(L_2TAG_PACKET_3_0_1);
3340   movq(xmm0, Address(rsp, 8));
3341   mulsd(xmm0, ExternalAddress(NEG_ZERO));    //0x00000000UL, 0x80000000UL
3342   movq(Address(rsp, 0), xmm0);
3343 
3344   bind(L_2TAG_PACKET_14_0_1);
3345 
       // Common exit: release the scratch area and restore rbx.
3346   bind(B1_4);
3347   addq(rsp, 16);
3348   pop(rbx);
3349 }
3350 
3351 /******************************************************************************/
3352 //                     ALGORITHM DESCRIPTION - COS()
3353 //                     ---------------------
3354 //
3355 //     1. RANGE REDUCTION
3356 //
3357 //     We perform an initial range reduction from X to r with
3358 //
3359 //          X =~= N * pi/32 + r
3360 //
3361 //     so that |r| <= pi/64 + epsilon. We restrict inputs to those
3362 //     where |N| <= 932560. Beyond this, the range reduction is
3363 //     insufficiently accurate. For extremely small inputs,
3364 //     denormalization can occur internally, impacting performance.
3365 //     This means that the main path is actually only taken for
3366 //     2^-252 <= |X| < 90112.
3367 //
3368 //     To avoid branches, we perform the range reduction to full
3369 //     accuracy each time.
3370 //
3371 //          X - N * (P_1 + P_2 + P_3)
3372 //
3373 //     where P_1 and P_2 are 32-bit numbers (so multiplication by N
3374 //     is exact) and P_3 is a 53-bit number. Together, these
3375 //     approximate pi well enough for all cases in the restricted
3376 //     range.
3377 //
3378 //     The main reduction sequence is:
3379 //
3380 //             y = 32/pi * x
3381 //             N = integer(y)
3382 //     (the code below adds a sign-matched 0.5 and truncates with cvttsd2si rather than using SHIFTER)
3383 //
3384 //             m_1 = N * P_1
3385 //             m_2 = N * P_2
3386 //             r_1 = x - m_1
3387 //             r = r_1 - m_2
3388 //     (this r can be used for most of the calculation)
3389 //
3390 //             c_1 = r_1 - r
3391 //             m_3 = N * P_3
3392 //             c_2 = c_1 - m_2
3393 //             c = c_2 - m_3
3394 //
3395 //     2. MAIN ALGORITHM
3396 //
3397 //     The algorithm uses a table lookup based on B = M * pi / 32
3398 //     where M = N mod 64. The stored values are:
3399 //       sigma             closest power of 2 to cos(B)
3400 //       C_hl              53-bit cos(B) - sigma
3401 //       S_hi + S_lo       2 * 53-bit sin(B)
3402 //
3403 //     The computation is organized as follows:
3404 //
3405 //          sin(B + r + c) = [sin(B) + sigma * r] +
3406 //                           r * (cos(B) - sigma) +
3407 //                           sin(B) * [cos(r + c) - 1] +
3408 //                           cos(B) * [sin(r + c) - r]
3409 //
3410 //     which is approximately:
3411 //
3412 //          [S_hi + sigma * r] +
3413 //          C_hl * r +
3414 //          S_lo + S_hi * [(cos(r) - 1) - r * c] +
3415 //          (C_hl + sigma) * [(sin(r) - r) + c]
3416 //
3417 //     and this is what is actually computed. We separate this sum
3418 //     into four parts:
3419 //
3420 //          hi + med + pols + corr
3421 //
3422 //     where
3423 //
3424 //          hi       = S_hi + sigma r
3425 //          med      = C_hl * r
3426 //          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
3427 //          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
3428 //
3429 //     3. POLYNOMIAL
3430 //
3431 //     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
3432 //     (sin(r) - r) can be rearranged freely, since it is quite
3433 //     small, so we exploit parallelism to the fullest.
3434 //
3435 //          psc4       =   SC_4 * r_1
3436 //          msc4       =   psc4 * r
3437 //          r2         =   r * r
3438 //          msc2       =   SC_2 * r2
3439 //          r4         =   r2 * r2
3440 //          psc3       =   SC_3 + msc4
3441 //          psc1       =   SC_1 + msc2
3442 //          msc3       =   r4 * psc3
3443 //          sincospols =   psc1 + msc3
3444 //          pols       =   sincospols *
3445 //                         <S_hi * r^2 | (C_hl + sigma) * r^3>
3446 //
3447 //     4. CORRECTION TERM
3448 //
3449 //     This is where the "c" component of the range reduction is
3450 //     taken into account; recall that just "r" is used for most of
3451 //     the calculation.
3452 //
3453 //          -c   = m_3 - c_2
3454 //          -d   = S_hi * r - (C_hl + sigma)
3455 //          corr = -c * -d + S_lo
3456 //
3457 //     5. COMPENSATED SUMMATIONS
3458 //
3459 //     The two successive compensated summations add up the high
3460 //     and medium parts, leaving just the low parts to add up at
3461 //     the end.
3462 //
3463 //          rs        =  sigma * r
3464 //          res_int   =  S_hi + rs
3465 //          k_0       =  S_hi - res_int
3466 //          k_2       =  k_0 + rs
3467 //          med       =  C_hl * r
3468 //          res_hi    =  res_int + med
3469 //          k_1       =  res_int - res_hi
3470 //          k_3       =  k_1 + med
3471 //
3472 //     6. FINAL SUMMATION
3473 //
3474 //     We now add up all the small parts:
3475 //
3476 //          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
3477 //
3478 //     Now the overall result is just:
3479 //
3480 //          res_hi + res_lo
3481 //
3482 //     7. SMALL ARGUMENTS
3483 //
3484 //     Inputs with |X| < 2^-252 are treated specially as
3485 //     1 - |x|.
3486 //
3487 // Special cases:
3488 //  cos(NaN) = quiet NaN, and raise invalid exception
3489 //  cos(INF) = NaN and raise invalid exception
3490 //  cos(0) = 1
3491 //
3492 /******************************************************************************/
3493 
3494 ALIGNED_(8) juint _ONE[] =
3495 {
         // One double, (low word, high word): 0x3ff0000000000000 = 1.0.  fast_cos
         // returns 1.0 - |x| from it for inputs below the main-path range.
3496     0x00000000UL, 0x3ff00000UL
3497 };
3498 
3499 void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
3500   Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
3501   Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
3502   Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
3503   Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
3504 
3505   assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
3506 
3507   address ONEHALF = (address)_ONEHALF;
3508   address P_2 = (address)_P_2;
3509   address SC_4 = (address)_SC_4;
3510   address Ctable = (address)_Ctable;
3511   address SC_2 = (address)_SC_2;
3512   address SC_3 = (address)_SC_3;
3513   address SC_1 = (address)_SC_1;
3514   address PI_INV_TABLE = (address)_PI_INV_TABLE;
3515   address PI_4 = (address)_PI_4;
3516   address PI32INV = (address)_PI32INV;
3517   address SIGN_MASK = (address)_SIGN_MASK;
3518   address P_1 = (address)_P_1;
3519   address P_3 = (address)_P_3;
3520   address ONE = (address)_ONE;
3521   address NEG_ZERO = (address)_NEG_ZERO;
3522 
3523   bind(start);
3524   push(rbx);
3525   subq(rsp, 16);
3526   movsd(Address(rsp, 8), xmm0);
3527 
3528   bind(B1_2);
3529   movl(eax, Address(rsp, 12));
3530   movq(xmm1, ExternalAddress(PI32INV));    //0x6dc9c883UL, 0x40245f30UL
3531   andl(eax, 2147418112);
3532   subl(eax, 808452096);
3533   cmpl(eax, 281346048);
3534   jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
3535   mulsd(xmm1, xmm0);
3536   movdqu(xmm5, ExternalAddress(ONEHALF));    //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
3537   movq(xmm4, ExternalAddress(SIGN_MASK));    //0x00000000UL, 0x80000000UL
3538   pand(xmm4, xmm0);
3539   por(xmm5, xmm4);
3540   addpd(xmm1, xmm5);
3541   cvttsd2sil(edx, xmm1);
3542   cvtsi2sdl(xmm1, edx);
3543   movdqu(xmm2, ExternalAddress(P_2));    //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
3544   movq(xmm3, ExternalAddress(P_1));    //0x54400000UL, 0x3fb921fbUL
3545   mulsd(xmm3, xmm1);
3546   unpcklpd(xmm1, xmm1);
3547   addq(rdx, 1865232);
3548   movdqu(xmm4, xmm0);
3549   andq(rdx, 63);
3550   movdqu(xmm5, ExternalAddress(SC_4));    //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
3551   lea(rax, ExternalAddress(Ctable));
3552   shlq(rdx, 5);
3553   addq(rax, rdx);
3554   mulpd(xmm2, xmm1);
3555   subsd(xmm0, xmm3);
3556   mulsd(xmm1, ExternalAddress(P_3));    //0x2e037073UL, 0x3b63198aUL
3557   subsd(xmm4, xmm3);
3558   movq(xmm7, Address(rax, 8));
3559   unpcklpd(xmm0, xmm0);
3560   movdqu(xmm3, xmm4);
3561   subsd(xmm4, xmm2);
3562   mulpd(xmm5, xmm0);
3563   subpd(xmm0, xmm2);
3564   movdqu(xmm6, ExternalAddress(SC_2));    //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
3565   mulsd(xmm7, xmm4);
3566   subsd(xmm3, xmm4);
3567   mulpd(xmm5, xmm0);
3568   mulpd(xmm0, xmm0);
3569   subsd(xmm3, xmm2);
3570   movdqu(xmm2, Address(rax, 0));
3571   subsd(xmm1, xmm3);
3572   movq(xmm3, Address(rax, 24));
3573   addsd(xmm2, xmm3);
3574   subsd(xmm7, xmm2);
3575   mulsd(xmm2, xmm4);
3576   mulpd(xmm6, xmm0);
3577   mulsd(xmm3, xmm4);
3578   mulpd(xmm2, xmm0);
3579   mulpd(xmm0, xmm0);
3580   addpd(xmm5, ExternalAddress(SC_3));    //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
3581   mulsd(xmm4, Address(rax, 0));
3582   addpd(xmm6, ExternalAddress(SC_1));    //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
3583   mulpd(xmm5, xmm0);
3584   movdqu(xmm0, xmm3);
3585   addsd(xmm3, Address(rax, 8));
3586   mulpd(xmm1, xmm7);
3587   movdqu(xmm7, xmm4);
3588   addsd(xmm4, xmm3);
3589   addpd(xmm6, xmm5);
3590   movq(xmm5, Address(rax, 8));
3591   subsd(xmm5, xmm3);
3592   subsd(xmm3, xmm4);
3593   addsd(xmm1, Address(rax, 16));
3594   mulpd(xmm6, xmm2);
3595   addsd(xmm0, xmm5);
3596   addsd(xmm3, xmm7);
3597   addsd(xmm0, xmm1);
3598   addsd(xmm0, xmm3);
3599   addsd(xmm0, xmm6);
3600   unpckhpd(xmm6, xmm6);
3601   addsd(xmm0, xmm6);
3602   addsd(xmm0, xmm4);
3603   jmp(B1_4);
3604 
3605   bind(L_2TAG_PACKET_0_0_1);
3606   jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
3607   pextrw(eax, xmm0, 3);
3608   andl(eax, 32767);
3609   pinsrw(xmm0, eax, 3);
3610   movq(xmm1, ExternalAddress(ONE));    //0x00000000UL, 0x3ff00000UL
3611   subsd(xmm1, xmm0);
3612   movdqu(xmm0, xmm1);
3613   jmp(B1_4);
3614 
3615   bind(L_2TAG_PACKET_1_0_1);
3616   pextrw(eax, xmm0, 3);
3617   andl(eax, 32752);
3618   cmpl(eax, 32752);
3619   jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
3620   pextrw(ecx, xmm0, 3);
3621   andl(ecx, 32752);
3622   subl(ecx, 16224);
3623   shrl(ecx, 7);
3624   andl(ecx, 65532);
3625   lea(r11, ExternalAddress(PI_INV_TABLE));
3626   addq(rcx, r11);
3627   movdq(rax, xmm0);
3628   movl(r10, Address(rcx, 20));
3629   movl(r8, Address(rcx, 24));
3630   movl(edx, eax);
3631   shrq(rax, 21);
3632   orl(eax, INT_MIN);
3633   shrl(eax, 11);
3634   movl(r9, r10);
3635   imulq(r10, rdx);
3636   imulq(r9, rax);
3637   imulq(r8, rax);
3638   movl(rsi, Address(rcx, 16));
3639   movl(rdi, Address(rcx, 12));
3640   movl(r11, r10);
3641   shrq(r10, 32);
3642   addq(r9, r10);
3643   addq(r11, r8);
3644   movl(r8, r11);
3645   shrq(r11, 32);
3646   addq(r9, r11);
3647   movl(r10, rsi);
3648   imulq(rsi, rdx);
3649   imulq(r10, rax);
3650   movl(r11, rdi);
3651   imulq(rdi, rdx);
3652   movl(rbx, rsi);
3653   shrq(rsi, 32);
3654   addq(r9, rbx);
3655   movl(rbx, r9);
3656   shrq(r9, 32);
3657   addq(r10, rsi);
3658   addq(r10, r9);
3659   shlq(rbx, 32);
3660   orq(r8, rbx);
3661   imulq(r11, rax);
3662   movl(r9, Address(rcx, 8));
3663   movl(rsi, Address(rcx, 4));
3664   movl(rbx, rdi);
3665   shrq(rdi, 32);
3666   addq(r10, rbx);
3667   movl(rbx, r10);
3668   shrq(r10, 32);
3669   addq(r11, rdi);
3670   addq(r11, r10);
3671   movq(rdi, r9);
3672   imulq(r9, rdx);
3673   imulq(rdi, rax);
3674   movl(r10, r9);
3675   shrq(r9, 32);
3676   addq(r11, r10);
3677   movl(r10, r11);
3678   shrq(r11, 32);
3679   addq(rdi, r9);
3680   addq(rdi, r11);
3681   movq(r9, rsi);
3682   imulq(rsi, rdx);
3683   imulq(r9, rax);
3684   shlq(r10, 32);
3685   orq(r10, rbx);
3686   movl(eax, Address(rcx, 0));
3687   movl(r11, rsi);
3688   shrq(rsi, 32);
3689   addq(rdi, r11);
3690   movl(r11, rdi);
3691   shrq(rdi, 32);
3692   addq(r9, rsi);
3693   addq(r9, rdi);
3694   imulq(rdx, rax);
3695   pextrw(rbx, xmm0, 3);
3696   lea(rdi, ExternalAddress(PI_INV_TABLE));
3697   subq(rcx, rdi);
3698   addl(ecx, ecx);
3699   addl(ecx, ecx);
3700   addl(ecx, ecx);
3701   addl(ecx, 19);
3702   movl(rsi, 32768);
3703   andl(rsi, rbx);
3704   shrl(rbx, 4);
3705   andl(rbx, 2047);
3706   subl(rbx, 1023);
3707   subl(ecx, rbx);
3708   addq(r9, rdx);
3709   movl(edx, ecx);
3710   addl(edx, 32);
3711   cmpl(ecx, 1);
3712   jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
3713   negl(ecx);
3714   addl(ecx, 29);
3715   shll(r9);
3716   movl(rdi, r9);
3717   andl(r9, 536870911);
3718   testl(r9, 268435456);
3719   jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
3720   shrl(r9);
3721   movl(rbx, 0);
3722   shlq(r9, 32);
3723   orq(r9, r11);
3724 
3725   bind(L_2TAG_PACKET_5_0_1);
3726 
3727   bind(L_2TAG_PACKET_6_0_1);
3728   cmpq(r9, 0);
3729   jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
3730 
3731   bind(L_2TAG_PACKET_8_0_1);
3732   bsrq(r11, r9);
3733   movl(ecx, 29);
3734   subl(ecx, r11);
3735   jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
3736   shlq(r9);
3737   movq(rax, r10);
3738   shlq(r10);
3739   addl(edx, ecx);
3740   negl(ecx);
3741   addl(ecx, 64);
3742   shrq(rax);
3743   shrq(r8);
3744   orq(r9, rax);
3745   orq(r10, r8);
3746 
3747   bind(L_2TAG_PACKET_10_0_1);
3748   cvtsi2sdq(xmm0, r9);
3749   shrq(r10, 1);
3750   cvtsi2sdq(xmm3, r10);
3751   xorpd(xmm4, xmm4);
3752   shll(edx, 4);
3753   negl(edx);
3754   addl(edx, 16368);
3755   orl(edx, rsi);
3756   xorl(edx, rbx);
3757   pinsrw(xmm4, edx, 3);
3758   movq(xmm2, ExternalAddress(PI_4));    //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
3759   movq(xmm6, ExternalAddress(8 + PI_4));    //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
3760   xorpd(xmm5, xmm5);
3761   subl(edx, 1008);
3762   pinsrw(xmm5, edx, 3);
3763   mulsd(xmm0, xmm4);
3764   shll(rsi, 16);
3765   sarl(rsi, 31);
3766   mulsd(xmm3, xmm5);
3767   movdqu(xmm1, xmm0);
3768   mulsd(xmm0, xmm2);
3769   shrl(rdi, 29);
3770   addsd(xmm1, xmm3);
3771   mulsd(xmm3, xmm2);
3772   addl(rdi, rsi);
3773   xorl(rdi, rsi);
3774   mulsd(xmm6, xmm1);
3775   movl(eax, rdi);
3776   addsd(xmm6, xmm3);
3777   movdqu(xmm2, xmm0);
3778   addsd(xmm0, xmm6);
3779   subsd(xmm2, xmm0);
3780   addsd(xmm6, xmm2);
3781 
3782   bind(L_2TAG_PACKET_11_0_1);
3783   movq(xmm1, ExternalAddress(PI32INV));    //0x6dc9c883UL, 0x40245f30UL
3784   mulsd(xmm1, xmm0);
3785   movq(xmm5, ExternalAddress(ONEHALF));    //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
3786   movq(xmm4, ExternalAddress(SIGN_MASK));    //0x00000000UL, 0x80000000UL
3787   pand(xmm4, xmm0);
3788   por(xmm5, xmm4);
3789   addpd(xmm1, xmm5);
3790   cvttsd2siq(rdx, xmm1);
3791   cvtsi2sdq(xmm1, rdx);
3792   movq(xmm3, ExternalAddress(P_1));    //0x54400000UL, 0x3fb921fbUL
3793   movdqu(xmm2, ExternalAddress(P_2));    //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
3794   mulsd(xmm3, xmm1);
3795   unpcklpd(xmm1, xmm1);
3796   shll(eax, 3);
3797   addl(edx, 1865232);
3798   movdqu(xmm4, xmm0);
3799   addl(edx, eax);
3800   andl(edx, 63);
3801   movdqu(xmm5, ExternalAddress(SC_4));    //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
3802   lea(rax, ExternalAddress(Ctable));
3803   shll(edx, 5);
3804   addq(rax, rdx);
3805   mulpd(xmm2, xmm1);
3806   subsd(xmm0, xmm3);
3807   mulsd(xmm1, ExternalAddress(P_3));    //0x2e037073UL, 0x3b63198aUL
3808   subsd(xmm4, xmm3);
3809   movq(xmm7, Address(rax, 8));
3810   unpcklpd(xmm0, xmm0);
3811   movdqu(xmm3, xmm4);
3812   subsd(xmm4, xmm2);
3813   mulpd(xmm5, xmm0);
3814   subpd(xmm0, xmm2);
3815   mulsd(xmm7, xmm4);
3816   subsd(xmm3, xmm4);
3817   mulpd(xmm5, xmm0);
3818   mulpd(xmm0, xmm0);
3819   subsd(xmm3, xmm2);
3820   movdqu(xmm2, Address(rax, 0));
3821   subsd(xmm1, xmm3);
3822   movq(xmm3, Address(rax, 24));
3823   addsd(xmm2, xmm3);
3824   subsd(xmm7, xmm2);
3825   subsd(xmm1, xmm6);
3826   movdqu(xmm6, ExternalAddress(SC_2));    //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
3827   mulsd(xmm2, xmm4);
3828   mulpd(xmm6, xmm0);
3829   mulsd(xmm3, xmm4);
3830   mulpd(xmm2, xmm0);
3831   mulpd(xmm0, xmm0);
3832   addpd(xmm5, ExternalAddress(SC_3));    //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
3833   mulsd(xmm4, Address(rax, 0));
3834   addpd(xmm6, ExternalAddress(SC_1));    //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
3835   mulpd(xmm5, xmm0);
3836   movdqu(xmm0, xmm3);
3837   addsd(xmm3, Address(rax, 8));
3838   mulpd(xmm1, xmm7);
3839   movdqu(xmm7, xmm4);
3840   addsd(xmm4, xmm3);
3841   addpd(xmm6, xmm5);
3842   movq(xmm5, Address(rax, 8));
3843   subsd(xmm5, xmm3);
3844   subsd(xmm3, xmm4);
3845   addsd(xmm1, Address(rax, 16));
3846   mulpd(xmm6, xmm2);
3847   addsd(xmm5, xmm0);
3848   addsd(xmm3, xmm7);
3849   addsd(xmm1, xmm5);
3850   addsd(xmm1, xmm3);
3851   addsd(xmm1, xmm6);
3852   unpckhpd(xmm6, xmm6);
3853   movdqu(xmm0, xmm4);
3854   addsd(xmm1, xmm6);
3855   addsd(xmm0, xmm1);
3856   jmp(B1_4);
3857 
3858   bind(L_2TAG_PACKET_7_0_1);
3859   addl(edx, 64);
3860   movq(r9, r10);
3861   movq(r10, r8);
3862   movl(r8, 0);
3863   cmpq(r9, 0);
3864   jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
3865   addl(edx, 64);
3866   movq(r9, r10);
3867   movq(r10, r8);
3868   cmpq(r9, 0);
3869   jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
3870   xorpd(xmm0, xmm0);
3871   xorpd(xmm6, xmm6);
3872   jmp(L_2TAG_PACKET_11_0_1);
3873 
3874   bind(L_2TAG_PACKET_9_0_1);
3875   jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
3876   negl(ecx);
3877   shrq(r10);
3878   movq(rax, r9);
3879   shrq(r9);
3880   subl(edx, ecx);
3881   negl(ecx);
3882   addl(ecx, 64);
3883   shlq(rax);
3884   orq(r10, rax);
3885   jmp(L_2TAG_PACKET_10_0_1);
3886   bind(L_2TAG_PACKET_3_0_1);
3887   negl(ecx);
3888   shlq(r9, 32);
3889   orq(r9, r11);
3890   shlq(r9);
3891   movq(rdi, r9);
3892   testl(r9, INT_MIN);
3893   jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
3894   shrl(r9);
3895   movl(rbx, 0);
3896   shrq(rdi, 3);
3897   jmp(L_2TAG_PACKET_6_0_1);
3898 
3899   bind(L_2TAG_PACKET_4_0_1);
3900   shrl(r9);
3901   movl(rbx, 536870912);
3902   shrl(rbx);
3903   shlq(r9, 32);
3904   orq(r9, r11);
3905   shlq(rbx, 32);
3906   addl(rdi, 536870912);
3907   movl(rcx, 0);
3908   movl(r11, 0);
3909   subq(rcx, r8);
3910   sbbq(r11, r10);
3911   sbbq(rbx, r9);
3912   movq(r8, rcx);
3913   movq(r10, r11);
3914   movq(r9, rbx);
3915   movl(rbx, 32768);
3916   jmp(L_2TAG_PACKET_5_0_1);
3917 
3918   bind(L_2TAG_PACKET_12_0_1);
3919   shrl(r9);
3920   mov64(rbx, 0x100000000);
3921   shrq(rbx);
3922   movl(rcx, 0);
3923   movl(r11, 0);
3924   subq(rcx, r8);
3925   sbbq(r11, r10);
3926   sbbq(rbx, r9);
3927   movq(r8, rcx);
3928   movq(r10, r11);
3929   movq(r9, rbx);
3930   movl(rbx, 32768);
3931   shrq(rdi, 3);
3932   addl(rdi, 536870912);
3933   jmp(L_2TAG_PACKET_6_0_1);
3934 
3935   bind(L_2TAG_PACKET_2_0_1);
3936   movsd(xmm0, Address(rsp, 8));
3937   mulsd(xmm0, ExternalAddress(NEG_ZERO));    //0x00000000UL, 0x80000000UL
3938   movq(Address(rsp, 0), xmm0);
3939 
3940   bind(L_2TAG_PACKET_13_0_1);
3941 
3942   bind(B1_4);
3943   addq(rsp, 16);
3944   pop(rbx);
3945 }