/*
 * Copyright (c) 2015, Intel Corporation.
 * Intel Math Library (LIBM) Source Code
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"

// Portable spelling of "align this object to x bytes".
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif

/******************************************************************************/
//                     ALGORITHM DESCRIPTION - EXP()
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//
//       values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
//       P(y) is a minimax polynomial approximation of exp(y)-1
//       on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//       calculated by Maple V).
//
//  To avoid problems with arithmetic overflow and underflow, the
//  value of 2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2]
//  and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#ifdef _LP64

// Constants for the 64-bit fast_exp. Each pair of juint words is one IEEE
// double stored low word first; most constants are duplicated so that one
// 16-byte load fills both lanes of an XMM register.
ALIGNED_(16) juint _cv[] =
{
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,   // +0:  ~92.33, i.e. K/log(2)
    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,   // +16: ~log(2)/K, high part
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,   // +32: low-order correction subtracted after the +16 term
    0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL,   // +48: ~0.5
    0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,   // +64: ~1/720, ~1/24
    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL    // +80: ~1/120, ~1/6
};

// 0x4338000000000000 = 1.5 * 2^52; fast_exp adds and then subtracts it.
ALIGNED_(16) juint _shifter[] =
{
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};

// AND mask that clears the low 6 bits of the low dword of each 64-bit lane.
ALIGNED_(16) juint _mmask[] =
{
    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};

// 0xffc0 per 64-bit lane; fast_exp adds it with paddq before shifting left by 46.
ALIGNED_(16) juint _bias[] =
{
    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};

// 64 entries of 16 bytes, indexed by (m & 63) * 16 in fast_exp. The low qword
// of an entry is added as a double; the high qword is OR-ed with the exponent
// bits built from m. NOTE(review): presumably T_lo[j] and the mantissa of
// T_hi[j] from the algorithm description - confirm against the LIBM sources.
ALIGNED_(16) juint _Tbl_addr[] =
{
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
    0x0e03754dUL, 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL,
    0x3567f613UL, 0x3c8cd252UL, 0xd3158574UL, 0x000059b0UL,
    0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 0x00008745UL,
    0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL,
    0x1e63bcd8UL, 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL,
    0x26f0387bUL, 0x3ca4c554UL, 0xaea92ddfUL, 0x0001429aUL,
    0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 0x000172b8UL,
    0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL,
    0x44a6c38dUL, 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL,
    0xe3a8a894UL, 0x3c968efdUL, 0x6e756238UL, 0x0002387aUL,
    0x981fe7f2UL, 0x3c80472bUL,
0x65e27cddUL, 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 0x000ae89fUL, 
    // (continuation of the _Tbl_addr initializer that starts on an earlier line)
    0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL,
    0x0a5fddcdUL, 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL,
    0x30af0cb3UL, 0x3c736eaeUL, 0xdd85529cUL, 0x000c199bUL,
    0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 0x000c67f1UL,
    0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL,
    0xa63d07a7UL, 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL,
    0xd5c192acUL, 0x3c8e5a50UL, 0x03db3285UL, 0x000da9e6UL,
    0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 0x000dfc97UL,
    0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL,
    0x41aa2008UL, 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL,
    0x31d185eeUL, 0x3c8a64a9UL, 0x5b6e4540UL, 0x000f5076UL,
    0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 0x000fa7c1UL
};

// All bits set; shifted left by a run-time count in fast_exp to build a mask.
ALIGNED_(16) juint _ALLONES[] =
{
    0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};

// Bit pattern of 1.0 (0x3ff0000000000000) in both lanes.
ALIGNED_(16) juint _ebias[] =
{
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};

// Largest finite double; squared in fast_exp to produce an overflowed result.
ALIGNED_(4) juint _XMAX[] =
{
    0xffffffffUL, 0x7fefffffUL
};

// Smallest normal double; squared in fast_exp to produce an underflowed result.
ALIGNED_(4) juint _XMIN[] =
{
    0x00000000UL, 0x00100000UL
};

// +infinity.
ALIGNED_(4) juint _INF[] =
{
    0x00000000UL, 0x7ff00000UL
};

// +0.0.
ALIGNED_(4) juint _ZERO[] =
{
    0x00000000UL, 0x00000000UL
};

// 1.0.
ALIGNED_(4) juint _ONE_val[] =
{
    0x00000000UL, 0x3ff00000UL
};

// Emits inline 64-bit code that computes exp(x) for the double in xmm0 and
// leaves the result in xmm0. No return instruction is emitted here.
//
// Registers:
//   input:   xmm0
//   scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//            rax, rdx, rcx, tmp - r11
//
// Stack: 24 bytes are reserved for the duration of the emitted code.
//   [rsp + 0]   receives 14 or 15 on some overflow/underflow paths; nothing
//               in this function reads it back.
//               NOTE(review): these look like LIBM error codes - confirm.
//   [rsp + 8]   copy of the input x
//   [rsp + 16]  spill slot the result passes through on the 14/15 paths
//
// The XMM and general-purpose parameters are named after registers; rsp is
// used directly.
//
// Code generated by Intel C compiler for LIBM library

void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  // This jump is emitted into the code stream; the address declarations
  // between it and bind(start) emit nothing.
  jmp(start);
  address cv = (address)_cv;
  address Shifter = (address)_shifter;
  address mmask = (address)_mmask;
  address bias = (address)_bias;
  address Tbl_addr = (address)_Tbl_addr;
  address ALLONES = (address)_ALLONES;
  address ebias = (address)_ebias;
  address XMAX = (address)_XMAX;
  address XMIN = (address)_XMIN;
  address INF = (address)_INF;
  address ZERO = (address)_ZERO;
  address ONE_val = (address)_ONE_val;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 8), xmm0);            // keep a copy of x for the special-case paths
  unpcklpd(xmm0, xmm0);                    // x in both lanes
  movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, ExternalAddress(16+cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, ExternalAddress(32+cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // eax = top 16 bits of x without the sign. The main path is taken only
  // when that word lies in [15504, 16527] (0x3C90..0x408F): if either
  // difference below is negative, the OR has its sign bit set and the
  // unsigned compare against INT_MIN branches to the special cases.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // Argument reduction: xmm1 = x * cv[0]; adding and then subtracting the
  // shifter leaves xmm1 rounded to an integer m, while xmm7 keeps the
  // shifted sum whose low bits hold m.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, ExternalAddress(64+cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, ExternalAddress(80+cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  // ecx = (m & 63) * 16 is the byte offset into _Tbl_addr; eax = edx = m >> 6.
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);                         // move the biased value up into the exponent field
  subpd(xmm0, xmm3);                       // xmm0 = reduced argument y
  lea(tmp, ExternalAddress(Tbl_addr));
  movdqu(xmm2, Address(ecx,tmp));          // 16-byte table entry for this index
  // Polynomial evaluation in y using the coefficients from cv+48..cv+95.
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, ExternalAddress(48+cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);                       // add the low qword of the table entry
  unpckhpd(xmm2, xmm2);                    // high qword of the table entry ...
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);                         // ... combined with the exponent bits
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Direct reconstruction is used only when m >> 6 is in [-894, 1022].
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);                       // result = scale * poly + scale
  jmp (B1_5);

  // m >> 6 is outside the direct range. Half of it (sarl by 1) is applied
  // to the exponent separately, which matches the 2^n1 * 2^n2 note in the
  // algorithm description.
  bind(L_2TAG_PACKET_1_0_2);
  xorpd(xmm3, xmm3);
  movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
  movl(edx, -1022);
  subl(edx, eax);
  movdl(xmm5, edx);
  psllq(xmm4, xmm5);
  movl(ecx, eax);
  sarl(eax, 1);
  pinsrw(xmm3, eax, 3);
  movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
  psllq(xmm3, 4);
  psubd(xmm2, xmm3);
  mulsd(xmm0, xmm2);
  cmpl(edx, 52);
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  pand(xmm4, xmm2);
  paddd(xmm3, xmm6);
  subsd(xmm2, xmm4);
  addsd(xmm0, xmm2);
  cmpl(ecx, 1023);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32768);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  movapd(xmm6, xmm0);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
  jmp(B1_5);

  // Taken when the product above has a zero exponent field; the value is
  // rebuilt with integer operations and 15 is recorded at [rsp + 0].
  bind(L_2TAG_PACKET_5_0_2);
  mulsd(xmm6, xmm3);
  mulsd(xmm4, xmm3);
  movdqu(xmm0, xmm6);
  pxor(xmm6, xmm4);
  psrad(xmm6, 31);
  pshufd(xmm6, xmm6, 85);
  psllq(xmm0, 1);
  psrlq(xmm0, 1);
  pxor(xmm0, xmm6);
  psrlq(xmm6, 63);
  paddq(xmm0, xmm6);
  paddq(xmm0, xmm4);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  jmp(B1_5);

  // ecx (the unhalved m >> 6) was >= 1023: finish the scaling and test
  // whether the exponent field of the result is all ones.
  bind(L_2TAG_PACKET_3_0_2);
  addsd(xmm0, xmm4);
  mulsd(xmm0, xmm3);
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  jmp(B1_5);

  bind(L_2TAG_PACKET_2_0_2);
  paddd(xmm3, xmm6);
  addpd(xmm0, xmm2);
  mulsd(xmm0, xmm3);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  // Reached from PACKET_0 with eax = high dword of |x|, at least 0x40900000.
  bind(L_2TAG_PACKET_8_0_2);
  cmpl(eax, 2146435072);                   // 0x7ff00000: exponent field all ones
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  movl(eax, Address(rsp,12));              // high dword of x including the sign
  cmpl(eax, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);  // sign bit set: x is negative
  // Large positive finite x: XMAX * XMAX overflows.
  movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);

  bind(L_2TAG_PACKET_7_0_2);
  movl(Address(rsp,0), 14);
  jmp(L_2TAG_PACKET_6_0_2);

  // Large negative finite x: XMIN * XMIN underflows.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(Address(rsp,0), 15);
  jmp(L_2TAG_PACKET_6_0_2);

  // Exponent field all ones: x is an infinity or a NaN.
  bind(L_2TAG_PACKET_9_0_2);
  movl(edx, Address(rsp,8));               // low dword of x
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_11_0_2);       // mantissa bits in the high dword: NaN
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);    // mantissa bits in the low dword: NaN
  movl(eax, Address(rsp,12));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);    // sign bit set: -INF
  movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
  jmp(B1_5);

  // exp(-INF) = 0
  bind(L_2TAG_PACKET_12_0_2);
  movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
  jmp(B1_5);

  // NaN input: return x + x.
  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(rsp, 8));
  addsd(xmm0, xmm0);
  jmp(B1_5);

  // Top word of |x| was outside [15504, 16527]. If the high dword of |x| is
  // at least 0x40900000 (1024.0) x is large, infinite or NaN; otherwise x
  // is tiny and the result is 1.0 + x.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 12));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
  movsd(Address(rsp, 8), xmm0);
  addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
  jmp(B1_5);

  // Paths that stored 14/15 spill the result and reload it.
  bind(L_2TAG_PACKET_6_0_2);
  movq(Address(rsp, 16), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 16));

  // Common exit: release the 24 bytes reserved at start.
  bind(B1_5);
  addq(rsp, 24);
}

#endif

#ifndef _LP64

// Constants for the 32-bit fast_exp, addressed by byte offset from one base
// register. The byte offset of each row is given on the right.
ALIGNED_(16) juint _static_const_table[] =
{
    0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL,   // +0
    0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL,   // +16
    0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,   // +32
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL,   // +48
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,   // +64
    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,   // +80
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,   // +96
    0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 
    // (continuation of the _static_const_table initializer that starts on an
    //  earlier line; the first row below completes a four-word entry)
    0x3ca52bb9UL, 0x3c651a2eUL, 0x0006dfb2UL,
    0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL,
    0x461e9f86UL, 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL,
    0xabd66c55UL, 0x3c65ebe1UL, 0x36cf4e62UL, 0x0007e2f3UL,
    0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 0x00082589UL,
    0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL,
    0x59f35f44UL, 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL,
    0x9c06283cUL, 0x3ca360baUL, 0xb0cdc5e4UL, 0x00093737UL,
    0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 0x00097d82UL,
    0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL,
    0x9ab89880UL, 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL,
    0x6e735ab3UL, 0x3c846984UL, 0x5579fdbfUL, 0x000a9e6bUL,
    0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 0x000ae89fUL,
    0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL,
    0x0a5fddcdUL, 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL,
    0x30af0cb3UL, 0x3c736eaeUL, 0xdd85529cUL, 0x000c199bUL,
    0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 0x000c67f1UL,
    0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL,
    0xa63d07a7UL, 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL,
    0xd5c192acUL, 0x3c8e5a50UL, 0x03db3285UL, 0x000da9e6UL,
    0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 0x000dfc97UL,
    0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL,
    0x41aa2008UL, 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL,
    0x31d185eeUL, 0x3c8a64a9UL, 0x5b6e4540UL, 0x000f5076UL,
    0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 0x000fa7c1UL,
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,   // +1184: 1.0, +1192: +INF
    0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL,   // +1200: 0.0, +1208: largest finite double
    0x00000000UL, 0x00100000UL                                // +1216: smallest normal double
};

// Emits inline 32-bit code that computes exp(x). The argument is read from
// the stack and the result is pushed on the x87 stack with fld_d.
//
// Registers:
//   input:   (rbp + 8); the emitted code reads it at [rsp + 128] after
//            lowering rsp by 120
//   scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//            rax, rdx, rcx, rbx (tmp)
//
// tmp is saved at [rsp + 64] on entry and restored at the common exit. It
// holds the base address of _static_const_table in between.
//
// NOTE(review): rsp is lowered by 120 at the start and is not raised again
// anywhere in this function; presumably the caller's frame teardown undoes
// it - confirm at the call site.
// NOTE(review): edx is loaded with 14 or 15 on some overflow/underflow
// paths (these look like LIBM error codes); nothing in this function reads
// it afterwards.
//
// Code generated by Intel C compiler for LIBM library

void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);
  // This jump is emitted into the code stream; the address declaration
  // between it and bind(start) emits nothing.
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  subl(rsp, 120);
  movl(Address(rsp, 64), tmp);             // save tmp
  lea(tmp, ExternalAddress(static_const_table));
  movdqu(xmm0, Address(rsp, 128));         // load the argument
  unpcklpd(xmm0, xmm0);                    // x in both lanes
  movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // eax = top 16 bits of x without the sign. The main path is taken only
  // when that word lies in [15504, 16527]; otherwise one of the differences
  // below is negative and the unsigned compare against INT_MIN branches.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // Argument reduction: adding and then subtracting the shifter leaves
  // xmm1 rounded to an integer m; xmm7 keeps the shifted sum.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  // ecx = (m & 63) * 16 is the byte offset of the table entry; eax = edx = m >> 6.
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));  // 64-entry table starts at byte offset 160
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Direct reconstruction is used only when m >> 6 is in [-894, 1022].
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  // m >> 6 is outside the direct range: the final scaling is done on the
  // x87 unit. The control word is saved at [rsp + 24], a copy with bits
  // 0x300 set (the x87 precision-control field) is loaded, and the saved
  // word is restored afterwards.
  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  // The exponent adjustment is split into eax = n >> 1 and edx = n - eax.
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0,Address(rsp, 8));
  fldcw(Address(rsp, 24));
  // Classify the result by its exponent field: all ones -> PACKET_3,
  // zero -> PACKET_4, anything else is a normal result.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_2_0_2);
  // No label is bound between the unconditional jump above and PACKET_3,
  // so the instructions emitted from here to that bind are never reached.
  cmpl(ecx, INT_MIN);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, -1064950997);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(edx ,-17155601);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jmp(L_2TAG_PACKET_4_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);

  // Exit used by the 14/15 paths: push the result on the x87 stack and
  // reload the original argument into xmm0.
  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  // Reached from PACKET_0 with eax = high dword of |x|, at least 0x40900000.
  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);                   // 0x7ff00000: exponent field all ones
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  movl(eax, Address(rsp, 132));            // high dword of x including the sign
  cmpl(eax, INT_MIN);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));         // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  // Exponent field all ones: x is an infinity or a NaN.
  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));            // low dword of x
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // exp(-INF) = 0
  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // NaN input: return x + x.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  // Top word of |x| was outside [15504, 16527]. If the high dword of |x| is
  // at least 0x40900000 (1024.0) x is large, infinite or NaN; otherwise x
  // is tiny and the result is 1.0 + x.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // Normal exit: move the result from xmm0 to the x87 stack through [rsp + 48].
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));

  // Common exit: restore tmp.
  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));
}

#endif

/******************************************************************************/
//                     ALGORITHM DESCRIPTION - LOG()
//                     ---------------------
//
//    x=2^k * mx, mx in [1,2)
//
//    Get B~1/mx based on the output of rcpss instruction (B0)
//    B = int((B0*2^7+0.5))/2^7
//
//    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
//
//    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
//             p(r) is a degree 7 polynomial
//             -log(B) read from data table (high, low parts)
//             Result is formed from high and low parts
//
// Special cases:
//  log(NaN) = quiet NaN, and raise invalid exception
//  log(+INF) = that INF
//  log(0) = -INF with divide-by-zero exception raised
//  log(1) = +0
//  log(x) = NaN with invalid exception raised if x < -0, including -INF
//
/******************************************************************************/

#ifdef _LP64

// Lookup table for the 64-bit fast_log; each 16-byte entry is a pair of
// doubles stored low word first. Per the algorithm description these are the
// high and low parts of -log(B).
ALIGNED_(16) juint _L_tbl[] =
{
    0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL,
    0xaa241800UL, 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL,
    0x5c364800UL, 0x3fe5af40UL, 0xac10c9fbUL, 0x3d2dfa63UL,
    0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, 0x3d09980bUL,
    0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
    0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL,
    0x6f970c00UL, 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL,
    0x27e8a400UL, 0x3fe47a15UL, 0xf94d60aaUL, 0xbd22cb6aUL,
    0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, 0xbcfd984fUL,
    0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
    0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL,
    0x17197800UL, 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL,
    0xad9d8c00UL, 0x3fe35028UL, 0x9527e6acUL, 0x3d10b83fUL,
    0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, 0xbd274e93UL,
    0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
    0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL,
    0x1134dc00UL, 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL,
    0xd8bebc00UL, 0x3fe230b0UL, 0x6e48667bUL, 0x3d12fc06UL,
    0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, 0xbd2a7242UL,
    0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
    0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL,
    0xf6f29800UL, 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL,
    0x23c75c00UL, 0x3fe11af8UL, 0xbb9ddcb2UL, 0xbd258647UL,
    0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, 0x3d1c2998UL,
    0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
    0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL,
    0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, 0x3fd36b67UL, 0xb0f177c8UL, 
0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 
0x3fc08598UL, 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL }; ALIGNED_(16) juint _log2[] = { 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL }; ALIGNED_(16) juint _coeff[] = { 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 
0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
};

// fast_log (64-bit build): emits the SSE code sequence that computes log(x)
// for the double held in xmm0 and leaves the result in xmm0.
//
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//          rax, rdx, rcx, r8, r11
//
// Stack frame (24 bytes, released before falling off the end):
//   [rsp + 0]   copy of the input, reloaded on the special-case path
//   [rsp + 8]   result produced by the special-case path
//   [rsp + 16]  integer code (2 or 3) stored by the special-case path; it is
//               written but never read inside this function
//
// Special cases handled explicitly (see the labels below):
//   +Inf / NaN            -> x + x
//   +/-0                  -> -1.0 / 0.0  (-Inf)
//   negative finite, -Inf -> 0.0 * Inf   (NaN)
//   subnormal             -> scaled by 2^128 and fed back into the main path
//
// NOTE(review): the body mixes the Register parameters (eax, ecx, edx) with
// the fixed registers rax, rdx and rcx, so it presumably only works when the
// caller passes rax/rcx/rdx for eax/ecx/edx -- confirm against the caller.
// NOTE(review): the header lists r8 and r11 as scratch; the body only uses
// tmp1 and tmp2, which are presumably bound to those registers by the caller.
// NOTE(review): L_2TAG_PACKET_12_0_2 and L_2TAG_PACKET_13_0_2 are declared
// but never bound or referenced; B1_3 is bound but never jumped to.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp1, tmp2, eax, ecx, edx);
  // This emits a jump to the label bound immediately below; the address
  // declarations in between generate no code.
  jmp(start);
  address L_tbl = (address)_L_tbl;
  address log2 = (address)_log2;
  address coeff = (address)_coeff;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 0), xmm0);
  // xmm2 = bit pattern of 1.0
  mov64(rax, 0x3ff0000000000000);
  movdq(xmm2, rax);
  // xmm3 = exponent-field pattern 0x77f, OR-ed into the mantissa copy below
  mov64(rdx, 0x77f0000000000000);
  movdq(xmm3, rdx);
  // xmm4 = 0x8000, added to the reciprocal estimate in the main path
  movl(ecx, 32768);
  movdl(xmm4, rcx);
  // xmm5 = mask that clears the low 45 bits of a 64-bit lane
  mov64(tmp1, 0xffffe00000000000);
  movdq(xmm5, tmp1);
  movdqu(xmm1, xmm0);
  // eax = top 16 bits of x: sign, 11 exponent bits, top 4 mantissa bits
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  // ecx = 0x3fe0, subtracted from the masked exponent bits in the main path
  movl(ecx, 16352);
  psrlq(xmm0, 27);
  lea(tmp2, ExternalAddress(L_tbl));
  psrld(xmm0, 2);
  // approximate reciprocal of the repositioned high bits
  rcpps(xmm0, xmm0);
  // shifting left then right by 12 clears sign and exponent of the copy in xmm1
  psllq(xmm1, 12);
  // shuffle control 228 (0xE4) keeps the lane order, i.e. xmm6 = xmm5
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  // Unsigned range check on the top word: the branch is taken when it is
  // below 0x0010 (zero or subnormal) or at least 0x7ff0 (Inf, NaN, or any
  // value with the sign bit set).
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path; also re-entered from L_2TAG_PACKET_3_0_2 after a subnormal
  // input has been scaled.
  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  // eax = exponent bits (still shifted left by 4) minus the bias term in ecx,
  // converted to double in xmm7
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movq(xmm6, ExternalAddress(log2));       // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, ExternalAddress(coeff));    // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  // edx = byte offset of a 16-byte L_tbl entry, selected by bits 16..23 of
  // the reciprocal estimate
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp2, edx));
  movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  movddup(xmm5, xmm1);
  mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  movddup(xmm6, xmm0);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  // shuffle control 238 (0xEE) copies the upper 64 bits of the table entry
  // into both halves of xmm2
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  // final sum: the result is in xmm0
  addsd(xmm0, xmm1);
  jmp(B1_5);

  // Special-case dispatch: reload the input and undo the "- 16" on eax.
  bind(L_2TAG_PACKET_0_0_2);
  movq(xmm0, Address(rsp, 0));
  movq(xmm1, Address(rsp, 0));
  addl(eax, 16);
  // top word >= 0x8000: sign bit set
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
  // top word < 0x0010: +0 or a positive subnormal
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);

  // Remaining case on fall-through is +Inf or NaN; also reached for negative
  // NaNs. The result is x + x.
  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm0);
  jmp(B1_5);

  // Entered from L_2TAG_PACKET_2_0_2 with the flags of its cmpl still live:
  // the exponent field is all ones. Any non-zero mantissa bit (upper word via
  // the flags, lower word via edx) means NaN; otherwise the input is -Inf.
  bind(L_2TAG_PACKET_5_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_6_0_2);

  // +0 or positive subnormal.
  bind(L_2TAG_PACKET_3_0_2);
  xorpd(xmm1, xmm1);
  addsd(xmm1, xmm0);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  // all 64 bits zero: treat as zero
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
  // Subnormal: multiply by 2^128 (top word 0x47f0), redo the setup done before
  // the main path with ecx = 0x47f0 as the bias term, then rejoin it.
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psrlq(xmm0, 27);
  movl(ecx, 18416);
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  // Sign bit set: edx = low 32 bits, ecx = high 32 bits with the sign bit
  // shifted out by the add.
  bind(L_2TAG_PACKET_2_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  // -2097152 == 0xffe00000: exponent field all ones
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  // all remaining bits zero: the input is -0
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);

  // Negative non-zero input or -Inf: result = 0.0 * Inf (top word 0x7ff0).
  bind(L_2TAG_PACKET_6_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movl(Address(rsp, 16), 3);
  jmp(L_2TAG_PACKET_8_0_2);

  // Zero input: result = -1.0 / 0.0 (top word 0xbff0 is -1.0).
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(Address(rsp, 16), 2);

  // The special-case result is stored to the frame and loaded straight back.
  bind(L_2TAG_PACKET_8_0_2);
  movq(Address(rsp, 8), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 8));

  // Common exit: release the 24-byte frame.
  bind(B1_5);
  addq(rsp, 24);
}

#endif

#ifndef _LP64

ALIGNED_(16) juint _static_const_table_log[] =
{
    0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL,
    0xaa241800UL, 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL,
    0x5c364800UL, 0x3fe5af40UL, 0xac10c9fbUL, 0x3d2dfa63UL,
    0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, 0x3d09980bUL,
    0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
    0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL,
    0x6f970c00UL, 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL,
    0x27e8a400UL, 0x3fe47a15UL, 0xf94d60aaUL, 0xbd22cb6aUL,
    0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, 0xbcfd984fUL,
    0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
    0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL,
    0x17197800UL, 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL,
    0xad9d8c00UL, 0x3fe35028UL, 0x9527e6acUL, 0x3d10b83fUL,
    0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, 0xbd274e93UL,
    0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
    0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL,
    0x1134dc00UL, 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL,
    0xd8bebc00UL, 0x3fe230b0UL, 0x6e48667bUL, 0x3d12fc06UL,
    0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, 0xbd2a7242UL,
    0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
    0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL,
    0xf6f29800UL, 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL,
    0x23c75c00UL, 0x3fe11af8UL, 0xbb9ddcb2UL, 0xbd258647UL,
    0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, 0x3d1c2998UL,
    0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
    0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL,
    0xbe760400UL, 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL,
    0xe5b20800UL, 0x3fe00e5aUL, 0xb1727b1cUL, 0xbd053ba3UL,
    0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, 0x3d0085faUL,
0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 
0xdf5bb3b6UL, 0x3d2c93c1UL, 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, 
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, 0xffffe000UL }; //registers, // input: xmm0 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 // rax, rdx, 
//          rcx, rbx (tmp)
//
// fast_log (32-bit build): emits the SSE code sequence that computes log(x)
// for a double and pushes the result onto the x87 stack with fld_d.
//
// All constants come from _static_const_table_log, addressed through tmp:
//   offset 0..2063   16-byte lookup entries (indexed by edx in the main path)
//   offset 2064      two-word pair commented 0xfefa3800UL, 0x3fa62e42UL
//   offset 2072      two-word pair 0x93c76730UL, 0x3ceef357UL
//   offset 2080..    three 16-byte coefficient pairs
//   offset 2128      mask 0x00000000UL, 0xffffe000UL
//
// Stack frame (104 bytes): [rsp + 40] holds the caller's tmp, [rsp + 0] and
// [rsp + 24] are used to move the result from xmm0 to the x87 stack.
//
// Special cases handled explicitly (see the labels below):
//   +Inf / NaN            -> x + x
//   +/-0                  -> -1.0 / 0.0  (-Inf)
//   negative finite, -Inf -> 0.0 * Inf   (NaN)
//   subnormal             -> scaled by 2^128 and fed back into the main path
//
// NOTE(review): the argument is loaded from Address(rsp, 112), i.e. 8 bytes
// above the stack pointer on entry; the incoming value of xmm0 is overwritten
// before it is read, so "input: xmm0" does not describe this variant.
// NOTE(review): rsp is lowered by 104 at entry and not raised again anywhere
// in this function -- presumably the caller's epilogue restores it; confirm.
// NOTE(review): edx is set to 2 or 3 on the special-case paths but is not
// read afterwards inside this function.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
  Label L_2TAG_PACKET_10_0_2, start;

  assert_different_registers(tmp, eax, ecx, edx);
  // This emits a jump to the label bound immediately below; the address
  // declaration in between generates no code.
  jmp(start);
  address static_const_table = (address)_static_const_table_log;

  bind(start);
  subl(rsp, 104);
  // save tmp, then reuse it as the base pointer of the constant table
  movl(Address(rsp, 40), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  // xmm2 = 1.0 (top word 0x3ff0)
  xorpd(xmm2, xmm2);
  movl(eax, 16368);
  pinsrw(xmm2, eax, 3);
  // xmm3 = exponent-field pattern with top word 0x77f0
  xorpd(xmm3, xmm3);
  movl(edx, 30704);
  pinsrw(xmm3, edx, 3);
  movsd(xmm0, Address(rsp, 112));
  movapd(xmm1, xmm0);
  // xmm4 = 0x8000, added to the reciprocal estimate in the main path
  movl(ecx, 32768);
  movdl(xmm4, ecx);
  movsd(xmm5, Address(tmp, 2128));         // 0x00000000UL, 0xffffe000UL
  // eax = top 16 bits of x: sign, 11 exponent bits, top 4 mantissa bits
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  // ecx = 0x3fe0, subtracted from the masked exponent bits in the main path
  movl(ecx, 16352);
  psrlq(xmm0, 34);
  // approximate reciprocal of the repositioned high bits
  rcpss(xmm0, xmm0);
  // shifting left then right by 12 clears sign and exponent of the copy in xmm1
  psllq(xmm1, 12);
  // shuffle control 228 (0xE4) keeps the lane order, i.e. xmm6 = xmm5
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  // Unsigned range check on the top word: the branch is taken when it is
  // below 0x0010 (zero or subnormal) or at least 0x7ff0 (Inf, NaN, or any
  // value with the sign bit set).
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path; also re-entered from L_2TAG_PACKET_4_0_2 after a subnormal
  // input has been scaled.
  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  // eax = exponent bits (still shifted left by 4) minus the bias term in ecx,
  // converted to double in xmm7
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movsd(xmm6, Address(tmp, 2064));         // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, Address(tmp, 2080));        // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  // edx = byte offset of a 16-byte table entry, selected by bits 16..23 of
  // the reciprocal estimate
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp, edx));
  movdqu(xmm4, Address(tmp, 2096));        // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, Address(tmp, 2112));        // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  // shuffle control 68 (0x44) duplicates the low 64 bits into both halves
  pshufd(xmm5, xmm1, 68);
  mulsd(xmm7, Address(tmp, 2072));         // 0x93c76730UL, 0x3ceef357UL (mulsd reads only these 8 bytes)
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  pshufd(xmm6, xmm0, 228);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  // shuffle control 238 (0xEE) copies the upper 64 bits of the table entry
  // into both halves of xmm2
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  // final sum: the result is in xmm0
  addsd(xmm0, xmm1);
  jmp(L_2TAG_PACKET_2_0_2);

  // Special-case dispatch: reload the input and undo the "- 16" on eax.
  bind(L_2TAG_PACKET_0_0_2);
  movsd(xmm0, Address(rsp, 112));
  movdqu(xmm1, xmm0);
  addl(eax, 16);
  // top word >= 0x8000: sign bit set
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
  // top word < 0x0010: +0 or a positive subnormal
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_4_0_2);

  // Remaining case on fall-through is +Inf or NaN; also reached for negative
  // NaNs. The result is x + x.
  bind(L_2TAG_PACKET_5_0_2);
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  // Entered from L_2TAG_PACKET_3_0_2 with the flags of its cmpl still live:
  // the exponent field is all ones. Any non-zero mantissa bit (upper word via
  // the flags, lower word via edx) means NaN; otherwise the input is -Inf.
  bind(L_2TAG_PACKET_6_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
  jmp(L_2TAG_PACKET_7_0_2);

  // Sign bit set: edx = low 32 bits, ecx = high 32 bits with the sign bit
  // shifted out by the add.
  bind(L_2TAG_PACKET_3_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  // -2097152 == 0xffe00000: exponent field all ones
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  // all remaining bits zero: the input is -0
  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);

  // Negative non-zero input or -Inf: result = 0.0 * Inf (top word 0x7ff0).
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  movl(edx, 3);
  mulsd(xmm0, xmm1);

  // Special-case exit: spill the result, reload xmm0 with the original
  // argument, and push the result onto the x87 stack.
  bind(L_2TAG_PACKET_9_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 112));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_10_0_2);

  // Zero input: result = -1.0 / 0.0 (top word 0xbff0 is -1.0).
  bind(L_2TAG_PACKET_8_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(edx, 2);
  jmp(L_2TAG_PACKET_9_0_2);

  // +0 or positive subnormal.
  bind(L_2TAG_PACKET_4_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  // all 64 bits zero: treat as zero
  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
  // Subnormal: multiply by 2^128 (top word 0x47f0), redo the setup done before
  // the main path with ecx = 0x47f0 as the bias term, then rejoin it.
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movapd(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 18416);
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  // Normal exit: move the result from xmm0 to the x87 stack via the frame.
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 24), xmm0);
  fld_d(Address(rsp, 24));

  // Common exit: restore the caller's tmp.
  bind(L_2TAG_PACKET_10_0_2);
  movl(tmp, Address(rsp, 40));
}

#endif

/******************************************************************************/
//                     ALGORITHM DESCRIPTION - SIN()
//                     ---------------------
//
//     1. RANGE REDUCTION
//
//     We perform an initial range reduction from X to r with
//
//          X =~= N * pi/32 + r
//
//     so that |r| <= pi/64 + epsilon. We restrict inputs to those
//     where |N| <= 932560. Beyond this, the range reduction is
//     insufficiently accurate. For extremely small inputs,
//     denormalization can occur internally, impacting performance.
//     This means that the main path is actually only taken for
//     2^-252 <= |X| < 90112.
//
//     To avoid branches, we perform the range reduction to full
//     accuracy each time.
//
//          X - N * (P_1 + P_2 + P_3)
//
//     where P_1 and P_2 are 32-bit numbers (so multiplication by N
//     is exact) and P_3 is a 53-bit number. Together, these
//     approximate pi well enough for all cases in the restricted
//     range.
//
//     The main reduction sequence is:
//
//             y = 32/pi * x
//             N = integer(y)
//     (computed by adding and subtracting off SHIFTER)
//
//             m_1 = N * P_1
//             m_2 = N * P_2
//             r_1 = x - m_1
//             r = r_1 - m_2
//     (this r can be used for most of the calculation)
//
//             c_1 = r_1 - r
//             m_3 = N * P_3
//             c_2 = c_1 - m_2
//             c = c_2 - m_3
//
//     2. MAIN ALGORITHM
//
//     The algorithm uses a table lookup based on B = M * pi / 32
//     where M = N mod 64.
//     The stored values are:
//       sigma             closest power of 2 to cos(B)
//       C_hl              53-bit cos(B) - sigma
//       S_hi + S_lo       2 * 53-bit sin(B)
//
//     The computation is organized as follows:
//
//          sin(B + r + c) = [sin(B) + sigma * r] +
//                           r * (cos(B) - sigma) +
//                           sin(B) * [cos(r + c) - 1] +
//                           cos(B) * [sin(r + c) - r]
//
//     which is approximately:
//
//          [S_hi + sigma * r] +
//          C_hl * r +
//          S_lo + S_hi * [(cos(r) - 1) - r * c] +
//          (C_hl + sigma) * [(sin(r) - r) + c]
//
//     and this is what is actually computed. We separate this sum
//     into four parts:
//
//          hi + med + pols + corr
//
//     where
//
//          hi       = S_hi + sigma * r
//          med      = C_hl * r
//          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
//          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
//     3. POLYNOMIAL
//
//     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
//     (sin(r) - r) can be rearranged freely, since it is quite
//     small, so we exploit parallelism to the fullest.
//
//          psc4       =   SC_4 * r_1
//          msc4       =   psc4 * r
//          r2         =   r * r
//          msc2       =   SC_2 * r2
//          r4         =   r2 * r2
//          psc3       =   SC_3 + msc4
//          psc1       =   SC_1 + msc2
//          msc3       =   r4 * psc3
//          sincospols =   psc1 + msc3
//          pols       =   sincospols *
//                         <S_hi * r^2 | (C_hl + sigma) * r^3>
//
//     4. CORRECTION TERM
//
//     This is where the "c" component of the range reduction is
//     taken into account; recall that just "r" is used for most of
//     the calculation.
//
//          -c   = m_3 - c_2
//          -d   = S_hi * r - (C_hl + sigma)
//          corr = -c * -d + S_lo
//
//     5. COMPENSATED SUMMATIONS
//
//     The two successive compensated summations add up the high
//     and medium parts, leaving just the low parts to add up at
//     the end.
//
//          rs        =  sigma * r
//          res_int   =  S_hi + rs
//          k_0       =  S_hi - res_int
//          k_2       =  k_0 + rs
//          med       =  C_hl * r
//          res_hi    =  res_int + med
//          k_1       =  res_int - res_hi
//          k_3       =  k_1 + med
//
//     6. FINAL SUMMATION
//
//     We now add up all the small parts:
//
//          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
//
//     Now the overall result is just:
//
//          res_hi + res_lo
//
//     7.
SMALL ARGUMENTS // // If |x| < SNN (SNN meaning the smallest normal number), we // simply perform 0.1111111 cdots 1111 * x. For SNN <= |x|, we // do 2^-55 * (2^55 * x - x). // // Special cases: // sin(NaN) = quiet NaN, and raise invalid exception // sin(INF) = NaN and raise invalid exception // sin(+/-0) = +/-0 // /******************************************************************************/ #ifdef _LP64 ALIGNED_(16) juint _ONEHALF[] = { 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL }; ALIGNED_(16) juint _P_2[] = { 0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL }; ALIGNED_(16) juint _SC_4[] = { 0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL }; ALIGNED_(16) juint _Ctable[] = { 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 
0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 
0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0xbfd00000UL, 
0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0x3ff00000UL }; ALIGNED_(16) juint _SC_2[] = { 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL }; 
ALIGNED_(16) juint _SC_3[] = { 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL }; ALIGNED_(16) juint _SC_1[] = { 0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL }; ALIGNED_(16) juint _PI_INV_TABLE[] = { 0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL, 0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL, 0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL, 0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL, 0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL, 0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL, 0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL, 0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL, 0xf0cfbc21UL }; ALIGNED_(8) juint _PI_4[] = { 0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL }; ALIGNED_(8) juint _PI32INV[] = { 0x6dc9c883UL, 0x40245f30UL }; ALIGNED_(8) juint _SHIFTER[] = { 0x00000000UL, 0x43380000UL }; ALIGNED_(8) juint _SIGN_MASK[] = { 0x00000000UL, 0x80000000UL }; ALIGNED_(8) juint _P_3[] = { 0x2e037073UL, 0x3b63198aUL }; ALIGNED_(8) juint _ALL_ONES[] = { 0xffffffffUL, 0x3fefffffUL }; ALIGNED_(8) juint _TWO_POW_55[] = { 0x00000000UL, 0x43600000UL }; ALIGNED_(8) juint _TWO_POW_M55[] = { 0x00000000UL, 0x3c800000ULL }; ALIGNED_(8) juint _P_1[] = { 0x54400000UL, 0x3fb921fbUL }; ALIGNED_(8) juint _NEG_ZERO[] = { 0x00000000UL, 0x80000000UL }; void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ebx, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, 
L_2TAG_PACKET_11_0_1; Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1; Label L_2TAG_PACKET_12_0_1, B1_1, B1_2, B1_4, start; assert_different_registers(tmp1, tmp2, tmp3, tmp4, eax, ebx, ecx, edx); address ONEHALF = (address)_ONEHALF; address P_2 = (address)_P_2; address SC_4 = (address)_SC_4; address Ctable = (address)_Ctable; address SC_2 = (address)_SC_2; address SC_3 = (address)_SC_3; address SC_1 = (address)_SC_1; address PI_INV_TABLE = (address)_PI_INV_TABLE; address PI_4 = (address)_PI_4; address PI32INV = (address)_PI32INV; address SHIFTER = (address)_SHIFTER; address SIGN_MASK = (address)_SIGN_MASK; address P_3 = (address)_P_3; address ALL_ONES = (address)_ALL_ONES; address TWO_POW_55 = (address)_TWO_POW_55; address TWO_POW_M55 = (address)_TWO_POW_M55; address P_1 = (address)_P_1; address NEG_ZERO = (address)_NEG_ZERO; bind(start); push(rbx); subq(rsp, 16); movsd(Address(rsp, 8), xmm0); movl(eax, Address(rsp, 12)); movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL movq(xmm2, ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL andl(eax, 2147418112); subl(eax, 808452096); cmpl(eax, 281346048); jcc(Assembler::above, L_2TAG_PACKET_0_0_1); mulsd(xmm1, xmm0); movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL pand(xmm4, xmm0); por(xmm5, xmm4); addpd(xmm1, xmm5); cvttsd2sil(edx, xmm1); cvtsi2sdl(xmm1, edx); movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL mov64(r8, 0x3fb921fb54400000); movdq(xmm3, r8); movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL pshufd(xmm4, xmm0, 68); mulsd(xmm3, xmm1); movddup(xmm1, xmm1); andl(edx, 63); shll(edx, 5); lea(rax, ExternalAddress(Ctable)); addq(rax, rdx); mulpd(xmm6, xmm1); mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL subsd(xmm4, xmm3); movq(xmm7, Address(rax, 8)); subsd(xmm0, xmm3); 
movddup(xmm3, xmm4); subsd(xmm4, xmm6); pshufd(xmm0, xmm0, 68); movdqu(xmm2, Address(rax, 0)); mulpd(xmm5, xmm0); subpd(xmm0, xmm6); mulsd(xmm7, xmm4); subsd(xmm3, xmm4); mulpd(xmm5, xmm0); mulpd(xmm0, xmm0); subsd(xmm3, xmm6); movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL subsd(xmm1, xmm3); movq(xmm3, Address(rax, 24)); addsd(xmm2, xmm3); subsd(xmm7, xmm2); mulsd(xmm2, xmm4); mulpd(xmm6, xmm0); mulsd(xmm3, xmm4); mulpd(xmm2, xmm0); mulpd(xmm0, xmm0); addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL mulsd(xmm4, Address(rax, 0)); addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL mulpd(xmm5, xmm0); movdqu(xmm0, xmm3); addsd(xmm3, Address(rax, 8)); mulpd(xmm1, xmm7); movdqu(xmm7, xmm4); addsd(xmm4, xmm3); addpd(xmm6, xmm5); movq(xmm5, Address(rax, 8)); subsd(xmm5, xmm3); subsd(xmm3, xmm4); addsd(xmm1, Address(rax, 16)); mulpd(xmm6, xmm2); addsd(xmm5, xmm0); addsd(xmm3, xmm7); addsd(xmm1, xmm5); addsd(xmm1, xmm3); addsd(xmm1, xmm6); unpckhpd(xmm6, xmm6); movdqu(xmm0, xmm4); addsd(xmm1, xmm6); addsd(xmm0, xmm1); jmp(B1_4); bind(L_2TAG_PACKET_0_0_1); jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); shrl(eax, 20); cmpl(eax, 3325); jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1); mulsd(xmm0, ExternalAddress(ALL_ONES)); //0xffffffffUL, 0x3fefffffUL jmp(B1_4); bind(L_2TAG_PACKET_2_0_1); movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL mulsd(xmm3, xmm0); subsd(xmm3, xmm0); mulsd(xmm3, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL jmp(B1_4); bind(L_2TAG_PACKET_1_0_1); pextrw(eax, xmm0, 3); andl(eax, 32752); cmpl(eax, 32752); jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); pextrw(ecx, xmm0, 3); andl(ecx, 32752); subl(ecx, 16224); shrl(ecx, 7); andl(ecx, 65532); lea(r11, ExternalAddress(PI_INV_TABLE)); addq(rcx, r11); movdq(rax, xmm0); movl(r10, Address(rcx, 20)); movl(r8, Address(rcx, 24)); movl(edx, eax); shrq(rax, 
21); orl(eax, INT_MIN); shrl(eax, 11); movl(r9, r10); imulq(r10, rdx); imulq(r9, rax); imulq(r8, rax); movl(rsi, Address(rcx, 16)); movl(rdi, Address(rcx, 12)); movl(r11, r10); shrq(r10, 32); addq(r9, r10); addq(r11, r8); movl(r8, r11); shrq(r11, 32); addq(r9, r11); movl(r10, rsi); imulq(rsi, rdx); imulq(r10, rax); movl(r11, rdi); imulq(rdi, rdx); movl(ebx, rsi); shrq(rsi, 32); addq(r9, rbx); movl(ebx, r9); shrq(r9, 32); addq(r10, rsi); addq(r10, r9); shlq(rbx, 32); orq(r8, rbx); imulq(r11, rax); movl(r9, Address(rcx, 8)); movl(rsi, Address(rcx, 4)); movl(ebx, rdi); shrq(rdi, 32); addq(r10, rbx); movl(ebx, r10); shrq(r10, 32); addq(r11, rdi); addq(r11, r10); movq(rdi, r9); imulq(r9, rdx); imulq(rdi, rax); movl(r10, r9); shrq(r9, 32); addq(r11, r10); movl(r10, r11); shrq(r11, 32); addq(rdi, r9); addq(rdi, r11); movq(r9, rsi); imulq(rsi, rdx); imulq(r9, rax); shlq(r10, 32); orq(r10, rbx); movl(eax, Address(rcx, 0)); movl(r11, rsi); shrq(rsi, 32); addq(rdi, r11); movl(r11, rdi); shrq(rdi, 32); addq(r9, rsi); addq(r9, rdi); imulq(rdx, rax); pextrw(ebx, xmm0, 3); lea(rdi, ExternalAddress(PI_INV_TABLE)); subq(rcx, rdi); addl(ecx, ecx); addl(ecx, ecx); addl(ecx, ecx); addl(ecx, 19); movl(rsi, 32768); andl(rsi, ebx); shrl(ebx, 4); andl(ebx, 2047); subl(ebx, 1023); subl(ecx, ebx); addq(r9, rdx); movl(edx, ecx); addl(edx, 32); cmpl(ecx, 1); jcc(Assembler::less, L_2TAG_PACKET_4_0_1); negl(ecx); addl(ecx, 29); shll(r9); movl(rdi, r9); andl(r9, 536870911); testl(r9, 268435456); jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); shrl(r9); movl(ebx, 0); shlq(r9, 32); orq(r9, r11); bind(L_2TAG_PACKET_6_0_1); bind(L_2TAG_PACKET_7_0_1); cmpq(r9, 0); jcc(Assembler::equal, L_2TAG_PACKET_8_0_1); bind(L_2TAG_PACKET_9_0_1); bsrq(r11, r9); movl(ecx, 29); subl(ecx, r11); jcc(Assembler::lessEqual, L_2TAG_PACKET_10_0_1); shlq(r9); movq(rax, r10); shlq(r10); addl(edx, ecx); negl(ecx); addl(ecx, 64); shrq(rax); shrq(r8); orq(r9, rax); orq(r10, r8); bind(L_2TAG_PACKET_11_0_1); cvtsi2sdq(xmm0, r9); 
shrq(r10, 1); cvtsi2sdq(xmm3, r10); xorpd(xmm4, xmm4); shll(edx, 4); negl(edx); addl(edx, 16368); orl(edx, rsi); xorl(edx, ebx); pinsrw(xmm4, edx, 3); movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL xorpd(xmm5, xmm5); subl(edx, 1008); pinsrw(xmm5, edx, 3); mulsd(xmm0, xmm4); shll(rsi, 16); sarl(rsi, 31); mulsd(xmm3, xmm5); movdqu(xmm1, xmm0); mulsd(xmm0, xmm2); shrl(rdi, 29); addsd(xmm1, xmm3); mulsd(xmm3, xmm2); addl(rdi, rsi); xorl(rdi, rsi); mulsd(xmm6, xmm1); movl(eax, rdi); addsd(xmm6, xmm3); movdqu(xmm2, xmm0); addsd(xmm0, xmm6); subsd(xmm2, xmm0); addsd(xmm6, xmm2); bind(L_2TAG_PACKET_12_0_1); movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL mulsd(xmm1, xmm0); movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL pand(xmm4, xmm0); por(xmm5, xmm4); addpd(xmm1, xmm5); cvttsd2sil(edx, xmm1); cvtsi2sdl(xmm1, edx); movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL mulsd(xmm3, xmm1); unpcklpd(xmm1, xmm1); shll(eax, 3); addl(edx, 1865216); movdqu(xmm4, xmm0); addl(edx, eax); andl(edx, 63); movdqu(xmm5, ExternalAddress(SC_4)); //0x54400000UL, 0x3fb921fbUL lea(rax, ExternalAddress(Ctable)); shll(edx, 5); addq(rax, rdx); mulpd(xmm2, xmm1); subsd(xmm0, xmm3); mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL subsd(xmm4, xmm3); movq(xmm7, Address(rax, 8)); unpcklpd(xmm0, xmm0); movdqu(xmm3, xmm4); subsd(xmm4, xmm2); mulpd(xmm5, xmm0); subpd(xmm0, xmm2); mulsd(xmm7, xmm4); subsd(xmm3, xmm4); mulpd(xmm5, xmm0); mulpd(xmm0, xmm0); subsd(xmm3, xmm2); movdqu(xmm2, Address(rax, 0)); subsd(xmm1, xmm3); movq(xmm3, Address(rax, 24)); addsd(xmm2, xmm3); subsd(xmm7, xmm2); subsd(xmm1, xmm6); movdqu(xmm6, 
ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL mulsd(xmm2, xmm4); mulpd(xmm6, xmm0); mulsd(xmm3, xmm4); mulpd(xmm2, xmm0); mulpd(xmm0, xmm0); addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL mulsd(xmm4, Address(rax, 0)); addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL mulpd(xmm5, xmm0); movdqu(xmm0, xmm3); addsd(xmm3, Address(rax, 8)); mulpd(xmm1, xmm7); movdqu(xmm7, xmm4); addsd(xmm4, xmm3); addpd(xmm6, xmm5); movq(xmm5, Address(rax, 8)); subsd(xmm5, xmm3); subsd(xmm3, xmm4); addsd(xmm1, Address(rax, 16)); mulpd(xmm6, xmm2); addsd(xmm5, xmm0); addsd(xmm3, xmm7); addsd(xmm1, xmm5); addsd(xmm1, xmm3); addsd(xmm1, xmm6); unpckhpd(xmm6, xmm6); movdqu(xmm0, xmm4); addsd(xmm1, xmm6); addsd(xmm0, xmm1); jmp(B1_4); bind(L_2TAG_PACKET_8_0_1); addl(edx, 64); movq(r9, r10); movq(r10, r8); movl(r8, 0); cmpq(r9, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1); addl(edx, 64); movq(r9, r10); movq(r10, r8); cmpq(r9, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1); xorpd(xmm0, xmm0); xorpd(xmm6, xmm6); jmp(L_2TAG_PACKET_12_0_1); bind(L_2TAG_PACKET_10_0_1); jcc(Assembler::equal, L_2TAG_PACKET_11_0_1); negl(ecx); shrq(r10); movq(rax, r9); shrq(r9); subl(edx, ecx); negl(ecx); addl(ecx, 64); shlq(rax); orq(r10, rax); jmp(L_2TAG_PACKET_11_0_1); bind(L_2TAG_PACKET_4_0_1); negl(ecx); shlq(r9, 32); orq(r9, r11); shlq(r9); movq(rdi, r9); testl(r9, INT_MIN); jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1); shrl(r9); movl(ebx, 0); shrq(rdi, 3); jmp(L_2TAG_PACKET_7_0_1); bind(L_2TAG_PACKET_5_0_1); shrl(r9); movl(ebx, 536870912); shrl(ebx); shlq(r9, 32); orq(r9, r11); shlq(rbx, 32); addl(rdi, 536870912); movl(rcx, 0); movl(r11, 0); subq(rcx, r8); sbbq(r11, r10); sbbq(rbx, r9); movq(r8, rcx); movq(r10, r11); movq(r9, rbx); movl(ebx, 32768); jmp(L_2TAG_PACKET_6_0_1); bind(L_2TAG_PACKET_13_0_1); shrl(r9); mov64(rbx, 0x100000000); shrq(rbx); movl(rcx, 0); movl(r11, 0); 
subq(rcx, r8); sbbq(r11, r10); sbbq(rbx, r9); movq(r8, rcx); movq(r10, r11); movq(r9, rbx); movl(ebx, 32768); shrq(rdi, 3); addl(rdi, 536870912); jmp(L_2TAG_PACKET_7_0_1); bind(L_2TAG_PACKET_3_0_1); movq(xmm0, Address(rsp, 8)); mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL movq(Address(rsp, 0), xmm0); bind(L_2TAG_PACKET_14_0_1); bind(B1_4); addq(rsp, 16); pop(rbx); } #endif #ifndef _LP64 /* Constant tables for the 32-bit (non-_LP64) build; the addresses of all of them are taken in MacroAssembler::libm_reduce_pi04l(). Each juint pair is written low word first, then high word; the decoded values quoted in the comments below assume a pair is read as one little-endian IEEE-754 double. */ /* _zero_none: two doubles, { +0.0, -1.0 }. */ ALIGNED_(8) juint _zero_none[] = { 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL }; /* __4onpi_d: 4/pi rounded to double (0x3ff45f306dc9c883, about 1.2732395447351628). */ ALIGNED_(4) juint __4onpi_d[] = { 0x6dc9c883UL, 0x3ff45f30UL }; /* _TWO_32H: 1.5 x 2^32 (0x41f8000000000000). */ ALIGNED_(4) juint _TWO_32H[] = { 0x00000000UL, 0x41f80000UL }; /* _pi04_3d: three doubles of decreasing magnitude; the leading one (0x3fe921fb54442d00) is pi/4 with its lowest bits cleared. Presumably a hi/mid/lo split of pi/4 -- TODO confirm. Loaded on the B1_6 path of libm_reduce_pi04l(). */ ALIGNED_(4) juint _pi04_3d[] = { 0x54442d00UL, 0x3fe921fbUL, 0x98cc5180UL, 0x3ce84698UL, 0xcbb5bf6cUL, 0xb9dfc8f8UL }; /* _pi04_5d: five doubles of decreasing magnitude; the leading one (0x3fe921fb54400000) is pi/4 kept to fewer bits than in _pi04_3d. Presumably a five-term split of pi/4 -- TODO confirm. Loaded on the B1_7 path of libm_reduce_pi04l(). */ ALIGNED_(4) juint _pi04_5d[] = { 0x54400000UL, 0x3fe921fbUL, 0x1a600000UL, 0x3dc0b461UL, 0x2e000000UL, 0x3b93198aUL, 0x25200000UL, 0x396b839aUL, 0x533e63a0UL, 0x37027044UL }; /* _SCALE: 2^-217 (0x3260000000000000). */ ALIGNED_(4) juint _SCALE[] = { 0x00000000UL, 0x32600000UL }; /* _zeros: two doubles, { +0.0, -0.0 }. */ ALIGNED_(4) juint _zeros[] = { 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL }; /* _pi04_2d: two doubles; the leading one (0x3fe921fb54400000) equals the leading term of _pi04_5d. Presumably a hi/lo split of pi/4 -- TODO confirm. */ ALIGNED_(4) juint _pi04_2d[] = { 0x54400000UL, 0x3fe921fbUL, 0x1a626331UL, 0x3dc0b461UL }; /* _TWO_12H: 1.5 x 2^12 = 6144.0 (0x40b8000000000000). */ ALIGNED_(4) juint _TWO_12H[] = { 0x00000000UL, 0x40b80000UL }; /* __4onpi_31l: table of 12-byte records (6 jushorts each); libm_reduce_pi04l() reads it with fld_x at a base offset of 12 x index plus 0, 12 or 24 bytes. Record layout, as the values suggest: 4 jushorts of significand (lowest first), 1 jushort of sign and exponent, 1 jushort of zero padding. Record 0 is all zero. Record 1 has significand 0xa2f9836e00000000 and exponent word 0x40d8, i.e. the leading bits of 4/pi multiplied by 2^217 (the reciprocal of _SCALE). Exponent words of the following records fall by roughly 31 per record, so the table presumably holds 4/pi cut into pieces of about 31 bits -- TODO confirm against the original libm sources. */ ALIGNED_(2) jushort __4onpi_31l[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x836e, 0xa2f9, 0x40d8, 0x0000, 0x0000, 0x0000, 0x2a50, 0x9c88, 0x40b7, 0x0000, 0x0000, 0x0000, 0xabe8, 0xfe13, 0x4099, 0x0000, 0x0000, 0x0000, 0x6ee0, 0xfa9a, 0x4079, 0x0000, 0x0000, 0x0000, 0x9580, 0xdb62, 0x4058, 0x0000, 0x0000, 0x0000, 0x1c82, 0xc9e2, 0x403d, 0x0000, 0x0000, 0x0000, 0xb1c0, 0xff28, 0x4019, 0x0000, 0x0000, 0x0000, 0xef14, 0xaf7a, 0x3ffe, 0x0000, 0x0000, 0x0000, 0x48dc, 0xc36e, 0x3fdf, 0x0000, 0x0000, 0x0000, 0x3740, 0xe909, 0x3fbe, 0x0000, 0x0000, 0x0000, 0x924a, 0xb801, 0x3fa2, 0x0000, 0x0000, 0x0000, 0x3a32, 0xdd41, 0x3f83, 0x0000, 0x0000, 0x0000, 0x8778, 0x873f, 0x3f62, 0x0000, 0x0000, 0x0000, 0x1298, 0xb1cb, 0x3f44, 0x0000, 0x0000, 
0x0000, 0xa208, 0x9cfb, 0x3f26, 0x0000, 0x0000, 0x0000, 0xbaec, 0xd7d4, 0x3f06, 0x0000, 0x0000, 0x0000, 0xd338, 0x8909, 0x3ee7, 0x0000, 0x0000, 0x0000, 0x68b8, 0xe04d, 0x3ec7, 0x0000, 0x0000, 0x0000, 0x4e64, 0xdf90, 0x3eaa, 0x0000, 0x0000, 0x0000, 0xc1a8, 0xeb1c, 0x3e89, 0x0000, 0x0000, 0x0000, 0x2720, 0xce7d, 0x3e6a, 0x0000, 0x0000, 0x0000, 0x77b8, 0x8bf1, 0x3e4b, 0x0000, 0x0000, 0x0000, 0xec7e, 0xe4a0, 0x3e2e, 0x0000, 0x0000, 0x0000, 0xffbc, 0xf12f, 0x3e0f, 0x0000, 0x0000, 0x0000, 0xfdc0, 0xb301, 0x3deb, 0x0000, 0x0000, 0x0000, 0xc5ac, 0x9788, 0x3dd1, 0x0000, 0x0000, 0x0000, 0x47da, 0x829b, 0x3db2, 0x0000, 0x0000, 0x0000, 0xd9e4, 0xa6cf, 0x3d93, 0x0000, 0x0000, 0x0000, 0x36e8, 0xf961, 0x3d73, 0x0000, 0x0000, 0x0000, 0xf668, 0xf463, 0x3d54, 0x0000, 0x0000, 0x0000, 0x5168, 0xf2ff, 0x3d35, 0x0000, 0x0000, 0x0000, 0x758e, 0xea4f, 0x3d17, 0x0000, 0x0000, 0x0000, 0xf17a, 0xebe5, 0x3cf8, 0x0000, 0x0000, 0x0000, 0x9cfa, 0x9e83, 0x3cd9, 0x0000, 0x0000, 0x0000, 0xa4ba, 0xe294, 0x3cba, 0x0000, 0x0000, 0x0000, 0xd7ec, 0x9afe, 0x3c9a, 0x0000, 0x0000, 0x0000, 0xae80, 0x8fc6, 0x3c79, 0x0000, 0x0000, 0x0000, 0x3304, 0x8560, 0x3c5c, 0x0000, 0x0000, 0x0000, 0x6d70, 0xdf8f, 0x3c3b, 0x0000, 0x0000, 0x0000, 0x3ef0, 0xafc3, 0x3c1e, 0x0000, 0x0000, 0x0000, 0xd0d8, 0x826b, 0x3bfe, 0x0000, 0x0000, 0x0000, 0x1c80, 0xed4f, 0x3bdd, 0x0000, 0x0000, 0x0000, 0x730c, 0xb0af, 0x3bc1, 0x0000, 0x0000, 0x0000, 0x6660, 0xc219, 0x3ba2, 0x0000, 0x0000, 0x0000, 0x940c, 0xabe2, 0x3b83, 0x0000, 0x0000, 0x0000, 0xdffc, 0x8408, 0x3b64, 0x0000, 0x0000, 0x0000, 0x6b98, 0xc402, 0x3b45, 0x0000, 0x0000, 0x0000, 0x1818, 0x9cc4, 0x3b26, 0x0000, 0x0000, 0x0000, 0x5390, 0xaab6, 0x3b05, 0x0000, 0x0000, 0x0000, 0xb070, 0xd464, 0x3ae9, 0x0000, 0x0000, 0x0000, 0x231a, 0x9ef0, 0x3aca, 0x0000, 0x0000, 0x0000, 0x0670, 0xd1f1, 0x3aaa, 0x0000, 0x0000, 0x0000, 0x7738, 0xd9f3, 0x3a8a, 0x0000, 0x0000, 0x0000, 0xa834, 0x8092, 0x3a6c, 0x0000, 0x0000, 0x0000, 0xb45c, 0xce23, 0x3a4d, 0x0000, 0x0000, 0x0000, 0x36e8, 0xb0e5, 0x3a2d, 
0x0000, 0x0000, 0x0000, 0xd156, 0xaf44, 0x3a10, 0x0000, 0x0000, 0x0000, 0x9f52, 0x8c82, 0x39f1, 0x0000, 0x0000, 0x0000, 0x829c, 0xff83, 0x39d1, 0x0000, 0x0000, 0x0000, 0x7d06, 0xefc6, 0x39b3, 0x0000, 0x0000, 0x0000, 0x93e0, 0xb0b7, 0x3992, 0x0000, 0x0000, 0x0000, 0xedde, 0xc193, 0x3975, 0x0000, 0x0000, 0x0000, 0xbbc0, 0xcf49, 0x3952, 0x0000, 0x0000, 0x0000, 0xbdf0, 0xd63c, 0x3937, 0x0000, 0x0000, 0x0000, 0x1f34, 0x9f3a, 0x3918, 0x0000, 0x0000, 0x0000, 0x3f8e, 0xe579, 0x38f9, 0x0000, 0x0000, 0x0000, 0x90c8, 0xc3f8, 0x38d9, 0x0000, 0x0000, 0x0000, 0x48c0, 0xf8f8, 0x38b7, 0x0000, 0x0000, 0x0000, 0xed56, 0xafa6, 0x389c, 0x0000, 0x0000, 0x0000, 0x8218, 0xb969, 0x387d, 0x0000, 0x0000, 0x0000, 0x1852, 0xec57, 0x385e, 0x0000, 0x0000, 0x0000, 0x670c, 0xd674, 0x383e, 0x0000, 0x0000, 0x0000, 0xad40, 0xc2c4, 0x3820, 0x0000, 0x0000, 0x0000, 0x2e80, 0xa696, 0x3801, 0x0000, 0x0000, 0x0000, 0xd800, 0xc467, 0x37dc, 0x0000, 0x0000, 0x0000, 0x3c72, 0xc5ae, 0x37c3, 0x0000, 0x0000, 0x0000, 0xb006, 0xac69, 0x37a4, 0x0000, 0x0000, 0x0000, 0x34a0, 0x8cdf, 0x3782, 0x0000, 0x0000, 0x0000, 0x9ed2, 0xd25e, 0x3766, 0x0000, 0x0000, 0x0000, 0x6fec, 0xaaaa, 0x3747, 0x0000, 0x0000, 0x0000, 0x6040, 0xfb5c, 0x3726, 0x0000, 0x0000, 0x0000, 0x764c, 0xa3fc, 0x3708, 0x0000, 0x0000, 0x0000, 0xb254, 0x954e, 0x36e9, 0x0000, 0x0000, 0x0000, 0x3e1c, 0xf5dc, 0x36ca, 0x0000, 0x0000, 0x0000, 0x7b06, 0xc635, 0x36ac, 0x0000, 0x0000, 0x0000, 0xa8ba, 0xd738, 0x368d, 0x0000, 0x0000, 0x0000, 0x06cc, 0xb24e, 0x366d, 0x0000, 0x0000, 0x0000, 0x7108, 0xac76, 0x364f, 0x0000, 0x0000, 0x0000, 0x2324, 0xa7cb, 0x3630, 0x0000, 0x0000, 0x0000, 0xac40, 0xef15, 0x360f, 0x0000, 0x0000, 0x0000, 0xae46, 0xd516, 0x35f2, 0x0000, 0x0000, 0x0000, 0x615e, 0xe003, 0x35d3, 0x0000, 0x0000, 0x0000, 0x0cf0, 0xefe7, 0x35b1, 0x0000, 0x0000, 0x0000, 0xfb50, 0xf98c, 0x3595, 0x0000, 0x0000, 0x0000, 0x0abc, 0xf333, 0x3575, 0x0000, 0x0000, 0x0000, 0xdd60, 0xca3f, 0x3555, 0x0000, 0x0000, 0x0000, 0x7eb6, 0xd87f, 0x3538, 0x0000, 0x0000, 0x0000, 0x44f4, 
0xb291, 0x3519, 0x0000, 0x0000, 0x0000, 0xff80, 0xc982, 0x34f6, 0x0000, 0x0000, 0x0000, 0x9de0, 0xd9b8, 0x34db, 0x0000, 0x0000, 0x0000, 0xcd42, 0x9366, 0x34bc, 0x0000, 0x0000, 0x0000, 0xbef0, 0xfaee, 0x349d, 0x0000, 0x0000, 0x0000, 0xdac4, 0xb6f1, 0x347d, 0x0000, 0x0000, 0x0000, 0xf140, 0x94de, 0x345d, 0x0000, 0x0000, 0x0000, 0xa218, 0x8b4b, 0x343e, 0x0000, 0x0000, 0x0000, 0x6380, 0xa135, 0x341e, 0x0000, 0x0000, 0x0000, 0xb184, 0x8cb2, 0x3402, 0x0000, 0x0000, 0x0000, 0x196e, 0xdc61, 0x33e3, 0x0000, 0x0000, 0x0000, 0x0c00, 0xde05, 0x33c4, 0x0000, 0x0000, 0x0000, 0xef9a, 0xbd38, 0x33a5, 0x0000, 0x0000, 0x0000, 0xc1a0, 0xdf00, 0x3385, 0x0000, 0x0000, 0x0000, 0x1090, 0x9973, 0x3365, 0x0000, 0x0000, 0x0000, 0x4882, 0x8301, 0x3348, 0x0000, 0x0000, 0x0000, 0x7abe, 0xadc7, 0x3329, 0x0000, 0x0000, 0x0000, 0x7cba, 0xec2b, 0x330a, 0x0000, 0x0000, 0x0000, 0xa520, 0x8f21, 0x32e9, 0x0000, 0x0000, 0x0000, 0x710c, 0x8d36, 0x32cc, 0x0000, 0x0000, 0x0000, 0x5212, 0xc6ed, 0x32ad, 0x0000, 0x0000, 0x0000, 0x7308, 0xfd76, 0x328d, 0x0000, 0x0000, 0x0000, 0x5014, 0xd548, 0x326f, 0x0000, 0x0000, 0x0000, 0xd3f2, 0xb499, 0x3250, 0x0000, 0x0000, 0x0000, 0x7f74, 0xa606, 0x3230, 0x0000, 0x0000, 0x0000, 0xf0a8, 0xd720, 0x3212, 0x0000, 0x0000, 0x0000, 0x185c, 0xe20f, 0x31f2, 0x0000, 0x0000, 0x0000, 0xa5a8, 0x8738, 0x31d4, 0x0000, 0x0000, 0x0000, 0xdd74, 0xcafb, 0x31b4, 0x0000, 0x0000, 0x0000, 0x98b6, 0xbd8e, 0x3196, 0x0000, 0x0000, 0x0000, 0xe9de, 0x977f, 0x3177, 0x0000, 0x0000, 0x0000, 0x67c0, 0x818d, 0x3158, 0x0000, 0x0000, 0x0000, 0xe52a, 0x9322, 0x3139, 0x0000, 0x0000, 0x0000, 0xe568, 0x9b6c, 0x3119, 0x0000, 0x0000, 0x0000, 0x2358, 0xaa0a, 0x30fa, 0x0000, 0x0000, 0x0000, 0xe480, 0xe13b, 0x30d9, 0x0000, 0x0000, 0x0000, 0x3024, 0x90a1, 0x30bd, 0x0000, 0x0000, 0x0000, 0x9620, 0xda30, 0x309d, 0x0000, 0x0000, 0x0000, 0x898a, 0xb388, 0x307f, 0x0000, 0x0000, 0x0000, 0xb24c, 0xc891, 0x3060, 0x0000, 0x0000, 0x0000, 0x8056, 0xf98b, 0x3041, 0x0000, 0x0000, 0x0000, 0x72a4, 0xa1ea, 0x3021, 0x0000, 0x0000, 
0x0000, 0x6af8, 0x9488, 0x3001, 0x0000, 0x0000, 0x0000, 0xe00c, 0xdfcb, 0x2fe4, 0x0000, 0x0000, 0x0000, 0xeeec, 0xc941, 0x2fc4, 0x0000, 0x0000, 0x0000, 0x53e0, 0xe70f, 0x2fa4, 0x0000, 0x0000, 0x0000, 0x8f60, 0x9c07, 0x2f85, 0x0000, 0x0000, 0x0000, 0xb328, 0xc3e7, 0x2f68, 0x0000, 0x0000, 0x0000, 0x9404, 0xf8c7, 0x2f48, 0x0000, 0x0000, 0x0000, 0x38e0, 0xc99f, 0x2f29, 0x0000, 0x0000, 0x0000, 0x9778, 0xd984, 0x2f09, 0x0000, 0x0000, 0x0000, 0xe700, 0xd142, 0x2eea, 0x0000, 0x0000, 0x0000, 0xd904, 0x9443, 0x2ecd, 0x0000, 0x0000, 0x0000, 0xd4ba, 0xae7e, 0x2eae, 0x0000, 0x0000, 0x0000, 0x8e5e, 0x8524, 0x2e8f, 0x0000, 0x0000, 0x0000, 0xb550, 0xc9ed, 0x2e6e, 0x0000, 0x0000, 0x0000, 0x53b8, 0x8648, 0x2e51, 0x0000, 0x0000, 0x0000, 0xdae4, 0x87f9, 0x2e32, 0x0000, 0x0000, 0x0000, 0x2942, 0xd966, 0x2e13, 0x0000, 0x0000, 0x0000, 0x4f28, 0xcf3c, 0x2df3, 0x0000, 0x0000, 0x0000, 0xfa40, 0xc4ef, 0x2dd1, 0x0000, 0x0000, 0x0000, 0x4424, 0xbca7, 0x2db5, 0x0000, 0x0000, 0x0000, 0x2e62, 0xcdc5, 0x2d97, 0x0000, 0x0000, 0x0000, 0xed88, 0x996b, 0x2d78, 0x0000, 0x0000, 0x0000, 0x7c30, 0xd97d, 0x2d56, 0x0000, 0x0000, 0x0000, 0xed26, 0xbf6e, 0x2d3a, 0x0000, 0x0000, 0x0000, 0x2918, 0x921b, 0x2d1a, 0x0000, 0x0000, 0x0000, 0x4e24, 0xe84e, 0x2cfb, 0x0000, 0x0000, 0x0000, 0x6dc0, 0x92ec, 0x2cdd, 0x0000, 0x0000, 0x0000, 0x4f2c, 0xacf8, 0x2cbd, 0x0000, 0x0000, 0x0000, 0xc634, 0xf094, 0x2c9e, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe5d3, 0x2c7e, 0x0000, 0x0000, 0x0000, 0x2180, 0xa600, 0x2c5b, 0x0000, 0x0000, 0x0000, 0x8480, 0xd680, 0x2c3c, 0x0000, 0x0000, 0x0000, 0x8b24, 0xd63b, 0x2c22, 0x0000, 0x0000, 0x0000, 0x02e0, 0xaa47, 0x2c00, 0x0000, 0x0000, 0x0000, 0x9ad0, 0xee84, 0x2be3, 0x0000, 0x0000, 0x0000, 0xf7dc, 0xf699, 0x2bc6, 0x0000, 0x0000, 0x0000, 0xddde, 0xe490, 0x2ba7, 0x0000, 0x0000, 0x0000, 0x34a0, 0xb4fd, 0x2b85, 0x0000, 0x0000, 0x0000, 0x91b4, 0x8ef6, 0x2b68, 0x0000, 0x0000, 0x0000, 0xa3e0, 0xa2a7, 0x2b47, 0x0000, 0x0000, 0x0000, 0xcce4, 0x82b3, 0x2b2a, 0x0000, 0x0000, 0x0000, 0xe4be, 0x8207, 0x2b0c, 
0x0000, 0x0000, 0x0000, 0x1d92, 0xab43, 0x2aed, 0x0000, 0x0000, 0x0000, 0xe818, 0xf9f6, 0x2acd, 0x0000, 0x0000, 0x0000, 0xff12, 0xba80, 0x2aaf, 0x0000, 0x0000, 0x0000, 0x5254, 0x8529, 0x2a90, 0x0000, 0x0000, 0x0000, 0x1b88, 0xe032, 0x2a71, 0x0000, 0x0000, 0x0000, 0x3248, 0xd86d, 0x2a50, 0x0000, 0x0000, 0x0000, 0x3140, 0xc9d5, 0x2a2e, 0x0000, 0x0000, 0x0000, 0x14e6, 0xbd47, 0x2a14, 0x0000, 0x0000, 0x0000, 0x5c10, 0xe544, 0x29f4, 0x0000, 0x0000, 0x0000, 0x9f50, 0x90b6, 0x29d4, 0x0000, 0x0000, 0x0000, 0x9850, 0xab55, 0x29b6, 0x0000, 0x0000, 0x0000, 0x2750, 0x9d07, 0x2998, 0x0000, 0x0000, 0x0000, 0x6700, 0x8bbb, 0x2973, 0x0000, 0x0000, 0x0000, 0x5dba, 0xed31, 0x295a, 0x0000, 0x0000, 0x0000, 0x61dc, 0x85fe, 0x293a, 0x0000, 0x0000, 0x0000, 0x9ba2, 0xd6b4, 0x291c, 0x0000, 0x0000, 0x0000, 0x2d30, 0xe3a5, 0x28fb, 0x0000, 0x0000, 0x0000, 0x6630, 0xb566, 0x28dd, 0x0000, 0x0000, 0x0000, 0x5ad4, 0xa829, 0x28bf, 0x0000, 0x0000, 0x0000, 0x89d8, 0xe290, 0x28a0, 0x0000, 0x0000, 0x0000, 0x3916, 0xc428, 0x2881, 0x0000, 0x0000, 0x0000, 0x0490, 0xbea4, 0x2860, 0x0000, 0x0000, 0x0000, 0xee06, 0x80ee, 0x2843, 0x0000, 0x0000, 0x0000, 0xfc00, 0xf327, 0x2820, 0x0000, 0x0000, 0x0000, 0xea40, 0xa871, 0x2800, 0x0000, 0x0000, 0x0000, 0x63d8, 0x9c26, 0x27e4, 0x0000, 0x0000, 0x0000, 0x07ba, 0xc0c9, 0x27c7, 0x0000, 0x0000, 0x0000, 0x3fa2, 0x9797, 0x27a8, 0x0000, 0x0000, 0x0000, 0x21c6, 0xfeca, 0x2789, 0x0000, 0x0000, 0x0000, 0xde40, 0x860d, 0x2768, 0x0000, 0x0000, 0x0000, 0x9cc8, 0x98ce, 0x2749, 0x0000, 0x0000, 0x0000, 0x3778, 0xa31c, 0x272a, 0x0000, 0x0000, 0x0000, 0xe778, 0xf6e2, 0x270b, 0x0000, 0x0000, 0x0000, 0x59b8, 0xf841, 0x26ed, 0x0000, 0x0000, 0x0000, 0x02e0, 0xad04, 0x26cd, 0x0000, 0x0000, 0x0000, 0x5a92, 0x9380, 0x26b0, 0x0000, 0x0000, 0x0000, 0xc740, 0x8886, 0x268d, 0x0000, 0x0000, 0x0000, 0x0680, 0xfaf8, 0x266c, 0x0000, 0x0000, 0x0000, 0xfb60, 0x897f, 0x2653, 0x0000, 0x0000, 0x0000, 0x8760, 0xf903, 0x2634, 0x0000, 0x0000, 0x0000, 0xad2a, 0xc2c8, 0x2615, 0x0000, 0x0000, 0x0000, 0x2d86, 
0x8aef, 0x25f6, 0x0000, 0x0000, 0x0000, 0x1ef4, 0xe627, 0x25d6, 0x0000, 0x0000, 0x0000, 0x09e4, 0x8020, 0x25b7, 0x0000, 0x0000, 0x0000, 0x7548, 0xd227, 0x2598, 0x0000, 0x0000, 0x0000, 0x75dc, 0xfb5b, 0x2579, 0x0000, 0x0000, 0x0000, 0xea84, 0xc8b6, 0x255a, 0x0000, 0x0000, 0x0000, 0xe4d0, 0x8145, 0x253b, 0x0000, 0x0000, 0x0000, 0x3640, 0x9768, 0x251c, 0x0000, 0x0000, 0x0000, 0x246a, 0xccec, 0x24fe, 0x0000, 0x0000, 0x0000, 0x51d0, 0xa075, 0x24dd, 0x0000, 0x0000, 0x0000, 0x4638, 0xa385, 0x24bf, 0x0000, 0x0000, 0x0000, 0xd788, 0xd776, 0x24a1, 0x0000, 0x0000, 0x0000, 0x1370, 0x8997, 0x2482, 0x0000, 0x0000, 0x0000, 0x1e88, 0x9b67, 0x2462, 0x0000, 0x0000, 0x0000, 0x6c08, 0xd975, 0x2444, 0x0000, 0x0000, 0x0000, 0xfdb0, 0xcfc0, 0x2422, 0x0000, 0x0000, 0x0000, 0x3100, 0xc026, 0x2406, 0x0000, 0x0000, 0x0000, 0xc5b4, 0xae64, 0x23e6, 0x0000, 0x0000, 0x0000, 0x2280, 0xf687, 0x23c3, 0x0000, 0x0000, 0x0000, 0x2de0, 0x9006, 0x23a9, 0x0000, 0x0000, 0x0000, 0x24bc, 0xf631, 0x238a, 0x0000, 0x0000, 0x0000, 0xb8d4, 0xa975, 0x236b, 0x0000, 0x0000, 0x0000, 0xd9a4, 0xb949, 0x234b, 0x0000, 0x0000, 0x0000, 0xb54e, 0xbd39, 0x232d, 0x0000, 0x0000, 0x0000, 0x4aac, 0x9a52, 0x230e, 0x0000, 0x0000, 0x0000, 0xbbbc, 0xd085, 0x22ef, 0x0000, 0x0000, 0x0000, 0xdf18, 0xc633, 0x22cf, 0x0000, 0x0000, 0x0000, 0x16d0, 0xeca5, 0x22af, 0x0000, 0x0000, 0x0000, 0xf2a0, 0xdf6f, 0x228e, 0x0000, 0x0000, 0x0000, 0x8c44, 0xe86b, 0x2272, 0x0000, 0x0000, 0x0000, 0x35c0, 0xbbf4, 0x2253, 0x0000, 0x0000, 0x0000, 0x0c40, 0xdafb, 0x2230, 0x0000, 0x0000, 0x0000, 0x92dc, 0x9935, 0x2216, 0x0000, 0x0000, 0x0000, 0x0ca0, 0xbda6, 0x21f3, 0x0000, 0x0000, 0x0000, 0x5958, 0xa6fd, 0x21d6, 0x0000, 0x0000, 0x0000, 0xa3dc, 0x9d7f, 0x21b9, 0x0000, 0x0000, 0x0000, 0x79dc, 0xfcb5, 0x2199, 0x0000, 0x0000, 0x0000, 0xf264, 0xcebb, 0x217b, 0x0000, 0x0000, 0x0000, 0x0abe, 0x8308, 0x215c, 0x0000, 0x0000, 0x0000, 0x30ae, 0xb463, 0x213d, 0x0000, 0x0000, 0x0000, 0x6228, 0xb040, 0x211c, 0x0000, 0x0000, 0x0000, 0xc9b2, 0xf43b, 0x20ff, 0x0000, 0x0000, 
0x0000, 0x3d8e, 0xa4b3, 0x20e0, 0x0000, 0x0000, 0x0000, 0x84e6, 0x8dab, 0x20c1, 0x0000, 0x0000, 0x0000, 0xa124, 0x9b74, 0x20a1, 0x0000, 0x0000, 0x0000, 0xc276, 0xd497, 0x2083, 0x0000, 0x0000, 0x0000, 0x6354, 0xa466, 0x2063, 0x0000, 0x0000, 0x0000, 0x8654, 0xaf0a, 0x2044, 0x0000, 0x0000, 0x0000, 0x1d20, 0xfa5c, 0x2024, 0x0000, 0x0000, 0x0000, 0xbcd0, 0xf3f0, 0x2004, 0x0000, 0x0000, 0x0000, 0xedf0, 0xf0b6, 0x1fe7, 0x0000, 0x0000, 0x0000, 0x45bc, 0x9182, 0x1fc9, 0x0000, 0x0000, 0x0000, 0xe254, 0xdc85, 0x1faa, 0x0000, 0x0000, 0x0000, 0xb898, 0xe9b1, 0x1f8a, 0x0000, 0x0000, 0x0000, 0x0ebe, 0xe6f0, 0x1f6c, 0x0000, 0x0000, 0x0000, 0xa9b8, 0xf584, 0x1f4c, 0x0000, 0x0000, 0x0000, 0x12e8, 0xdf6b, 0x1f2e, 0x0000, 0x0000, 0x0000, 0x9f9e, 0xcd55, 0x1f0f, 0x0000, 0x0000, 0x0000, 0x05a0, 0xec3a, 0x1eef, 0x0000, 0x0000, 0x0000, 0xd8e0, 0x96f8, 0x1ed1, 0x0000, 0x0000, 0x0000, 0x3bd4, 0xccc6, 0x1eb1, 0x0000, 0x0000, 0x0000, 0x4910, 0xb87b, 0x1e93, 0x0000, 0x0000, 0x0000, 0xbefc, 0xd40b, 0x1e73, 0x0000, 0x0000, 0x0000, 0x317e, 0xa406, 0x1e55, 0x0000, 0x0000, 0x0000, 0x6bb2, 0xc2b2, 0x1e36, 0x0000, 0x0000, 0x0000, 0xb87e, 0xbb78, 0x1e17, 0x0000, 0x0000, 0x0000, 0xa03c, 0xdbbd, 0x1df7, 0x0000, 0x0000, 0x0000, 0x5b6c, 0xe3c8, 0x1dd9, 0x0000, 0x0000, 0x0000, 0x8968, 0xca8e, 0x1dba, 0x0000, 0x0000, 0x0000, 0xc024, 0xe6ab, 0x1d9a, 0x0000, 0x0000, 0x0000, 0x4110, 0xd4eb, 0x1d7a, 0x0000, 0x0000, 0x0000, 0xa168, 0xbdb5, 0x1d5d, 0x0000, 0x0000, 0x0000, 0x012e, 0xa5fa, 0x1d3e, 0x0000, 0x0000, 0x0000, 0x6838, 0x9c1f, 0x1d1e, 0x0000, 0x0000, 0x0000, 0xa158, 0xaa76, 0x1d00, 0x0000, 0x0000, 0x0000, 0x090a, 0xbd95, 0x1ce1, 0x0000, 0x0000, 0x0000, 0xf73e, 0x8b6d, 0x1cc2, 0x0000, 0x0000, 0x0000, 0x5fda, 0xbcbf, 0x1ca3, 0x0000, 0x0000, 0x0000, 0xdbe8, 0xb89f, 0x1c84, 0x0000, 0x0000, 0x0000, 0x6e4c, 0x96c7, 0x1c64, 0x0000, 0x0000, 0x0000, 0x19c2, 0xf2a4, 0x1c46, 0x0000, 0x0000, 0x0000, 0xb800, 0xf855, 0x1c1e, 0x0000, 0x0000, 0x0000, 0x87fc, 0x85ff, 0x1c08, 0x0000, 0x0000, 0x0000, 0x1418, 0x839f, 0x1be9, 
0x0000, 0x0000, 0x0000, 0x6186, 0xd9d8, 0x1bca, 0x0000, 0x0000, 0x0000, 0xf500, 0xabaa, 0x1ba6, 0x0000, 0x0000, 0x0000, 0x7b36, 0xdafe, 0x1b8c, 0x0000, 0x0000, 0x0000, 0xf394, 0xe6d8, 0x1b6c, 0x0000, 0x0000, 0x0000, 0x6efc, 0x9e55, 0x1b4e, 0x0000, 0x0000, 0x0000, 0x5e10, 0xc523, 0x1b2e, 0x0000, 0x0000, 0x0000, 0x8210, 0xb6f9, 0x1b0d, 0x0000, 0x0000, 0x0000, 0x9ab0, 0x96e3, 0x1af1, 0x0000, 0x0000, 0x0000, 0x3864, 0x92e7, 0x1ad1, 0x0000, 0x0000, 0x0000, 0x9878, 0xdc65, 0x1ab1, 0x0000, 0x0000, 0x0000, 0xfa20, 0xd6cb, 0x1a94, 0x0000, 0x0000, 0x0000, 0x6c00, 0xa4e4, 0x1a70, 0x0000, 0x0000, 0x0000, 0xab40, 0xb41b, 0x1a53, 0x0000, 0x0000, 0x0000, 0x43a4, 0x8ede, 0x1a37, 0x0000, 0x0000, 0x0000, 0x22e0, 0x9314, 0x1a15, 0x0000, 0x0000, 0x0000, 0x6170, 0xb949, 0x19f8, 0x0000, 0x0000, 0x0000, 0x6b00, 0xe056, 0x19d8, 0x0000, 0x0000, 0x0000, 0x9ba8, 0xa94c, 0x19b9, 0x0000, 0x0000, 0x0000, 0xfaa0, 0xaa16, 0x199b, 0x0000, 0x0000, 0x0000, 0x899a, 0xf627, 0x197d, 0x0000, 0x0000, 0x0000, 0x9f20, 0xfb70, 0x195d, 0x0000, 0x0000, 0x0000, 0xa4b8, 0xc176, 0x193e, 0x0000, 0x0000, 0x0000, 0xb21c, 0x85c3, 0x1920, 0x0000, 0x0000, 0x0000, 0x50d2, 0x9b19, 0x1901, 0x0000, 0x0000, 0x0000, 0xd4b0, 0xb708, 0x18e0, 0x0000, 0x0000, 0x0000, 0xfb88, 0xf510, 0x18c1, 0x0000, 0x0000, 0x0000, 0x31ec, 0xdc8d, 0x18a3, 0x0000, 0x0000, 0x0000, 0x3c00, 0xbff9, 0x1885, 0x0000, 0x0000, 0x0000, 0x5020, 0xc30b, 0x1862, 0x0000, 0x0000, 0x0000, 0xd4f0, 0xda0c, 0x1844, 0x0000, 0x0000, 0x0000, 0x20d2, 0x99a5, 0x1828, 0x0000, 0x0000, 0x0000, 0x852e, 0xd159, 0x1809, 0x0000, 0x0000, 0x0000, 0x7cd8, 0x97a1, 0x17e9, 0x0000, 0x0000, 0x0000, 0x423a, 0x997b, 0x17cb, 0x0000, 0x0000, 0x0000, 0xc1c0, 0xbe7d, 0x17a8, 0x0000, 0x0000, 0x0000, 0xe8bc, 0xdcdd, 0x178d, 0x0000, 0x0000, 0x0000, 0x8b28, 0xae06, 0x176e, 0x0000, 0x0000, 0x0000, 0x102e, 0xb8d4, 0x174f, 0x0000, 0x0000, 0x0000, 0xaa00, 0xaa5c, 0x172f, 0x0000, 0x0000, 0x0000, 0x51f0, 0x9fc0, 0x170e, 0x0000, 0x0000, 0x0000, 0xf858, 0xe181, 0x16f2, 0x0000, 0x0000, 0x0000, 0x91a8, 
0x8162, 0x16d3, 0x0000, 0x0000, 0x0000, 0x5f40, 0xcb6f, 0x16b1, 0x0000, 0x0000, 0x0000, 0xbb50, 0xe55f, 0x1693, 0x0000, 0x0000, 0x0000, 0xacd2, 0xd895, 0x1676, 0x0000, 0x0000, 0x0000, 0xef30, 0x97bf, 0x1654, 0x0000, 0x0000, 0x0000, 0xf700, 0xb3d7, 0x1633, 0x0000, 0x0000, 0x0000, 0x3454, 0xa7b5, 0x1619, 0x0000, 0x0000, 0x0000, 0x6b00, 0xa929, 0x15f6, 0x0000, 0x0000, 0x0000, 0x9f04, 0x89f7, 0x15db, 0x0000, 0x0000, 0x0000, 0xad78, 0xd985, 0x15bc, 0x0000, 0x0000, 0x0000, 0xa46a, 0xae3f, 0x159d, 0x0000, 0x0000, 0x0000, 0x63a0, 0xd0da, 0x157c, 0x0000, 0x0000, 0x0000, 0x5e90, 0x817d, 0x155e, 0x0000, 0x0000, 0x0000, 0x1494, 0xb13f, 0x1540, 0x0000, 0x0000, 0x0000, 0x0090, 0x9c40, 0x1521, 0x0000, 0x0000, 0x0000, 0xdd70, 0xcc86, 0x1500, 0x0000, 0x0000, 0x0000, 0x64f8, 0xdb6f, 0x14e1, 0x0000, 0x0000, 0x0000, 0xe22c, 0xac17, 0x14c3, 0x0000, 0x0000, 0x0000, 0x60e0, 0xa9ad, 0x14a3, 0x0000, 0x0000, 0x0000, 0x4640, 0xd658, 0x1481, 0x0000, 0x0000, 0x0000, 0x6490, 0xa181, 0x1467, 0x0000, 0x0000, 0x0000, 0x1df4, 0xaaa2, 0x1447, 0x0000, 0x0000, 0x0000, 0xb94a, 0x8f61, 0x1429, 0x0000, 0x0000, 0x0000, 0x5198, 0x9d83, 0x1409, 0x0000, 0x0000, 0x0000, 0x0f7a, 0xa818, 0x13eb, 0x0000, 0x0000, 0x0000, 0xc45e, 0xc06c, 0x13cc, 0x0000, 0x0000, 0x0000, 0x4ec0, 0xfa29, 0x13a8, 0x0000, 0x0000, 0x0000, 0x6418, 0x8cad, 0x138c, 0x0000, 0x0000, 0x0000, 0xbcc8, 0xe7d1, 0x136f, 0x0000, 0x0000, 0x0000, 0xc934, 0xf9b0, 0x134f, 0x0000, 0x0000, 0x0000, 0x6ce0, 0x98df, 0x1331, 0x0000, 0x0000, 0x0000, 0x3516, 0xe5e9, 0x1312, 0x0000, 0x0000, 0x0000, 0xc6c0, 0xef8b, 0x12ef, 0x0000, 0x0000, 0x0000, 0xaf02, 0x913d, 0x12d4, 0x0000, 0x0000, 0x0000, 0xd230, 0xe1d5, 0x12b5, 0x0000, 0x0000, 0x0000, 0xfba8, 0xc232, 0x1295, 0x0000, 0x0000, 0x0000, 0x7ba4, 0xabeb, 0x1277, 0x0000, 0x0000, 0x0000, 0x6e5c, 0xc692, 0x1258, 0x0000, 0x0000, 0x0000, 0x76a2, 0x9756, 0x1239, 0x0000, 0x0000, 0x0000, 0xe180, 0xe423, 0x1214, 0x0000, 0x0000, 0x0000, 0x8c3c, 0x90f8, 0x11fb, 0x0000, 0x0000, 0x0000, 0x9f3c, 0x9fd2, 0x11dc, 0x0000, 0x0000, 
0x0000, 0x53e0, 0xb73e, 0x11bd, 0x0000, 0x0000, 0x0000, 0x45be, 0x88d6, 0x119e, 0x0000, 0x0000, 0x0000, 0x111a, 0x8bc0, 0x117f, 0x0000, 0x0000, 0x0000, 0xe26a, 0xd7ff, 0x1160, 0x0000, 0x0000, 0x0000, 0xfb60, 0xdd8d, 0x113f, 0x0000, 0x0000, 0x0000, 0x9370, 0xc108, 0x1120, 0x0000, 0x0000, 0x0000, 0x9654, 0x8baf, 0x1103, 0x0000, 0x0000, 0x0000, 0xd6ec, 0xd6b9, 0x10e4, 0x0000, 0x0000, 0x0000, 0x23e4, 0xd7b7, 0x10c4, 0x0000, 0x0000, 0x0000, 0x1aa6, 0xa847, 0x10a6, 0x0000, 0x0000, 0x0000, 0xbee6, 0x9fef, 0x1087, 0x0000, 0x0000, 0x0000, 0x26d0, 0xa6eb, 0x1066, 0x0000, 0x0000, 0x0000, 0x5b86, 0xa880, 0x1049, 0x0000, 0x0000, 0x0000, 0x125c, 0xd971, 0x1029, 0x0000, 0x0000, 0x0000, 0x1f78, 0x9d18, 0x100a, 0x0000, 0x0000, 0x0000, 0x0e84, 0xb15b, 0x0feb, 0x0000, 0x0000, 0x0000, 0xd0c0, 0xc150, 0x0fcc, 0x0000, 0x0000, 0x0000, 0xa330, 0xc40c, 0x0fad, 0x0000, 0x0000, 0x0000, 0x5202, 0xfc2c, 0x0f8f, 0x0000, 0x0000, 0x0000, 0x3f7c, 0xecf5, 0x0f6f, 0x0000, 0x0000, 0x0000, 0xef44, 0xfdfd, 0x0f50, 0x0000, 0x0000, 0x0000, 0x3f6c, 0xab1b, 0x0f31, 0x0000, 0x0000, 0x0000, 0xf658, 0x89ec, 0x0f11, 0x0000, 0x0000, 0x0000, 0xbfc8, 0x9ba8, 0x0ef4, 0x0000, 0x0000, 0x0000, 0x3d40, 0xbe21, 0x0ed5, 0x0000, 0x0000, 0x0000, 0xbbc4, 0xc70d, 0x0eb6, 0x0000, 0x0000, 0x0000, 0x5158, 0xdb16, 0x0e96, 0x0000, 0x0000, 0x0000, 0xb5a8, 0xa8d8, 0x0e78, 0x0000, 0x0000, 0x0000, 0xcccc, 0xb40e, 0x0e58, 0x0000, 0x0000, 0x0000, 0x448c, 0xcb62, 0x0e3a, 0x0000, 0x0000, 0x0000, 0xf12a, 0x8aed, 0x0e1b, 0x0000, 0x0000, 0x0000, 0x79d0, 0xc59c, 0x0dfb, 0x0000, 0x0000, 0x0000, 0x06b4, 0xcdc9, 0x0ddd, 0x0000, 0x0000, 0x0000, 0xae70, 0xa979, 0x0dbe, 0x0000, 0x0000, 0x0000, 0x317c, 0xa8fb, 0x0d9e, 0x0000, 0x0000, 0x0000, 0x5fe0, 0x8a50, 0x0d7d, 0x0000, 0x0000, 0x0000, 0x70b6, 0xfdfa, 0x0d61, 0x0000, 0x0000, 0x0000, 0x1640, 0x9dc7, 0x0d41, 0x0000, 0x0000, 0x0000, 0x9a9c, 0xdc50, 0x0d23, 0x0000, 0x0000, 0x0000, 0x4fcc, 0x9a9b, 0x0d04, 0x0000, 0x0000, 0x0000, 0x7e48, 0x8f77, 0x0ce5, 0x0000, 0x0000, 0x0000, 0x84e4, 0xd4b9, 0x0cc6, 
0x0000, 0x0000, 0x0000, 0x84e0, 0xbd10, 0x0ca6, 0x0000, 0x0000, 0x0000, 0x1b0a, 0xc8d9, 0x0c88, 0x0000, 0x0000, 0x0000, 0x6a48, 0xfc81, 0x0c68, 0x0000, 0x0000, 0x0000, 0x070a, 0xbef6, 0x0c4a, 0x0000, 0x0000, 0x0000, 0x8a70, 0xf096, 0x0c2b, 0x0000, 0x0000, 0x0000, 0xecc2, 0xc994, 0x0c0c, 0x0000, 0x0000, 0x0000, 0x1540, 0x9537, 0x0bea, 0x0000, 0x0000, 0x0000, 0x1b02, 0xab5b, 0x0bce, 0x0000, 0x0000, 0x0000, 0x5dc0, 0xb0c8, 0x0bad, 0x0000, 0x0000, 0x0000, 0xc928, 0xe034, 0x0b8f, 0x0000, 0x0000, 0x0000, 0x2d12, 0xb4b0, 0x0b71, 0x0000, 0x0000, 0x0000, 0x8fc2, 0xbb94, 0x0b52, 0x0000, 0x0000, 0x0000, 0xe236, 0xe22f, 0x0b33, 0x0000, 0x0000, 0x0000, 0xb97c, 0xbe9e, 0x0b13, 0x0000, 0x0000, 0x0000, 0xe1a6, 0xe16d, 0x0af5, 0x0000, 0x0000, 0x0000, 0xd330, 0xbaf0, 0x0ad6, 0x0000, 0x0000, 0x0000, 0xc0bc, 0xbbd0, 0x0ab7, 0x0000, 0x0000, 0x0000, 0x8e66, 0xdd9b, 0x0a98, 0x0000, 0x0000, 0x0000, 0xc95c, 0xf799, 0x0a79, 0x0000, 0x0000, 0x0000, 0xdac0, 0xbe4c, 0x0a55, 0x0000, 0x0000, 0x0000, 0xafc0, 0xc378, 0x0a37, 0x0000, 0x0000, 0x0000, 0xa880, 0xe341, 0x0a19, 0x0000, 0x0000, 0x0000, 0xc242, 0x81f6, 0x09fd, 0x0000, 0x0000, 0x0000, 0x7470, 0xc777, 0x09de, 0x0000, 0x0000, 0x0000, 0x62bc, 0xb684, 0x09be, 0x0000, 0x0000, 0x0000, 0x43ac, 0x8c58, 0x099f, 0x0000, 0x0000, 0x0000, 0xcc3c, 0xf9ac, 0x0981, 0x0000, 0x0000, 0x0000, 0x1526, 0xb670, 0x0962, 0x0000, 0x0000, 0x0000, 0xc9fe, 0xdf50, 0x0943, 0x0000, 0x0000, 0x0000, 0x6ae6, 0xc065, 0x0924, 0x0000, 0x0000, 0x0000, 0xb114, 0xcf29, 0x0905, 0x0000, 0x0000, 0x0000, 0xd388, 0x922a, 0x08e4, 0x0000, 0x0000, 0x0000, 0xcf54, 0xb926, 0x08c7, 0x0000, 0x0000, 0x0000, 0x3826, 0xe855, 0x08a8, 0x0000, 0x0000, 0x0000, 0xe7c8, 0x829b, 0x0888, 0x0000, 0x0000, 0x0000, 0x546c, 0xa903, 0x086a, 0x0000, 0x0000, 0x0000, 0x8768, 0x99cc, 0x0849, 0x0000, 0x0000, 0x0000, 0x00ac, 0xf529, 0x082b, 0x0000, 0x0000, 0x0000, 0x2658, 0x9f0b, 0x080c, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x9e21, 0x07ee, 0x0000, 0x0000, 0x0000, 0x6da2, 0x9910, 0x07cf, 0x0000, 0x0000, 0x0000, 0x9220, 
0xf9b3, 0x07b0, 0x0000, 0x0000, 0x0000, 0x3d90, 0xa541, 0x0791, 0x0000, 0x0000, 0x0000, 0x6e4c, 0xe7cc, 0x0771, 0x0000, 0x0000, 0x0000, 0xa8fa, 0xe80a, 0x0753, 0x0000, 0x0000, 0x0000, 0x4e14, 0xc3a7, 0x0734, 0x0000, 0x0000, 0x0000, 0xf7e0, 0xbad9, 0x0712, 0x0000, 0x0000, 0x0000, 0xfea0, 0xeff2, 0x06f5, 0x0000, 0x0000, 0x0000, 0xcef6, 0xbd48, 0x06d7, 0x0000, 0x0000, 0x0000, 0x7544, 0xf559, 0x06b7, 0x0000, 0x0000, 0x0000, 0x2388, 0xf655, 0x0698, 0x0000, 0x0000, 0x0000, 0xe900, 0xad56, 0x0676, 0x0000, 0x0000, 0x0000, 0x2cc0, 0x8437, 0x0659, 0x0000, 0x0000, 0x0000, 0x3068, 0xc544, 0x063b, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe73c, 0x061b, 0x0000, 0x0000, 0x0000, 0xee50, 0x9d49, 0x05fc, 0x0000, 0x0000, 0x0000, 0x93d2, 0x81f6, 0x05df, 0x0000, 0x0000, 0x0000, 0x941c, 0xadff, 0x05bf, 0x0000, 0x0000, 0x0000, 0x2ce2, 0x8e45, 0x05a1, 0x0000, 0x0000, 0x0000, 0x4a60, 0x95fd, 0x0581, 0x0000, 0x0000, 0x0000, 0x79f8, 0xb83a, 0x0563, 0x0000, 0x0000, 0x0000, 0xcb58, 0xa1f5, 0x0543, 0x0000, 0x0000, 0x0000, 0x2a3a, 0xdc36, 0x0525, 0x0000, 0x0000, 0x0000, 0x14ee, 0x890e, 0x0506, 0x0000, 0x0000, 0x0000, 0x8f20, 0xc432, 0x04e3, 0x0000, 0x0000, 0x0000, 0x8440, 0xb21d, 0x04c6, 0x0000, 0x0000, 0x0000, 0x5430, 0xf698, 0x04a7, 0x0000, 0x0000, 0x0000, 0x04ae, 0x8b20, 0x048a, 0x0000, 0x0000, 0x0000, 0x04d0, 0xe872, 0x046b, 0x0000, 0x0000, 0x0000, 0xc78e, 0x8893, 0x044c, 0x0000, 0x0000, 0x0000, 0x0f78, 0x9895, 0x042b, 0x0000, 0x0000, 0x0000, 0x11d4, 0xdf2e, 0x040d, 0x0000, 0x0000, 0x0000, 0xe84c, 0x89d5, 0x03ef, 0x0000, 0x0000, 0x0000, 0xf7be, 0x8a67, 0x03d0, 0x0000, 0x0000, 0x0000, 0x95d0, 0xc906, 0x03b1, 0x0000, 0x0000, 0x0000, 0x64ce, 0xd96c, 0x0392, 0x0000, 0x0000, 0x0000, 0x97ba, 0xa16f, 0x0373, 0x0000, 0x0000, 0x0000, 0x463c, 0xc51a, 0x0354, 0x0000, 0x0000, 0x0000, 0xef0a, 0xe93e, 0x0335, 0x0000, 0x0000, 0x0000, 0x526a, 0xa466, 0x0316, 0x0000, 0x0000, 0x0000, 0x4140, 0xa94d, 0x02f5, 0x0000, 0x0000, 0x0000, 0xb4ec, 0xce68, 0x02d8, 0x0000, 0x0000, 0x0000, 0x4fa2, 0x8490, 0x02b9, 0x0000, 0x0000, 
0x0000, 0x4e60, 0xca98, 0x0298, 0x0000, 0x0000, 0x0000, 0x08dc, 0xe09c, 0x027a, 0x0000, 0x0000, 0x0000, 0x2b90, 0xc7e3, 0x025c, 0x0000, 0x0000, 0x0000, 0x5a7c, 0xf8ef, 0x023c, 0x0000, 0x0000, 0x0000, 0x5022, 0x9d58, 0x021e, 0x0000, 0x0000, 0x0000, 0x553a, 0xe242, 0x01ff, 0x0000, 0x0000, 0x0000, 0x7e6e, 0xb54d, 0x01e0, 0x0000, 0x0000, 0x0000, 0xd2d4, 0xa88c, 0x01c1, 0x0000, 0x0000, 0x0000, 0x75b6, 0xfe6d, 0x01a2, 0x0000, 0x0000, 0x0000, 0x3bb2, 0xf04c, 0x0183, 0x0000, 0x0000, 0x0000, 0xc2d0, 0xc046, 0x0163, 0x0000, 0x0000, 0x0000, 0x250c, 0xf9d6, 0x0145, 0x0000, 0x0000, 0x0000, 0xb7b4, 0x8a0d, 0x0126, 0x0000, 0x0000, 0x0000, 0x1a72, 0xe4f5, 0x0107, 0x0000, 0x0000, 0x0000, 0x825c, 0xa9b8, 0x00e8, 0x0000, 0x0000, 0x0000, 0x6c90, 0xc9ad, 0x00c6, 0x0000, 0x0000, 0x0000, 0x4d00, 0xd1bb, 0x00aa, 0x0000, 0x0000, 0x0000, 0xa4a0, 0xee01, 0x0087, 0x0000, 0x0000, 0x0000, 0x89a8, 0xbe9f, 0x006b, 0x0000, 0x0000, 0x0000, 0x038e, 0xc80c, 0x004d, 0x0000, 0x0000, 0x0000, 0xfe26, 0x8384, 0x002e, 0x0000, 0x0000, 0x0000, 0xcd90, 0xca57, 0x000e, 0x0000 }; void MacroAssembler::libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; Label B1_13, B1_14, B1_15; assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); address zero_none = (address)_zero_none; address _4onpi_d = (address)__4onpi_d; address TWO_32H = (address)_TWO_32H; address pi04_3d = (address)_pi04_3d; address pi04_5d = (address)_pi04_5d; address SCALE = (address)_SCALE; address zeros = (address)_zeros; address pi04_2d = (address)_pi04_2d; address TWO_12H = (address)_TWO_12H; address _4onpi_31l = (address)__4onpi_31l; bind(B1_1); push(ebp); movl(ebp, esp); andl(esp, -16); push(esi); push(edi); push(ebx); subl(esp, 20); movzwl(ebx, Address(ebp, 16)); andl(ebx, 32767); movl(eax, Address(ebp, 20)); cmpl(ebx, 16413); movl(esi, Address(ebp, 24)); movl(Address(esp, 
4), eax); jcc(Assembler::greaterEqual, B1_8); bind(B1_2); fld_x(Address(ebp, 8)); fld_d(ExternalAddress(_4onpi_d)); //0x6dc9c883UL, 0x3ff45f30UL fmul(1); fstp_x(Address(esp, 8)); movzwl(ecx, Address(esp, 16)); negl(ecx); addl(ecx, 30); movl(eax, Address(esp, 12)); shrl(eax); cmpl(Address(esp, 4), 0); jcc(Assembler::notEqual, B1_4); bind(B1_3); lea(ecx, Address(eax, 1)); andl(ecx, -2); jmp(B1_5); bind(B1_4); movl(ecx, eax); addl(eax, Address(esp, 4)); movl(edx, eax); andl(edx, 1); addl(ecx, edx); bind(B1_5); fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL cmpl(ebx, 16400); movl(Address(esp, 0), ecx); fild_s(Address(esp, 0)); jcc(Assembler::greaterEqual, B1_7); bind(B1_6); fld_d(ExternalAddress(pi04_3d)); //0x54442d00UL, 0x3fe921fbUL fmul(1); fsubp(3); fxch(1); fmul(2); fld_s(2); fadd(1); fsubrp(1); fld_s(0); fxch(1); fsuba(3); fld_d(ExternalAddress(8 + pi04_3d)); //0x98cc5180UL, 0x3ce84698UL fmul(3); fsuba(2); fxch(1); fsub(2); fsubrp(1); faddp(3); fld_d(ExternalAddress(16 + pi04_3d)); //0xcbb5bf6cUL, 0xb9dfc8f8UL fmulp(2); fld_s(1); fsubr(1); fsuba(1); fxch(2); fsubp(1); faddp(2); fxch(1); jmp(B1_15); bind(B1_7); fld_d(ExternalAddress(pi04_5d)); //0x54400000UL, 0x3fe921fbUL fmul(1); fsubp(3); fxch(1); fmul(2); fld_s(2); fadd(1); fsubrp(1); fld_s(0); fxch(1); fsuba(3); fld_d(ExternalAddress(8 + pi04_5d)); //0x1a600000UL, 0x3dc0b461UL fmul(3); fsuba(2); fxch(1); fsub(2); fsubrp(1); faddp(3); fld_d(ExternalAddress(16 + pi04_5d)); //0x2e000000UL, 0x3b93198aUL fmul(2); fld_s(0); fsubr(2); fsuba(2); fxch(1); fsubp(2); fxch(1); faddp(3); fld_d(ExternalAddress(24 + pi04_5d)); //0x25200000UL, 0x396b839aUL fmul(2); fld_s(0); fsubr(2); fsuba(2); fxch(1); fsubp(2); fxch(1); faddp(3); fld_d(ExternalAddress(32 + pi04_5d)); //0x533e63a0UL, 0x37027044UL fmulp(2); fld_s(1); fsubr(1); fsuba(1); fxch(2); fsubp(1); faddp(2); fxch(1); jmp(B1_15); bind(B1_8); fld_x(Address(ebp, 8)); addl(ebx, -16417); fmul_d(as_Address(ExternalAddress(SCALE))); //0x00000000UL, 0x32600000UL 
movl(eax, -2078209981); imull(ebx); addl(edx, ebx); movl(ecx, ebx); sarl(edx, 4); sarl(ecx, 31); subl(edx, ecx); movl(eax, edx); shll(eax, 5); fstp_x(Address(ebp, 8)); fld_x(Address(ebp, 8)); subl(eax, edx); movl(Address(ebp, 8), 0); subl(ebx, eax); fld_x(Address(ebp, 8)); cmpl(ebx, 17); fsuba(1); jcc(Assembler::less, B1_10); bind(B1_9); lea(eax, Address(noreg, edx, Address::times_8)); lea(ecx, Address(eax, edx, Address::times_4)); incl(edx); fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmul(2); fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmul(2); fld_s(0); fadd(2); fsuba(2); fxch(1); faddp(2); fld_s(1); fadd(1); fstp_x(Address(esp, 8)); andl(Address(esp, 8), -16777216); fld_x(Address(esp, 8)); fsubp(1); jmp(B1_11); bind(B1_10); fld_d(ExternalAddress(zeros)); //0x00000000UL, 0x00000000UL fld_s(0); bind(B1_11); fld_s(0); lea(eax, Address(noreg, edx, Address::times_8)); fld_s(3); lea(edx, Address(eax, edx, Address::times_4)); fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); fmul(6); movl(Address(esp, 0), edx); fadda(2); fxch(2); fsuba(3); fxch(2); faddp(3); fxch(2); faddp(3); fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); fmula(2); fld_s(2); fadd(2); fld_s(0); fxch(1); fsubra(3); fxch(3); fchs(); faddp(4); fxch(3); faddp(4); fxch(2); fadd(3); fxch(2); fmul(5); fadda(2); fld_s(4); fld_x(Address(24 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); fmula(1); fxch(1); fadda(4); fxch(4); fstp_x(Address(esp, 8)); movzwl(ebx, Address(esp, 16)); andl(ebx, 32767); cmpl(ebx, 16415); jcc(Assembler::greaterEqual, B1_13); bind(B1_12); negl(ebx); addl(ebx, 30); movl(ecx, ebx); movl(eax, Address(esp, 12)); shrl(eax); shll(eax); movl(Address(esp, 12), eax); movl(Address(esp, 8), 0); shrl(eax); jmp(B1_14); bind(B1_13); negl(ebx); addl(ebx, 30); movl(ecx, ebx); movl(edx, Address(esp, 8)); shrl(edx); 
shll(edx); negl(ecx); movl(eax, Address(esp, 12)); shll(eax); movl(ecx, ebx); movl(Address(esp, 8), edx); shrl(edx); orl(eax, edx); bind(B1_14); fld_x(Address(esp, 8)); addl(eax, Address(esp, 4)); fsubp(3); fmul(6); fld_s(4); movl(edx, eax); andl(edx, 1); fadd(3); movl(ecx, Address(esp, 0)); fsuba(3); fxch(3); faddp(5); fld_s(1); fxch(3); fadd_d(Address(zero_none, RelocationHolder::none).plus_disp(edx, Address::times_8)); fadda(3); fsub(3); faddp(2); fxch(1); faddp(4); fld_s(2); fadd(2); fsuba(2); fxch(3); faddp(2); fxch(1); faddp(3); fld_s(0); fadd(2); fsuba(2); fxch(1); faddp(2); fxch(1); faddp(2); fld_s(2); fld_x(Address(36 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmula(1); fld_s(1); fadd(3); fsuba(3); fxch(2); faddp(3); fxch(2); faddp(3); fxch(1); fmul(4); fld_s(0); fadd(2); fsuba(2); fxch(1); faddp(2); fxch(1); faddp(2); fld_s(2); fld_x(Address(48 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmula(1); fld_s(1); fadd(3); fsuba(3); fxch(2); faddp(3); fxch(2); faddp(3); fld_s(3); fxch(2); fmul(5); fld_x(Address(60 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmula(3); fxch(3); faddp(1); fld_s(0); fadd(2); fsuba(2); fxch(1); faddp(2); fxch(1); faddp(3); fld_s(3); fxch(2); fmul(5); fld_x(Address(72 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmula(3); fxch(3); faddp(1); fld_s(0); fadd(2); fsuba(2); fxch(1); faddp(2); fxch(1); faddp(3); fxch(1); fmulp(4); fld_x(Address(84 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); fmulp(3); fxch(2); faddp(3); fld_s(2); fadd(2); fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL fmul(1); fadda(1); fsubp(1); fsuba(2); fxch(3); faddp(2); faddp(1); fld_d(ExternalAddress(pi04_2d)); //0x54400000UL, 0x3fe921fbUL fld_s(0); fmul(2); fxch(2); fadd(3); fxch(1); fmulp(3); fmul_d(as_Address(ExternalAddress(8 + pi04_2d))); //0x1a626331UL, 0x3dc0b461UL faddp(1); bind(B1_15); 
fld_d(ExternalAddress(TWO_12H)); //0x00000000UL, 0x40b80000UL fld_s(2); fadd(2); fmula(1); fstp_x(Address(esp, 8)); fld_x(Address(esp, 8)); fadd(1); fsubrp(1); fst_d(Address(esi, 0)); fsubp(2); faddp(1); fstp_d(Address(esi, 8)); addl(esp, 20); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); } ALIGNED_(16) juint _L_2il0floatpacket_0[] = { 0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL }; ALIGNED_(16) juint _Pi4Inv[] = { 0x6dc9c883UL, 0x3ff45f30UL }; ALIGNED_(16) juint _Pi4x3[] = { 0x54443000UL, 0xbfe921fbUL, 0x3b39a000UL, 0x3d373dcbUL, 0xe0e68948UL, 0xba845c06UL }; ALIGNED_(16) juint _Pi4x4[] = { 0x54400000UL, 0xbfe921fbUL, 0x1a600000UL, 0xbdc0b461UL, 0x2e000000UL, 0xbb93198aUL, 0x252049c1UL, 0xb96b839aUL }; ALIGNED_(16) jushort _SP[] = { 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffc, 0x0000, 0x8887, 0x8888, 0x8888, 0x8888, 0x3ff8, 0x0000, 0xc527, 0x0d00, 0x00d0, 0xd00d, 0xbff2, 0x0000, 0x45f6, 0xb616, 0x1d2a, 0xb8ef, 0x3fec, 0x0000, 0x825b, 0x3997, 0x2b3f, 0xd732, 0xbfe5, 0x0000, 0xbf33, 0x8bb4, 0x2fda, 0xb092, 0x3fde, 0x0000, 0x44a6, 0xed1a, 0x29ef, 0xd73e, 0xbfd6, 0x0000, 0x8610, 0x307f, 0x62a1, 0xc921, 0x3fce, 0x0000 }; ALIGNED_(16) jushort _CP[] = { 0x0000, 0x0000, 0x0000, 0x8000, 0xbffe, 0x0000, 0xaaa5, 0xaaaa, 0xaaaa, 0xaaaa, 0x3ffa, 0x0000, 0x9c2f, 0x0b60, 0x60b6, 0xb60b, 0xbff5, 0x0000, 0xf024, 0x0cac, 0x00d0, 0xd00d, 0x3fef, 0x0000, 0x03fe, 0x3f65, 0x7dbb, 0x93f2, 0xbfe9, 0x0000, 0xd84d, 0xadee, 0xc698, 0x8f76, 0x3fe2, 0x0000, 0xdaba, 0xfe79, 0xea36, 0xc9c9, 0xbfda, 0x0000, 0x3ac6, 0x0ba0, 0x07ce, 0xd585, 0x3fd2, 0x0000 }; ALIGNED_(16) juint _ones[] = { 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xbff00000UL }; void MacroAssembler::libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, 
B1_21, B1_22, B1_23; Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_41, B1_42, B1_43, B1_44, B1_45, B1_46; assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); address L_2il0floatpacket_0 = (address)_L_2il0floatpacket_0; address Pi4Inv = (address)_Pi4Inv; address Pi4x3 = (address)_Pi4x3; address Pi4x4 = (address)_Pi4x4; address ones = (address)_ones; address CP = (address)_CP; address SP = (address)_SP; bind(B1_1); push(ebp); movl(ebp, esp); andl(esp, -64); push(esi); push(edi); push(ebx); subl(esp, 52); movl(eax, Address(ebp, 16)); movl(edx, Address(ebp, 20)); movl(Address(esp, 32), eax); movl(Address(esp, 36), edx); bind(B1_2); fnstcw(Address(esp, 30)); bind(B1_3); movsd(xmm1, Address(ebp, 8)); movl(esi, Address(ebp, 12)); movl(eax, esi); andl(eax, 2147483647); andps(xmm1, ExternalAddress(L_2il0floatpacket_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL shrl(esi, 31); movl(Address(esp, 40), eax); cmpl(eax, 1104150528); movsd(Address(ebp, 8), xmm1); jcc(Assembler::aboveEqual, B1_11); bind(B1_4); movsd(xmm0, ExternalAddress(Pi4Inv)); //0x6dc9c883UL, 0x3ff45f30UL mulsd(xmm0, xmm1); movzwl(edx, Address(esp, 30)); movl(eax, edx); andl(eax, 768); movsd(Address(esp, 0), xmm0); cmpl(eax, 768); jcc(Assembler::equal, B1_42); bind(B1_5); orl(edx, -64768); movw(Address(esp, 28), edx); bind(B1_6); fldcw(Address(esp, 28)); bind(B1_7); movsd(xmm1, Address(ebp, 8)); movl(ebx, 1); bind(B1_8); movl(Address(esp, 12), ebx); movl(ebx, Address(esp, 4)); movl(eax, ebx); movl(Address(esp, 8), esi); movl(esi, ebx); shrl(esi, 20); andl(eax, 1048575); movl(ecx, esi); orl(eax, 1048576); negl(ecx); movl(edx, eax); addl(ecx, 19); addl(esi, 13); movl(Address(esp, 24), ecx); shrl(edx); movl(ecx, esi); shll(eax); movl(ecx, Address(esp, 24)); movl(esi, Address(esp, 0)); shrl(esi); orl(eax, esi); cmpl(ebx, 1094713344); movsd(Address(esp, 16), xmm1); fld_d(Address(esp, 16)); 
cmov32(Assembler::below, eax, edx); movl(esi, Address(esp, 8)); lea(edx, Address(eax, 1)); movl(ebx, edx); andl(ebx, -2); movl(Address(esp, 16), ebx); fild_s(Address(esp, 16)); movl(ebx, Address(esp, 12)); cmpl(Address(esp, 40), 1094713344); jcc(Assembler::aboveEqual, B1_10); bind(B1_9); fld_d(ExternalAddress(Pi4x3)); //0x54443000UL, 0xbfe921fbUL fmul(1); faddp(2); fld_d(ExternalAddress(8 + Pi4x3)); //0x3b39a000UL, 0x3d373dcbUL fmul(1); faddp(2); fld_d(ExternalAddress(16 + Pi4x3)); //0xe0e68948UL, 0xba845c06UL fmulp(1); faddp(1); jmp(B1_17); bind(B1_10); fld_d(ExternalAddress(Pi4x4)); //0x54400000UL, 0xbfe921fbUL fmul(1); faddp(2); fld_d(ExternalAddress(8 + Pi4x4)); //0x1a600000UL, 0xbdc0b461UL fmul(1); faddp(2); fld_d(ExternalAddress(16 + Pi4x4)); //0x2e000000UL, 0xbb93198aUL fmul(1); faddp(2); fld_d(ExternalAddress(24 + Pi4x4)); //0x252049c1UL, 0xb96b839aUL fmulp(1); faddp(1); jmp(B1_17); bind(B1_11); movzwl(edx, Address(esp, 30)); movl(eax, edx); andl(eax, 768); cmpl(eax, 768); jcc(Assembler::equal, B1_43); bind(B1_12); orl(edx, -64768); movw(Address(esp, 28), edx); bind(B1_13); fldcw(Address(esp, 28)); bind(B1_14); movsd(xmm1, Address(ebp, 8)); movl(ebx, 1); bind(B1_15); movsd(Address(esp, 16), xmm1); fld_d(Address(esp, 16)); addl(esp, -32); lea(eax, Address(esp, 32)); fstp_x(Address(esp, 0)); movl(Address(esp, 12), 0); movl(Address(esp, 16), eax); call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); bind(B1_46); addl(esp, 32); bind(B1_16); fld_d(Address(esp, 0)); lea(edx, Address(eax, 1)); fld_d(Address(esp, 8)); faddp(1); bind(B1_17); movl(ecx, edx); addl(eax, 3); shrl(ecx, 2); andl(ecx, 1); shrl(eax, 2); xorl(esi, ecx); movl(ecx, Address(esp, 36)); andl(eax, 1); andl(ecx, 3); cmpl(ecx, 3); jcc(Assembler::notEqual, B1_25); bind(B1_18); fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 fld_s(1); fmul((2)); testb(edx, 2); fmula((1)); fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 faddp(2); fmula(1); 
fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f faddp(2); fmula(1); fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b faddp(2); fmula(1); fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d faddp(2); fmula(1); fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 faddp(2); fmula(1); fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 faddp(2); fmula(1); fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa faddp(2); fmula(1); fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 fmul(1); fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea faddp(1); fmul(1); fld_x(ExternalAddress(62 + CP)); //0xd84d, 0xadee, 0xc6 faddp(1); fmul(1); fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d faddp(1); fmul(1); fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 faddp(1); fmul(1); fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 faddp(1); fmul(1); fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa faddp(1); fmul(1); fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 faddp(1); fmulp(1); fld_d(Address(ones, RelocationHolder::none).plus_disp(esi, Address::times_8)); fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); jcc(Assembler::equal, B1_22); bind(B1_19); fmulp(4); testl(ebx, ebx); fxch(2); fmul(3); movl(eax, Address(esp, 2)); faddp(3); fxch(2); fstp_d(Address(eax, 0)); fmula(1); faddp(1); fstp_d(Address(eax, 8)); jcc(Assembler::equal, B1_21); bind(B1_20); fldcw(Address(esp, 30)); bind(B1_21); addl(esp, 52); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_22); fxch(1); fmulp(4); testl(ebx, ebx); fxch(2); fmul(3); movl(eax, Address(esp, 32)); faddp(3); fxch(2); fstp_d(Address(eax, 8)); fmula(1); faddp(1); fstp_d(Address(eax, 0)); jcc(Assembler::equal, B1_24); bind(B1_23); fldcw(Address(esp, 30)); bind(B1_24); addl(esp, 52); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_25); testb(Address(esp, 36), 2); jcc(Assembler::equal, B1_33); bind(B1_26); fld_s(0); 
testb(edx, 2); fmul(1); fld_s(0); fmul(1); jcc(Assembler::equal, B1_30); bind(B1_27); fstp_d(2); fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 testl(ebx, ebx); fmul(2); fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea fmul(3); fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 movl(eax, Address(rsp, 32)); faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa faddp(2); fxch(1); fmulp(3); fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 faddp(1); fmulp(1); faddp(1); fld_d(Address(ones, RelocationHolder::none).plus_disp(rsi, Address::times_8)); fmula(1); faddp(1); fstp_d(Address(eax, 8)); jcc(Assembler::equal, B1_29); bind(B1_28); fldcw(Address(esp, 30)); bind(B1_29); addl(esp, 52); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_30); fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 testl(ebx, ebx); fmul(1); fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 fmul(2); fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f movl(eax, Address(rsp, 32)); faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 faddp(2); fxch(1); fmulp(2); fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa faddp(1); fmulp(2); faddp(1); fld_d(Address(ones, RelocationHolder::none).plus_disp(rsi, Address::times_8)); fmulp(2); fmul(1); faddp(1); fstp_d(Address(eax, 8)); jcc(Assembler::equal, B1_32); bind(B1_31); fldcw(Address(esp, 30)); bind(B1_32); addl(esp, 52); pop(ebx); pop(edi); 
pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_33); testb(Address(esp, 36), 1); jcc(Assembler::equal, B1_41); bind(B1_34); fld_s(0); testb(edx, 2); fmul(1); fld_s(0); fmul(1); jcc(Assembler::equal, B1_38); bind(B1_35); fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 testl(ebx, ebx); fmul(1); fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 fmul(2); fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 faddp(2); fxch(1); fmul(2); fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 faddp(2); fxch(1); fmulp(2); fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa faddp(1); fmulp(2); faddp(1); fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); fmulp(2); fmul(1); movl(eax, Address(esp, 32)); faddp(1); fstp_d(Address(eax, 0)); jcc(Assembler::equal, B1_37); bind(B1_36); fldcw(Address(esp, 30)); bind(B1_37); addl(esp, 52); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_38); fstp_d(2); fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 testl(ebx, ebx); fmul(2); fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea fmul(3); fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 faddp(2); fxch(1); fmul(3); fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa faddp(2); fxch(1); fmulp(3); fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 faddp(1); fmulp(1); faddp(1); fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); fmula(1); movl(eax, Address(esp, 32)); faddp(1); 
fstp_d(Address(eax, 0)); jcc(Assembler::equal, B1_40); bind(B1_39); fldcw(Address(esp, 30)); bind(B1_40); addl(esp, 52); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_41); fstp_d(0); addl(esp, 52); pop(ebx); pop(edi); pop(esi); movl(esp, ebp); pop(ebp); ret(0); bind(B1_42); xorl(ebx, ebx); jmp(B1_8); bind(B1_43); xorl(ebx, ebx); jmp(B1_15); } ALIGNED_(16) juint _static_const_table_sin[] = { 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0x3fd00000UL, 
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 
0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 
0x00000000UL, 0x00000000UL,
    0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x43600000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x3c800000UL, 0x00000000UL, 0x00000000UL,
    0xffffffffUL, 0x3fefffffUL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL,
    0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};

// Emits the 32-bit sin(x) code.
//
// The emitted code reserves a 120-byte frame, reads the double argument from
// [rsp + 128] and leaves the result on top of the x87 stack.  It neither
// releases the frame nor returns; NOTE(review): presumably the code emitted by
// the caller after this sequence does both -- confirm.
//
// All constants are addressed as byte offsets from static_const_table_sin
// (held in ebx).  The offsets used here appear to play the same roles as the
// named constants of the 64-bit fast_cos, which runs the same instruction
// sequence: 2160 ~ PI32INV, 2272 ~ ONEHALF, 2256 ~ SIGN_MASK, 2128 ~ P_1,
// 2112 ~ P_2, 2144 ~ P_3, 2096 ~ SC_4, 2080 ~ SC_3, 2064 ~ SC_2, 2048 ~ SC_1.
//
// Paths:
//   main                 0x3030 <= (top 16 bits of |x|) <= 0x40F5
//   L_2TAG_PACKET_0_0_2  below that range: x itself is returned
//   L_2TAG_PACKET_2_0_2  above that range: calls StubRoutines::dlibm_sin_cos_huge()
//   L_2TAG_PACKET_4_0_2  exponent field all ones (Inf/NaN)
//   L_2TAG_PACKET_1_0_2  common exit, restores ebx
void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ebx, Register edx) {

  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, start;
  assert_different_registers(eax, ebx, edx);
  address static_const_table_sin = (address)_static_const_table_sin;

  bind(start);
  // Frame setup: ebx is saved at [rsp + 56] and then used as the table base.
  subl(rsp, 120);
  movl(Address(rsp, 56), ebx);
  lea(ebx, ExternalAddress(static_const_table_sin));
  movsd(xmm0, Address(rsp, 128));
  // eax = top 16 bits of x with the sign cleared, biased by -0x3030 so that a
  // single unsigned compare against 4293 (0x10C5) tests both range bounds.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  subl(eax, 12336);
  cmpl(eax, 4293);
  jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
  // Main path: N = trunc(x * [2160] + copysign([2272], x)).
  movsd(xmm1, Address(ebx, 2160));
  mulsd(xmm1, xmm0);
  movsd(xmm5, Address(ebx, 2272));
  movdqu(xmm4, Address(ebx, 2256));
  pand(xmm4, xmm0);
  por(xmm5, xmm4);
  movsd(xmm3, Address(ebx, 2128));
  movdqu(xmm2, Address(ebx, 2112));
  addpd(xmm1, xmm5);
  cvttsd2sil(edx, xmm1);
  cvtsi2sdl(xmm1, edx);
  mulsd(xmm3, xmm1);
  unpcklpd(xmm1, xmm1);
  // 1865216 is a multiple of 64, so after the mask edx = N mod 64; each table
  // entry is 32 bytes, hence the shift by 5.  eax = address of entry edx.
  addl(edx, 1865216);
  movdqu(xmm4, xmm0);
  andl(edx, 63);
  movdqu(xmm5, Address(ebx, 2096));
  lea(eax, Address(ebx, 0));
  shll(edx, 5);
  addl(eax, edx);
  mulpd(xmm2, xmm1);
  subsd(xmm0, xmm3);
  mulsd(xmm1, Address(ebx, 2144));
  subsd(xmm4, xmm3);
  movsd(xmm7, Address(eax, 8));
  unpcklpd(xmm0, xmm0);
  movapd(xmm3, xmm4);
  subsd(xmm4, xmm2);
  mulpd(xmm5, xmm0);
  subpd(xmm0, xmm2);
  movdqu(xmm6, Address(ebx, 2064));
  mulsd(xmm7, xmm4);
  subsd(xmm3, xmm4);
  mulpd(xmm5, xmm0);
  mulpd(xmm0, xmm0);
  subsd(xmm3, xmm2);
  movdqu(xmm2, Address(eax, 0));
  subsd(xmm1, xmm3);
  movsd(xmm3, Address(eax, 24));
  addsd(xmm2, xmm3);
  subsd(xmm7, xmm2);
  mulsd(xmm2, xmm4);
  mulpd(xmm6, xmm0);
  mulsd(xmm3, xmm4);
  mulpd(xmm2, xmm0);
  mulpd(xmm0, xmm0);
  addpd(xmm5, Address(ebx, 2080));
  mulsd(xmm4, Address(eax, 0));
  addpd(xmm6, Address(ebx, 2048));
  mulpd(xmm5, xmm0);
  movapd(xmm0, xmm3);
  addsd(xmm3, Address(eax, 8));
  mulpd(xmm1, xmm7);
  movapd(xmm7, xmm4);
  addsd(xmm4, xmm3);
  addpd(xmm6, xmm5);
  movsd(xmm5, Address(eax, 8));
  subsd(xmm5, xmm3);
  subsd(xmm3, xmm4);
  addsd(xmm1, Address(eax, 16));
  mulpd(xmm6, xmm2);
  addsd(xmm5, xmm0);
  addsd(xmm3, xmm7);
  addsd(xmm1, xmm5);
  addsd(xmm1, xmm3);
  addsd(xmm1, xmm6);
  unpckhpd(xmm6, xmm6);
  addsd(xmm1, xmm6);
  addsd(xmm4, xmm1);
  // Move the SSE result onto the x87 stack through memory.
  movsd(Address(rsp, 0), xmm4);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_0_0_2);
  // Flags are still those of cmpl(eax, 4293): signed "greater" means the
  // argument is above the main range; otherwise the biased value wrapped
  // negative, i.e. the argument is below it.
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  // (eax >> 4) == 0x0FFFFCFD exactly when the top 16 bits of |x| were 0..15,
  // i.e. the exponent field is zero: x is returned unchanged.
  shrl(eax, 4);
  cmpl(eax, 268434685);
  jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2);
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_3_0_2);
  // xmm3 = (x * [2192] - x) * [2208], but the value stored and returned is
  // xmm0 (x itself).  NOTE(review): the xmm3 arithmetic is presumably done
  // only for its floating-point side effects -- confirm.
  movsd(xmm3, Address(ebx, 2192));
  mulsd(xmm3, xmm0);
  subsd(xmm3, xmm0);
  mulsd(xmm3, Address(ebx, 2208));
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_2_0_2);
  // High word of x with exponent mask 0x7FF00000: all ones means Inf or NaN.
  movl(eax, Address(rsp, 132));
  andl(eax, 2146435072);
  cmpl(eax, 2146435072);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  // Outgoing arguments: x at [rsp + 0], result pointer at [rsp + 8] and the
  // constant 2 at [rsp + 12].  The result pointer is rsp + 40, i.e. [rsp + 8]
  // of this frame once the 32 bytes are released, and the value is read back
  // from 8 bytes past it ([rsp + 16]).
  subl(rsp, 32);
  movsd(Address(rsp, 0), xmm0);
  lea(eax, Address(rsp, 40));
  movl(Address(rsp, 8), eax);
  movl(eax, 2);
  movl(Address(rsp, 12), eax);
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
  addl(rsp, 32);
  fld_d(Address(rsp, 16));
  jmp(L_2TAG_PACKET_1_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  // Inf/NaN: result is x multiplied by the table constant at offset 2240.
  fld_d(Address(rsp, 128));
  fmul_d(Address(ebx, 2240));

  bind(L_2TAG_PACKET_1_0_2);
  // Common exit: restore the saved ebx.
  movl(ebx, Address(rsp, 56));
}
#endif

/******************************************************************************/
//                     ALGORITHM DESCRIPTION - COS()
//
--------------------- // // 1. RANGE REDUCTION // // We perform an initial range reduction from X to r with // // X =~= N * pi/32 + r // // so that |r| <= pi/64 + epsilon. We restrict inputs to those // where |N| <= 932560. Beyond this, the range reduction is // insufficiently accurate. For extremely small inputs, // denormalization can occur internally, impacting performance. // This means that the main path is actually only taken for // 2^-252 <= |X| < 90112. // // To avoid branches, we perform the range reduction to full // accuracy each time. // // X - N * (P_1 + P_2 + P_3) // // where P_1 and P_2 are 32-bit numbers (so multiplication by N // is exact) and P_3 is a 53-bit number. Together, these // approximate pi well enough for all cases in the restricted // range. // // The main reduction sequence is: // // y = 32/pi * x // N = integer(y) // (computed by adding and subtracting off SHIFTER) // // m_1 = N * P_1 // m_2 = N * P_2 // r_1 = x - m_1 // r = r_1 - m_2 // (this r can be used for most of the calculation) // // c_1 = r_1 - r // m_3 = N * P_3 // c_2 = c_1 - m_2 // c = c_2 - m_3 // // 2. MAIN ALGORITHM // // The algorithm uses a table lookup based on B = M * pi / 32 // where M = N mod 64. The stored values are: // sigma closest power of 2 to cos(B) // C_hl 53-bit cos(B) - sigma // S_hi + S_lo 2 * 53-bit sin(B) // // The computation is organized as follows: // // sin(B + r + c) = [sin(B) + sigma * r] + // r * (cos(B) - sigma) + // sin(B) * [cos(r + c) - 1] + // cos(B) * [sin(r + c) - r] // // which is approximately: // // [S_hi + sigma * r] + // C_hl * r + // S_lo + S_hi * [(cos(r) - 1) - r * c] + // (C_hl + sigma) * [(sin(r) - r) + c] // // and this is what is actually computed. We separate this sum // into four parts: // // hi + med + pols + corr // // where // // hi = S_hi + sigma r // med = C_hl * r // pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) // corr = S_lo + c * ((C_hl + sigma) - S_hi * r) // // 3. 
POLYNOMIAL
//
//     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
//     (sin(r) - r) can be rearranged freely, since it is quite
//     small, so we exploit parallelism to the fullest.
//
//          psc4       = SC_4 * r_1
//          msc4       = psc4 * r
//          r2         = r * r
//          msc2       = SC_2 * r2
//          r4         = r2 * r2
//          psc3       = SC_3 + msc4
//          psc1       = SC_1 + msc2
//          msc3       = r4 * psc3
//          sincospols = psc1 + msc3
//          pols       = sincospols *
//                       <S_hi * r^2 | (C_hl + sigma) * r^3>
//
//     4. CORRECTION TERM
//
//     This is where the "c" component of the range reduction is
//     taken into account; recall that just "r" is used for most of
//     the calculation.
//
//          -c   = m_3 - c_2
//          -d   = S_hi * r - (C_hl + sigma)
//          corr = -c * -d + S_lo
//
//     5. COMPENSATED SUMMATIONS
//
//     The two successive compensated summations add up the high
//     and medium parts, leaving just the low parts to add up at
//     the end.
//
//          rs      = sigma * r
//          res_int = S_hi + rs
//          k_0     = S_hi - res_int
//          k_2     = k_0 + rs
//          med     = C_hl * r
//          res_hi  = res_int + med
//          k_1     = res_int - res_hi
//          k_3     = k_1 + med
//
//     6. FINAL SUMMATION
//
//     We now add up all the small parts:
//
//          res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
//
//     Now the overall result is just:
//
//          res_hi + res_lo
//
//     7. SMALL ARGUMENTS
//
//     Inputs with |X| < 2^-252 are treated specially as
//     1 - |x|.
//
// Special cases:
//  cos(NaN) = quiet NaN, and raise invalid exception
//  cos(INF) = NaN and raise invalid exception
//  cos(0) = 1
//
/******************************************************************************/

#ifdef _LP64

// The double constant 1.0 (low word, high word). Used by the tiny-argument
// path of fast_cos below, which returns 1 - |x|.
ALIGNED_(8) juint _ONE[] =
{
    0x00000000UL, 0x3ff00000UL
};

// Emits the x86_64 instruction sequence that computes cos(x) for a double.
//
//   input : x in xmm0 (it is also spilled to the stack at [rsp + 8])
//   output: cos(x) in xmm0 on every path
//
// rbx is pushed on entry and popped on exit. rsi, rdi, rax, rcx, rdx,
// r8-r11 and xmm1-xmm7 are written without being saved here. No return
// instruction is emitted by this function; the caller has to supply one.
//
// The tables _ONEHALF, _P_1, _P_2, _P_3, _SC_1.._SC_4, _Ctable,
// _PI_INV_TABLE, _PI_4, _PI32INV, _SIGN_MASK and _NEG_ZERO are not defined
// in this part of the file; only _ONE is defined right above.
//
// Control flow of the emitted code:
//   B1_2                  classify |x| by its exponent bits
//   (fall through)        main path, 2^-252 <= |x| < 90112
//   L_2TAG_PACKET_0_0_1   out of main range: tiny -> 1 - |x|, else large
//   L_2TAG_PACKET_1_0_1   large |x|: NaN/Inf check, then extended reduction
//   L_2TAG_PACKET_11_0_1  main algorithm again on the reduced argument
//   L_2TAG_PACKET_2_0_1   NaN / Inf
//   B1_4                  common epilogue
// The labels B1_3, B1_5 are declared but never bound, and
// L_2TAG_PACKET_13_0_1 is bound but never jumped to.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {

  Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
  Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
  Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
  Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;

  assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);

  address ONEHALF = (address)_ONEHALF;
  address P_2 = (address)_P_2;
  address SC_4 = (address)_SC_4;
  address Ctable = (address)_Ctable;
  address SC_2 = (address)_SC_2;
  address SC_3 = (address)_SC_3;
  address SC_1 = (address)_SC_1;
  address PI_INV_TABLE = (address)_PI_INV_TABLE;
  address PI_4 = (address)_PI_4;
  address PI32INV = (address)_PI32INV;
  address SIGN_MASK = (address)_SIGN_MASK;
  address P_1 = (address)_P_1;
  address P_3 = (address)_P_3;
  address ONE = (address)_ONE;
  address NEG_ZERO = (address)_NEG_ZERO;

  // Prologue: save rbx, reserve 16 bytes of stack and spill x to [rsp + 8].
  bind(start);
  push(rbx);
  subq(rsp, 16);
  movsd(Address(rsp, 8), xmm0);

  // Range classification on the high 32 bits of x ([rsp + 12]).
  //   2147418112 = 0x7fff0000 keeps exponent + top mantissa bits, drops sign
  //   808452096  = 0x30300000 is the high word of 2^-252
  //   281346048  = 0x10c50000 is the width of the accepted window
  // The unsigned "above" test rejects both |x| < 2^-252 (the subtraction
  // wraps around) and |x| >= 90112 in a single compare.
  bind(B1_2);
  movl(eax, Address(rsp, 12));
  movq(xmm1, ExternalAddress(PI32INV));    //0x6dc9c883UL, 0x40245f30UL
  andl(eax, 2147418112);
  subl(eax, 808452096);
  cmpl(eax, 281346048);
  jcc(Assembler::above, L_2TAG_PACKET_0_0_1);

  // ---- Main path -----------------------------------------------------------
  // y = x * PI32INV, then N = trunc(y + copysign(0.5, x)), i.e. round to
  // nearest via ONEHALF combined with the sign bit of x.
  mulsd(xmm1, xmm0);
  movdqu(xmm5, ExternalAddress(ONEHALF));    //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
  movq(xmm4, ExternalAddress(SIGN_MASK));    //0x00000000UL, 0x80000000UL
  pand(xmm4, xmm0);
  por(xmm5, xmm4);
  addpd(xmm1, xmm5);
  cvttsd2sil(edx, xmm1);
  cvtsi2sdl(xmm1, edx);
  movdqu(xmm2, ExternalAddress(P_2));    //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
  movq(xmm3, ExternalAddress(P_1));    //0x54400000UL, 0x3fb921fbUL
  mulsd(xmm3, xmm1);
  unpcklpd(xmm1, xmm1);
  // Table index = (N + 1865232) & 63. Since 1865232 = 29144 * 64 + 16, only
  // the +16 survives the mask.
  // NOTE(review): presumably this is the pi/2 phase shift (16 * pi/32) that
  // lets the sin(B)-style table rows described above serve for cosine.
  addq(rdx, 1865232);
  movdqu(xmm4, xmm0);
  andq(rdx, 63);
  movdqu(xmm5, ExternalAddress(SC_4));    //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
  // rax = Ctable + 32 * index; the row is read at offsets 0, 8, 16 and 24.
  lea(rax, ExternalAddress(Ctable));
  shlq(rdx, 5);
  addq(rax, rdx);
  // Range reduction r_1 = x - N*P_1, r = r_1 - N*P_2, with N*P_3 feeding the
  // correction term, interleaved with the start of the polynomial.
  mulpd(xmm2, xmm1);
  subsd(xmm0, xmm3);
  mulsd(xmm1, ExternalAddress(P_3));    //0x2e037073UL, 0x3b63198aUL
  subsd(xmm4, xmm3);
  movq(xmm7, Address(rax, 8));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm3, xmm4);
  subsd(xmm4, xmm2);
  mulpd(xmm5, xmm0);
  subpd(xmm0, xmm2);
  movdqu(xmm6, ExternalAddress(SC_2));    //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
  mulsd(xmm7, xmm4);
  subsd(xmm3, xmm4);
  mulpd(xmm5, xmm0);
  mulpd(xmm0, xmm0);
  subsd(xmm3, xmm2);
  movdqu(xmm2, Address(rax, 0));
  subsd(xmm1, xmm3);
  movq(xmm3, Address(rax, 24));
  addsd(xmm2, xmm3);
  subsd(xmm7, xmm2);
  mulsd(xmm2, xmm4);
  mulpd(xmm6, xmm0);
  mulsd(xmm3, xmm4);
  mulpd(xmm2, xmm0);
  mulpd(xmm0, xmm0);
  addpd(xmm5, ExternalAddress(SC_3));    //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
  mulsd(xmm4, Address(rax, 0));
  addpd(xmm6, ExternalAddress(SC_1));    //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
  mulpd(xmm5, xmm0);
  // Compensated summations (section 5 of the description): xmm3 holds
  // row[+24] * r and xmm4 holds row[+0] * r at this point.
  movdqu(xmm0, xmm3);
  addsd(xmm3, Address(rax, 8));
  mulpd(xmm1, xmm7);
  movdqu(xmm7, xmm4);
  addsd(xmm4, xmm3);
  addpd(xmm6, xmm5);
  movq(xmm5, Address(rax, 8));
  subsd(xmm5, xmm3);
  subsd(xmm3, xmm4);
  addsd(xmm1, Address(rax, 16));
  mulpd(xmm6, xmm2);
  // Final summation: all low-order parts are added first, the high part
  // (xmm4) last.
  addsd(xmm0, xmm5);
  addsd(xmm3, xmm7);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm3);
  addsd(xmm0, xmm6);
  unpckhpd(xmm6, xmm6);
  addsd(xmm0, xmm6);
  addsd(xmm0, xmm4);
  jmp(B1_4);

  // ---- Out of main range ---------------------------------------------------
  // The flags are still those of the cmpl above: signed "greater" means the
  // argument is too large; otherwise it is tiny and the result is 1 - |x|.
  bind(L_2TAG_PACKET_0_0_1);
  jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);    // clear the sign bit in the top 16-bit word
  pinsrw(xmm0, eax, 3);
  movq(xmm1, ExternalAddress(ONE));    //0x00000000UL, 0x3ff00000UL
  subsd(xmm1, xmm0);
  movdqu(xmm0, xmm1);
  jmp(B1_4);

  // ---- Large arguments -----------------------------------------------------
  // An all-ones exponent field (32752 = 0x7ff0) means NaN or Inf.
  bind(L_2TAG_PACKET_1_0_1);
  pextrw(eax, xmm0, 3);
  andl(eax, 32752);
  cmpl(eax, 32752);
  jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
  // Derive from the exponent a 4-byte-aligned offset into PI_INV_TABLE
  // (the andl with 65532 clears the two low bits); rcx becomes a pointer
  // into that table.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  subl(ecx, 16224);
  shrl(ecx, 7);
  andl(ecx, 65532);
  lea(r11, ExternalAddress(PI_INV_TABLE));
  addq(rcx, r11);
  // Split the significand of x: rdx = low 32 bits, rax = high 21 bits with
  // the implicit leading one forced in (orl INT_MIN, then shrl 11).
  movdq(rax, xmm0);
  movl(r10, Address(rcx, 20));
  movl(r8, Address(rcx, 24));
  movl(edx, eax);
  shrq(rax, 21);
  orl(eax, INT_MIN);
  shrl(eax, 11);
  // Multi-word multiplication of the significand by the 32-bit table words
  // at [rcx + 24] down to [rcx + 0], propagating carries 32 bits at a time.
  // The product is kept in r9 : r10 : r8 (most to least significant, as the
  // later subq/sbbq chains show).
  movl(r9, r10);
  imulq(r10, rdx);
  imulq(r9, rax);
  imulq(r8, rax);
  movl(rsi, Address(rcx, 16));
  movl(rdi, Address(rcx, 12));
  movl(r11, r10);
  shrq(r10, 32);
  addq(r9, r10);
  addq(r11, r8);
  movl(r8, r11);
  shrq(r11, 32);
  addq(r9, r11);
  movl(r10, rsi);
  imulq(rsi, rdx);
  imulq(r10, rax);
  movl(r11, rdi);
  imulq(rdi, rdx);
  movl(rbx, rsi);
  shrq(rsi, 32);
  addq(r9, rbx);
  movl(rbx, r9);
  shrq(r9, 32);
  addq(r10, rsi);
  addq(r10, r9);
  shlq(rbx, 32);
  orq(r8, rbx);
  imulq(r11, rax);
  movl(r9, Address(rcx, 8));
  movl(rsi, Address(rcx, 4));
  movl(rbx, rdi);
  shrq(rdi, 32);
  addq(r10, rbx);
  movl(rbx, r10);
  shrq(r10, 32);
  addq(r11, rdi);
  addq(r11, r10);
  movq(rdi, r9);
  imulq(r9, rdx);
  imulq(rdi, rax);
  movl(r10, r9);
  shrq(r9, 32);
  addq(r11, r10);
  movl(r10, r11);
  shrq(r11, 32);
  addq(rdi, r9);
  addq(rdi, r11);
  movq(r9, rsi);
  imulq(rsi, rdx);
  imulq(r9, rax);
  shlq(r10, 32);
  orq(r10, rbx);
  movl(eax, Address(rcx, 0));
  movl(r11, rsi);
  shrq(rsi, 32);
  addq(rdi, r11);
  movl(r11, rdi);
  shrq(rdi, 32);
  addq(r9, rsi);
  addq(r9, rdi);
  imulq(rdx, rax);
  // Recover the table offset (rcx - PI_INV_TABLE), scale it by 8 and combine
  // it with the unbiased exponent of x to obtain the shift count in ecx.
  // rsi keeps the sign bit of x (32768 in the top word), rbx the unbiased
  // exponent.
  pextrw(rbx, xmm0, 3);
  lea(rdi, ExternalAddress(PI_INV_TABLE));
  subq(rcx, rdi);
  addl(ecx, ecx);
  addl(ecx, ecx);
  addl(ecx, ecx);
  addl(ecx, 19);
  movl(rsi, 32768);
  andl(rsi, rbx);
  shrl(rbx, 4);
  andl(rbx, 2047);
  subl(rbx, 1023);
  subl(ecx, rbx);
  addq(r9, rdx);
  movl(edx, ecx);
  addl(edx, 32);
  cmpl(ecx, 1);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
  // NOTE(review): the one-operand shift forms below carry no immediate; they
  // appear to shift by the count prepared in ecx (cl) - confirm against the
  // x86 Assembler class.
  negl(ecx);
  addl(ecx, 29);
  shll(r9);
  movl(rdi, r9);
  andl(r9, 536870911);     // 0x1fffffff
  testl(r9, 268435456);    // 0x10000000
  jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
  shrl(r9);
  movl(rbx, 0);
  shlq(r9, 32);
  orq(r9, r11);

  // Normalisation: if the top word r9 is zero, shift whole words up
  // (L_2TAG_PACKET_7_0_1); otherwise locate its highest set bit with bsrq
  // and shift the r9 : r10 : r8 chain left or right accordingly.
  bind(L_2TAG_PACKET_5_0_1);

  bind(L_2TAG_PACKET_6_0_1);
  cmpq(r9, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);

  bind(L_2TAG_PACKET_8_0_1);
  bsrq(r11, r9);
  movl(ecx, 29);
  subl(ecx, r11);
  jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
  shlq(r9);
  movq(rax, r10);
  shlq(r10);
  addl(edx, ecx);
  negl(ecx);
  addl(ecx, 64);
  shrq(rax);
  shrq(r8);
  orq(r9, rax);
  orq(r10, r8);

  // Convert the two top words to double, scale each by a power of two whose
  // exponent word is built in edx (sign from rsi, flipped by rbx), and
  // multiply by the two-part constant PI_4 (high part at +0, low part at +8).
  // The result is a reduced argument in xmm0 with a low-order tail in xmm6.
  bind(L_2TAG_PACKET_10_0_1);
  cvtsi2sdq(xmm0, r9);
  shrq(r10, 1);
  cvtsi2sdq(xmm3, r10);
  xorpd(xmm4, xmm4);
  shll(edx, 4);
  negl(edx);
  addl(edx, 16368);    // 0x3ff0: exponent word of 1.0
  orl(edx, rsi);
  xorl(edx, rbx);
  pinsrw(xmm4, edx, 3);
  movq(xmm2, ExternalAddress(PI_4));    //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
  movq(xmm6, ExternalAddress(8 + PI_4));    //0x18469899UL, 0x3e64442dUL
  xorpd(xmm5, xmm5);
  subl(edx, 1008);
  pinsrw(xmm5, edx, 3);
  mulsd(xmm0, xmm4);
  shll(rsi, 16);
  sarl(rsi, 31);
  mulsd(xmm3, xmm5);
  movdqu(xmm1, xmm0);
  mulsd(xmm0, xmm2);
  shrl(rdi, 29);
  addsd(xmm1, xmm3);
  mulsd(xmm3, xmm2);
  // (rdi + rsi) ^ rsi with rsi = 0 or -1 negates rdi when x is negative.
  addl(rdi, rsi);
  xorl(rdi, rsi);
  mulsd(xmm6, xmm1);
  movl(eax, rdi);
  addsd(xmm6, xmm3);
  movdqu(xmm2, xmm0);
  addsd(xmm0, xmm6);
  subsd(xmm2, xmm0);
  addsd(xmm6, xmm2);

  // ---- Main algorithm on the reduced argument ------------------------------
  // Same sequence as the main path, with three differences: 64-bit integer
  // conversions are used, eax * 8 (computed above) is added to the table
  // index before masking, and the tail xmm6 is subtracted from the
  // correction term.
  bind(L_2TAG_PACKET_11_0_1);
  movq(xmm1, ExternalAddress(PI32INV));    //0x6dc9c883UL, 0x40245f30UL
  mulsd(xmm1, xmm0);
  movq(xmm5, ExternalAddress(ONEHALF));    //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
  movq(xmm4, ExternalAddress(SIGN_MASK));    //0x00000000UL, 0x80000000UL
  pand(xmm4, xmm0);
  por(xmm5, xmm4);
  addpd(xmm1, xmm5);
  cvttsd2siq(rdx, xmm1);
  cvtsi2sdq(xmm1, rdx);
  movq(xmm3, ExternalAddress(P_1));    //0x54400000UL, 0x3fb921fbUL
  movdqu(xmm2, ExternalAddress(P_2));    //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
  mulsd(xmm3, xmm1);
  unpcklpd(xmm1, xmm1);
  shll(eax, 3);
  addl(edx, 1865232);
  movdqu(xmm4, xmm0);
  addl(edx, eax);
  andl(edx, 63);
  movdqu(xmm5, ExternalAddress(SC_4));    //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
  lea(rax, ExternalAddress(Ctable));
  shll(edx, 5);
  addq(rax, rdx);
  mulpd(xmm2, xmm1);
  subsd(xmm0, xmm3);
  mulsd(xmm1, ExternalAddress(P_3));    //0x2e037073UL, 0x3b63198aUL
  subsd(xmm4, xmm3);
  movq(xmm7, Address(rax, 8));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm3, xmm4);
  subsd(xmm4, xmm2);
  mulpd(xmm5, xmm0);
  subpd(xmm0, xmm2);
  mulsd(xmm7, xmm4);
  subsd(xmm3, xmm4);
  mulpd(xmm5, xmm0);
  mulpd(xmm0, xmm0);
  subsd(xmm3, xmm2);
  movdqu(xmm2, Address(rax, 0));
  subsd(xmm1, xmm3);
  movq(xmm3, Address(rax, 24));
  addsd(xmm2, xmm3);
  subsd(xmm7, xmm2);
  subsd(xmm1, xmm6);
  movdqu(xmm6, ExternalAddress(SC_2));    //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
  mulsd(xmm2, xmm4);
  mulpd(xmm6, xmm0);
  mulsd(xmm3, xmm4);
  mulpd(xmm2, xmm0);
  mulpd(xmm0, xmm0);
  addpd(xmm5, ExternalAddress(SC_3));    //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
  mulsd(xmm4, Address(rax, 0));
  addpd(xmm6, ExternalAddress(SC_1));    //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
  mulpd(xmm5, xmm0);
  movdqu(xmm0, xmm3);
  addsd(xmm3, Address(rax, 8));
  mulpd(xmm1, xmm7);
  movdqu(xmm7, xmm4);
  addsd(xmm4, xmm3);
  addpd(xmm6, xmm5);
  movq(xmm5, Address(rax, 8));
  subsd(xmm5, xmm3);
  subsd(xmm3, xmm4);
  addsd(xmm1, Address(rax, 16));
  mulpd(xmm6, xmm2);
  addsd(xmm5, xmm0);
  addsd(xmm3, xmm7);
  addsd(xmm1, xmm5);
  addsd(xmm1, xmm3);
  addsd(xmm1, xmm6);
  unpckhpd(xmm6, xmm6);
  movdqu(xmm0, xmm4);
  addsd(xmm1, xmm6);
  addsd(xmm0, xmm1);
  jmp(B1_4);

  // Top word is zero: move the lower words up by 64 bits (twice if needed).
  // If everything is zero, continue with a reduced argument of 0.
  bind(L_2TAG_PACKET_7_0_1);
  addl(edx, 64);
  movq(r9, r10);
  movq(r10, r8);
  movl(r8, 0);
  cmpq(r9, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
  addl(edx, 64);
  movq(r9, r10);
  movq(r10, r8);
  cmpq(r9, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
  xorpd(xmm0, xmm0);
  xorpd(xmm6, xmm6);
  jmp(L_2TAG_PACKET_11_0_1);

  // Highest set bit of r9 is at or above bit 29: nothing to do when it is
  // exactly bit 29, otherwise shift the chain right.
  bind(L_2TAG_PACKET_9_0_1);
  jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
  negl(ecx);
  shrq(r10);
  movq(rax, r9);
  shrq(r9);
  subl(edx, ecx);
  negl(ecx);
  addl(ecx, 64);
  shlq(rax);
  orq(r10, rax);
  jmp(L_2TAG_PACKET_10_0_1);

  // Shift count below 1 (taken from the "cmpl(ecx, 1)" test above).
  bind(L_2TAG_PACKET_3_0_1);
  negl(ecx);
  shlq(r9, 32);
  orq(r9, r11);
  shlq(r9);
  movq(rdi, r9);
  testl(r9, INT_MIN);
  jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
  shrl(r9);
  movl(rbx, 0);
  shrq(rdi, 3);
  jmp(L_2TAG_PACKET_6_0_1);

  // The tested bit is set: replace the r9 : r10 : r8 chain by its difference
  // from the next unit (three-word subtract with borrow), bump rdi, and set
  // rbx = 32768 so that the sign word built at L_2TAG_PACKET_10_0_1 is
  // flipped.
  bind(L_2TAG_PACKET_4_0_1);
  shrl(r9);
  movl(rbx, 536870912);
  shrl(rbx);
  shlq(r9, 32);
  orq(r9, r11);
  shlq(rbx, 32);
  addl(rdi, 536870912);
  movl(rcx, 0);
  movl(r11, 0);
  subq(rcx, r8);
  sbbq(r11, r10);
  sbbq(rbx, r9);
  movq(r8, rcx);
  movq(r10, r11);
  movq(r9, rbx);
  movl(rbx, 32768);
  jmp(L_2TAG_PACKET_5_0_1);

  // Same complementing step for the path that came through
  // L_2TAG_PACKET_3_0_1.
  bind(L_2TAG_PACKET_12_0_1);
  shrl(r9);
  mov64(rbx, 0x100000000);
  shrq(rbx);
  movl(rcx, 0);
  movl(r11, 0);
  subq(rcx, r8);
  sbbq(r11, r10);
  sbbq(rbx, r9);
  movq(r8, rcx);
  movq(r10, r11);
  movq(r9, rbx);
  movl(rbx, 32768);
  shrq(rdi, 3);
  addl(rdi, 536870912);
  jmp(L_2TAG_PACKET_6_0_1);

  // ---- NaN / Inf -----------------------------------------------------------
  // Reload x and multiply it by NEG_ZERO (-0.0): a NaN stays NaN and an
  // infinity times zero yields NaN, matching the special cases listed in the
  // header comment. The result is also stored to [rsp + 0].
  bind(L_2TAG_PACKET_2_0_1);
  movsd(xmm0, Address(rsp, 8));
  mulsd(xmm0, ExternalAddress(NEG_ZERO));    //0x00000000UL, 0x80000000UL
  movq(Address(rsp, 0), xmm0);

  bind(L_2TAG_PACKET_13_0_1);

  // Epilogue: release the 16-byte frame and restore rbx.
  bind(B1_4);
  addq(rsp, 16);
  pop(rbx);
}
#endif

#ifndef _LP64

// Constant table for the 32-bit fast_cos. It starts with 64 rows of 32 bytes
// (8 juint words = 4 doubles, each stored low word first); fast_cos addresses
// a row as table + 32 * index and reads it at offsets 0, 8, 16 and 24.
// Row 0 is {0.0, 0.0, 0.0, 1.0}.
ALIGNED_(16) juint _static_const_table_cos[] =
{
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
    0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL,
    0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0x3ff00000UL,
    0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
    0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL,
    0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL,
    0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0x3ff00000UL,
    0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL,
    0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
    0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
    0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL,
    0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL,
    0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0x3ff00000UL,
    0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
    0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL,
    0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL,
    0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0x3fe00000UL,
    0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0x3fe8bc80UL,
    0x20000000UL, 0xbc82c5e1UL,
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x76acf82dUL, 0xbfa4a031UL, 
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 
// Last 32-byte row (index 63) of _static_const_table_cos.
    0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL,
    0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0x3ff00000UL,
// Scalar and packed constants that follow the 64 rows. The byte offsets are
// the ones used by the Address(tmp, N) operands in fast_cos below; the names
// are those the 64-bit variant gives to the same values.
    0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL,    // 2048: SC_1
    0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,    // 2064: SC_2
    0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL,    // 2080: SC_3
    0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL,    // 2096: SC_4
    0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL,    // 2112: P_2
    0x54400000UL, 0x3fb921fbUL, 0x00000000UL, 0x00000000UL,    // 2128: P_1
    0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,    // 2144: P_3
    0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL,    // 2160: PI32INV
    0x00000000UL, 0x43380000UL, 0x00000000UL, 0x00000000UL,    // 2176: not referenced by fast_cos below
    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL,    // 2192: 1.0
    0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,    // 2208: -0.0, multiplier on the NaN/Inf path
    0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,    // 2224: sign-bit mask
    0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL     // 2240: 0.5 | 0.5
};

//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//          rax, rdx, rcx, rbx (tmp)

// Code generated by Intel C compiler for LIBM library

// Emits the 32-bit x86 instruction sequence that computes cos(x) for a
// double.
//
//   input : x on the stack, read at [rsp + 128] after the 120-byte frame is
//           reserved (the "(rbp + 8)" of the register note above)
//   output: cos(x) pushed on the x87 stack by fld_d on every path
//
// tmp is saved to [rsp + 56] on entry and reloaded on exit; in between it
// holds the address of _static_const_table_cos.
// NOTE(review): the 120 bytes subtracted from rsp are not added back here and
// no return instruction is emitted - presumably the caller's epilogue restores
// the frame; confirm at the call site.
// Only labels 0..3 of the declared L_2TAG_PACKET_* set are bound; B1_3, B1_5
// and the remaining labels are unused.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {

  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;

  assert_different_registers(tmp, eax, ecx, edx);

  address static_const_table_cos = (address)_static_const_table_cos;

  // Prologue: reserve the frame, save tmp, point tmp at the table, load x.
  bind(start);
  subl(rsp, 120);
  movl(Address(rsp, 56), tmp);
  lea(tmp, ExternalAddress(static_const_table_cos));
  movsd(xmm0, Address(rsp, 128));

  // Range classification on the top 16 bits of x with the sign cleared:
  // 12336 = 0x3030 is the top word of 2^-252 and 4293 = 0x10c5 the window
  // width, so the unsigned "above" rejects |x| < 2^-252 and |x| >= 90112.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  subl(eax, 12336);
  cmpl(eax, 4293);
  jcc(Assembler::above, L_2TAG_PACKET_0_0_2);

  // ---- Main path -----------------------------------------------------------
  // N = trunc(x * PI32INV + copysign(0.5, x)).
  movsd(xmm1, Address(tmp, 2160));
  mulsd(xmm1, xmm0);
  movdqu(xmm5, Address(tmp, 2240));
  movsd(xmm4, Address(tmp, 2224));
  pand(xmm4, xmm0);
  por(xmm5, xmm4);
  movsd(xmm3, Address(tmp, 2128));
  movdqu(xmm2, Address(tmp, 2112));
  addpd(xmm1, xmm5);
  cvttsd2sil(edx, xmm1);
  cvtsi2sdl(xmm1, edx);
  mulsd(xmm3, xmm1);
  unpcklpd(xmm1, xmm1);
  // Row index = (N + 1865232) & 63; 1865232 = 29144 * 64 + 16, so only the
  // +16 survives the mask.
  addl(edx, 1865232);
  movdqu(xmm4, xmm0);
  andl(edx, 63);
  movdqu(xmm5, Address(tmp, 2096));
  // eax = table + 32 * index.
  lea(eax, Address(tmp, 0));
  shll(edx, 5);
  addl(eax, edx);
  // Range reduction with P_1, P_2, P_3 interleaved with the polynomial.
  mulpd(xmm2, xmm1);
  subsd(xmm0, xmm3);
  mulsd(xmm1, Address(tmp, 2144));
  subsd(xmm4, xmm3);
  movsd(xmm7, Address(eax, 8));
  unpcklpd(xmm0, xmm0);
  movapd(xmm3, xmm4);
  subsd(xmm4, xmm2);
  mulpd(xmm5, xmm0);
  subpd(xmm0, xmm2);
  movdqu(xmm6, Address(tmp, 2064));
  mulsd(xmm7, xmm4);
  subsd(xmm3, xmm4);
  mulpd(xmm5, xmm0);
  mulpd(xmm0, xmm0);
  subsd(xmm3, xmm2);
  movdqu(xmm2, Address(eax, 0));
  subsd(xmm1, xmm3);
  movsd(xmm3, Address(eax, 24));
  addsd(xmm2, xmm3);
  subsd(xmm7, xmm2);
  mulsd(xmm2, xmm4);
  mulpd(xmm6, xmm0);
  mulsd(xmm3, xmm4);
  mulpd(xmm2, xmm0);
  mulpd(xmm0, xmm0);
  addpd(xmm5, Address(tmp, 2080));
  mulsd(xmm4, Address(eax, 0));
  addpd(xmm6, Address(tmp, 2048));
  mulpd(xmm5, xmm0);
  // Compensated summations followed by the final sum; the high part (xmm4)
  // receives the accumulated low parts (xmm1) last.
  movapd(xmm0, xmm3);
  addsd(xmm3, Address(eax, 8));
  mulpd(xmm1, xmm7);
  movapd(xmm7, xmm4);
  addsd(xmm4, xmm3);
  addpd(xmm6, xmm5);
  movsd(xmm5, Address(eax, 8));
  subsd(xmm5, xmm3);
  subsd(xmm3, xmm4);
  addsd(xmm1, Address(eax, 16));
  mulpd(xmm6, xmm2);
  addsd(xmm5, xmm0);
  addsd(xmm3, xmm7);
  addsd(xmm1, xmm5);
  addsd(xmm1, xmm3);
  addsd(xmm1, xmm6);
  unpckhpd(xmm6, xmm6);
  addsd(xmm1, xmm6);
  addsd(xmm4, xmm1);
  // Move the SSE result onto the x87 stack through memory.
  movsd(Address(rsp, 0), xmm4);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  // ---- Out of main range ---------------------------------------------------
  // Flags are still those of the cmpl above: signed "greater" means a large
  // argument; otherwise the argument is tiny and the result is 1 - |x|.
  bind(L_2TAG_PACKET_0_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);    // clear the sign bit
  pinsrw(xmm0, eax, 3);
  movsd(xmm1, Address(tmp, 2192));
  subsd(xmm1, xmm0);
  movsd(Address(rsp, 0), xmm1);
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_1_0_2);

  // ---- Large arguments -----------------------------------------------------
  // High word of x at [rsp + 132]; 2146435072 = 0x7ff00000 is the exponent
  // mask, so equality means NaN or Inf.
  bind(L_2TAG_PACKET_2_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2146435072);
  cmpl(eax, 2146435072);
  jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
  // Call the dlibm_sin_cos_huge stub with a 32-byte outgoing area:
  //   [rsp + 0]  x
  //   [rsp + 8]  address of a result slot (rsp + 40 here, i.e. rsp + 8 once
  //              the area is released)
  //   [rsp + 12] the integer 1
  // NOTE(review): the 1 presumably selects cosine rather than sine - confirm
  // against the stub.
  subl(rsp, 32);
  movsd(Address(rsp, 0), xmm0);
  lea(eax, Address(rsp, 40));
  movl(Address(rsp, 8), eax);
  movl(eax, 1);
  movl(Address(rsp, 12), eax);
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
  addl(rsp, 32);
  fld_d(Address(rsp, 8));
  jmp(L_2TAG_PACKET_1_0_2);

  // ---- NaN / Inf -----------------------------------------------------------
  // x * (-0.0) on the x87 stack: NaN stays NaN, Inf * 0 yields NaN.
  bind(L_2TAG_PACKET_3_0_2);
  fld_d(Address(rsp, 128));
  fmul_d(Address(tmp, 2208));

  // Common exit: restore tmp.
  bind(L_2TAG_PACKET_1_0_2);
  movl(tmp, Address(rsp, 56));
}
#endif