< prev index next >

src/cpu/x86/vm/macroAssembler_libm_x86_64.cpp

Print this page

        

*** 735,744 **** --- 735,1092 ---- bind(B1_5); addq(rsp, 24); } /******************************************************************************/ + // ALGORITHM DESCRIPTION - LOG10() + // --------------------- + // + // Let x=2^k * mx, mx in [1,2) + // + // Get B~1/mx based on the output of rcpss instruction (B0) + // B = int((B0*LH*2^7+0.5))/2^7 + // LH is a short approximation for log10(e) + // + // Reduced argument: r=B*mx-LH (computed accurately in high and low parts) + // + // Result: k*log10(2) - log(B) + p(r) + // p(r) is a degree 7 polynomial + // -log(B) read from data table (high, low parts) + // Result is formed from high and low parts + // + // Special cases: + // log10(0) = -INF with divide-by-zero exception raised + // log10(1) = +0 + // log10(x) = NaN with invalid exception raised if x < -0, including -INF + // log10(+INF) = +INF + // + /******************************************************************************/ + + ALIGNED_(16) juint _HIGHSIGMASK_log10[] = + { + 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL + }; + + ALIGNED_(16) juint _LOG10_E[] = + { + 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL + }; + + ALIGNED_(16) juint _L_tbl_log10[] = + { + 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, + 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, + 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, + 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, + 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, + 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, + 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, + 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, + 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, + 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, + 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, + 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, + 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, + 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, + 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, + 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, + 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, + 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, + 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, + 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, + 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, + 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, + 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, + 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, + 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, + 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, + 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, + 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, + 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, + 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, + 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, + 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, + 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, + 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, + 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, + 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, + 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, + 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, + 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, + 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, + 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, + 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, + 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, + 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, + 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, + 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, + 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, + 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, + 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, + 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, + 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, + 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, + 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, + 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, + 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, + 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, + 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, + 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, + 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, + 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, + 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, + 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, + 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, + 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, + 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, + 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, + 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, + 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, + 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, + 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, + 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, + 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, + 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, + 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, + 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, + 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, + 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, + 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, + 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, + 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, + 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, + 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, + 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, + 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, + 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, + 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, + 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, + 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, + 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, + 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, + 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, + 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, + 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, + 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, + 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, + 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, + 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, + 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, + 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, + 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, + 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, + 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, + 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL + }; + + ALIGNED_(16) juint _log2_log10[] = + { + 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL + }; + + ALIGNED_(16) juint _coeff_log10[] = + { + 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, + 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, + 0xdc77b115UL, 0xbff27af2UL + }; + + // Registers: + // input: xmm0 + // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 + // rax, rdx, rcx, tmp - r11 + + // Code generated by Intel C compiler for LIBM library + + void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) { + Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; + Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; + Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start; + + assert_different_registers(r11, eax, ecx, edx); + + address HIGHSIGMASK = (address)_HIGHSIGMASK_log10; + address LOG10_E = (address)_LOG10_E; + address L_tbl = (address)_L_tbl_log10; + address log2 = (address)_log2_log10; + address coeff = (address)_coeff_log10; + + bind(start); + subq(rsp, 24); + movsd(Address(rsp, 0), xmm0); + + bind(B1_2); + xorpd(xmm2, xmm2); + movl(eax, 16368); + pinsrw(xmm2, eax, 3); + movl(ecx, 1054736384); + movdl(xmm7, ecx); + xorpd(xmm3, xmm3); + movl(edx, 30704); + pinsrw(xmm3, edx, 3); + movdqu(xmm1, xmm0); + movl(edx, 32768); + movdl(xmm4, edx); + movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL + pextrw(eax, xmm0, 3); + por(xmm0, xmm2); + movl(ecx, 16352); + psrlq(xmm0, 27); + movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL + psrld(xmm0, 2); + rcpps(xmm0, xmm0); + psllq(xmm1, 12); + pshufd(xmm6, xmm5, 78); + psrlq(xmm1, 12); + subl(eax, 16); + cmpl(eax, 32736); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); + + bind(L_2TAG_PACKET_1_0_2); + mulss(xmm0, xmm7); + por(xmm1, xmm3); + lea(r11, ExternalAddress(L_tbl)); + andpd(xmm5, xmm1); + paddd(xmm0, xmm4); + subsd(xmm1, xmm5); + movdl(edx, xmm0); + psllq(xmm0, 29); + andpd(xmm0, xmm6); + andl(eax, 32752); + subl(eax, ecx); + cvtsi2sdl(xmm7, eax); + mulpd(xmm5, xmm0); + mulsd(xmm1, xmm0); + movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL + movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL + subsd(xmm5, xmm2); + andl(edx, 16711680); + shrl(edx, 12); + movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504)); + movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL + addsd(xmm1, xmm5); + movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL + mulsd(xmm6, xmm7); + pshufd(xmm5, xmm1, 68); + mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL + mulsd(xmm3, xmm1); + addsd(xmm0, xmm6); + mulpd(xmm4, xmm5); + movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL + mulpd(xmm5, xmm5); + addpd(xmm4, xmm2); + mulpd(xmm3, xmm5); + pshufd(xmm2, xmm0, 228); + addsd(xmm0, xmm1); + mulsd(xmm4, xmm1); + subsd(xmm2, xmm0); + mulsd(xmm6, xmm1); + addsd(xmm1, xmm2); + pshufd(xmm2, xmm0, 238); + mulsd(xmm5, xmm5); + addsd(xmm7, xmm2); + addsd(xmm1, xmm6); + addpd(xmm4, xmm3); + addsd(xmm1, xmm7); + mulpd(xmm4, xmm5); + addsd(xmm1, xmm4); + pshufd(xmm5, xmm4, 238); + addsd(xmm1, xmm5); + addsd(xmm0, xmm1); + jmp(B1_5); + + bind(L_2TAG_PACKET_0_0_2); + movq(xmm0, Address(rsp, 0)); + movq(xmm1, Address(rsp, 0)); + addl(eax, 16); + cmpl(eax, 32768); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); + cmpl(eax, 16); + jcc(Assembler::below, L_2TAG_PACKET_3_0_2); + + bind(L_2TAG_PACKET_4_0_2); + addsd(xmm0, xmm0); + jmp(B1_5); + + bind(L_2TAG_PACKET_5_0_2); + jcc(Assembler::above, L_2TAG_PACKET_4_0_2); + cmpl(edx, 0); + jcc(Assembler::above, L_2TAG_PACKET_4_0_2); + jmp(L_2TAG_PACKET_6_0_2); + + bind(L_2TAG_PACKET_3_0_2); + xorpd(xmm1, xmm1); + addsd(xmm1, xmm0); + movdl(edx, xmm1); + psrlq(xmm1, 32); + movdl(ecx, xmm1); + orl(edx, ecx); + cmpl(edx, 0); + jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); + xorpd(xmm1, xmm1); + movl(eax, 18416); + pinsrw(xmm1, eax, 3); + mulsd(xmm0, xmm1); + xorpd(xmm2, xmm2); + movl(eax, 16368); + pinsrw(xmm2, eax, 3); + movdqu(xmm1, xmm0); + pextrw(eax, xmm0, 3); + por(xmm0, xmm2); + movl(ecx, 18416); + psrlq(xmm0, 27); + movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL + psrld(xmm0, 2); + rcpps(xmm0, xmm0); + psllq(xmm1, 12); + pshufd(xmm6, xmm5, 78); + psrlq(xmm1, 12); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_2_0_2); + movdl(edx, xmm1); + psrlq(xmm1, 32); + movdl(ecx, xmm1); + addl(ecx, ecx); + cmpl(ecx, -2097152); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); + orl(edx, ecx); + cmpl(edx, 0); + jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); + + bind(L_2TAG_PACKET_6_0_2); + xorpd(xmm1, xmm1); + xorpd(xmm0, xmm0); + movl(eax, 32752); + pinsrw(xmm1, eax, 3); + mulsd(xmm0, xmm1); + movl(Address(rsp, 16), 9); + jmp(L_2TAG_PACKET_8_0_2); + + bind(L_2TAG_PACKET_7_0_2); + xorpd(xmm1, xmm1); + xorpd(xmm0, xmm0); + movl(eax, 49136); + pinsrw(xmm0, eax, 3); + divsd(xmm0, xmm1); + movl(Address(rsp, 16), 8); + + bind(L_2TAG_PACKET_8_0_2); + movq(Address(rsp, 8), xmm0); + + bind(B1_3); + movq(xmm0, Address(rsp, 8)); + + bind(L_2TAG_PACKET_9_0_2); + + bind(B1_5); + addq(rsp, 24); + + } + + /******************************************************************************/ // ALGORITHM DESCRIPTION - POW() // --------------------- // // Let x=2^k * mx, mx in [1,2) //
*** 2940,2960 **** --- 3288,3317 ---- mov64(r8, 0x3fb921fb54400000); movdq(xmm3, r8); movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL pshufd(xmm4, xmm0, 68); mulsd(xmm3, xmm1); + if (VM_Version::supports_sse3()) { movddup(xmm1, xmm1); + } else { + movlhps(xmm1, xmm1); + } andl(edx, 63); shll(edx, 5); lea(rax, ExternalAddress(Ctable)); addq(rax, rdx); mulpd(xmm6, xmm1); mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL subsd(xmm4, xmm3); movq(xmm7, Address(rax, 8)); subsd(xmm0, xmm3); + if (VM_Version::supports_sse3()) { movddup(xmm3, xmm4); + } else { + movdqu(xmm3, xmm4); + movlhps(xmm3, xmm3); + } subsd(xmm4, xmm6); pshufd(xmm0, xmm0, 68); movdqu(xmm2, Address(rax, 0)); mulpd(xmm5, xmm0); subpd(xmm0, xmm6);
*** 3941,3945 **** --- 4298,5326 ---- bind(B1_4); addq(rsp, 16); pop(rbx); } + + /******************************************************************************/ + // ALGORITHM DESCRIPTION - TAN() + // --------------------- + // + // Polynomials coefficients and other constants. + // + // Note that in this algorithm, there is a different polynomial for + // each breakpoint, so there are 32 sets of polynomial coefficients + // as well as 32 instances of the other constants. + // + // The polynomial coefficients and constants are offset from the start + // of the main block as follows: + // + // 0: c8 | c0 + // 16: c9 | c1 + // 32: c10 | c2 + // 48: c11 | c3 + // 64: c12 | c4 + // 80: c13 | c5 + // 96: c14 | c6 + // 112: c15 | c7 + // 128: T_hi + // 136: T_lo + // 144: Sigma + // 152: T_hl + // 160: Tau + // 168: Mask + // 176: (end of block) + // + // The total table size is therefore 5632 bytes. + // + // Note that c0 and c1 are always zero. We could try storing + // other constants here, and just loading the low part of the + // SIMD register in these cases, after ensuring the high part + // is zero. + // + // The higher terms of the polynomial are computed in the *low* + // part of the SIMD register. This is so we can overlap the + // multiplication by r^8 and the unpacking of the other part. + // + // The constants are: + // T_hi + T_lo = accurate constant term in power series + // Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit) + // Tau = multiplier for the reciprocal, always -1 or 0 + // + // The basic reconstruction formula using these constants is: + // + // High = tau * recip_hi + t_hi + // Med = (sgn * r + t_hl * r)_hi + // Low = (sgn * r + t_hl * r)_lo + + // tau * recip_lo + T_lo + (T_hl + sigma) * c + pol + // + // where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 + // + // (c0 = c1 = 0, but using them keeps SIMD regularity) + // + // We then do a compensated sum High + Med, add the low parts together + // and then do the final sum. + // + // Here recip_hi + recip_lo is an accurate reciprocal of the remainder + // modulo pi/2 + // + // Special cases: + // tan(NaN) = quiet NaN, and raise invalid exception + // tan(INF) = NaN and raise invalid exception + // tan(+/-0) = +/-0 + // + /******************************************************************************/ + + + ALIGNED_(16) juint _ONEHALF_tan[] = + { + 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + }; + + ALIGNED_(16) juint _MUL16[] = + { + 0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL + }; + + ALIGNED_(16) juint _sign_mask_tan[] = + { + 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL + }; + + ALIGNED_(16) juint _PI32INV_tan[] = + { + 0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL + }; + + ALIGNED_(16) juint _P_1_tan[] = + { + 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL + }; + + ALIGNED_(16) juint _P_2_tan[] = + { + 0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL + }; + + ALIGNED_(16) juint _P_3_tan[] = + { + 0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL + }; + + ALIGNED_(16) juint _Ctable_tan[] = + { + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL, + 0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL, + 0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL, + 0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, + 0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, + 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL, + 0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, + 0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL, + 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL, + 0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, + 0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL, + 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL, + 0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL, + 0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL, + 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL, + 0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL, + 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL, + 0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL, + 0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL, + 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL, + 0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, + 0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL, + 0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL, + 0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, + 0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL, + 0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, + 0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL, + 0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL, + 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL, + 0x3fc50533UL, 0x9bed9b2eUL, 0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL, + 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL, + 0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL, + 0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL, + 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL, + 0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL, + 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL, + 0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL, + 0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, + 0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL, + 0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, + 0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, + 0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL, + 0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, + 0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, + 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL, + 0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, + 0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL, + 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL, + 0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, + 0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL, + 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL, + 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL, + 0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL, + 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL, + 0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL, + 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL, + 0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL, + 0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL, + 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL, + 0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, + 0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL, + 0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL, + 0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, + 0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL, + 0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL, + 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL, + 0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL, + 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL, + 0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL, + 0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL, + 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL, + 0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL, + 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, + 0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 0x0359f1beUL, 0xbf0139e4UL, + 0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, + 0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL, + 0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, + 0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, + 0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL, + 0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, + 0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, + 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL, + 0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, + 0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL, + 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL, + 0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, + 0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL, + 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL, + 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL, + 0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL, + 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL, + 0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL, + 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL, + 0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL, + 0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL, + 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL, + 0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, + 0xb93820c8UL, 0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL, + 0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL, + 0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, + 0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL, + 0xa158e8bcUL, 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, + 0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL, + 0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL, + 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL, + 0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL, + 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL, + 0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL, + 0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL, + 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL, + 0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL, + 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL, + 0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL, + 0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, + 0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL, + 0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, + 0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, + 0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL, + 0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL, + 0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL, + 0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL, + 0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL, + 0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL, + 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL, + 0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL, + 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL, + 0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL, + 0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL, + 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL, + 0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, + 0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL, + 0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL, + 0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, + 0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL, + 0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, + 0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL, + 0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL, + 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL, + 0x3ee264d4UL, 0xbb6cbb18UL, 0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL, + 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL, + 0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL, + 0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL, + 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL, + 0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL, + 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, + 0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL, + 0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, + 0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL, + 0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, + 0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, + 0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL, + 0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, + 0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, + 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL, + 0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, + 0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL, + 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL, + 0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, + 0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL, + 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL, + 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL, + 0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL, + 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL, + 0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL, + 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL, + 0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL, + 0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL, + 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL, + 0x00000000UL, 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, + 0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL, + 0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL, + 0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, + 0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL, + 0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, + 0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL, + 0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL, + 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL, + 0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL, + 0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL, + 0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL, + 0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL, + 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, + 0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL, + 0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, + 0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL, + 0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, + 0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL, + 0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL, + 0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, + 0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, + 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL, + 0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, + 0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL, + 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL, + 0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, + 0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL, + 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL, + 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL, + 0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL, + 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL, + 0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL, + 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL, + 0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL, + 0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL, + 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL, + 0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, + 0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL, + 0x3fc1f250UL, 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL, + 0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, + 0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL, + 0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, + 0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL, + 0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL, + 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL, + 0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL, + 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 0x59f21a6dUL, + 0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL, + 0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL, + 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL, + 0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL, + 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL, + 0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL, + 0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, + 0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL, + 0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, + 0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, + 0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL, + 0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, + 0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, + 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL, + 0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, + 0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL, + 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL, + 0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, + 0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL, + 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL + }; + + ALIGNED_(16) juint _MASK_35_tan[] = + { + 0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL + }; + + ALIGNED_(16) juint _Q_11_tan[] = + { + 0xb8fe4d77UL, 0x3f82609aUL + }; + + ALIGNED_(16) juint _Q_9_tan[] = + { + 0xbf847a43UL, 0x3f9664a0UL + }; + + ALIGNED_(16) juint _Q_7_tan[] = + { + 0x52c4c8abUL, 0x3faba1baUL + }; + + ALIGNED_(16) juint _Q_5_tan[] = + { + 0x11092746UL, 0x3fc11111UL + }; + + ALIGNED_(16) juint _Q_3_tan[] = + { + 0x55555612UL, 0x3fd55555UL + }; + + ALIGNED_(16) juint _PI_INV_TABLE_tan[] = + { + 0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL, + 0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL, + 0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL, + 0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL, + 0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL, + 0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL, + 0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL, + 0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL, + 0xf0cfbc21UL + }; + + ALIGNED_(8) juint _PI_4_tan[] = + { + 0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL + }; + + ALIGNED_(8) juint _QQ_2_tan[] = + { + 0x676733afUL, 0x3d32e7b9UL + }; + + ALIGNED_(8) juint _ONE_tan[] = + { + 0x00000000UL, 0x3ff00000UL + }; + + ALIGNED_(8) juint _TWO_POW_55_tan[] = + { + 0x00000000UL, 0x43600000UL + }; + + ALIGNED_(4) juint _TWO_POW_M55_tan[] = + { + 0x00000000UL, 0x3c800000UL + }; + + ALIGNED_(4) juint _NEG_ZERO_tan[] = + { + 0x00000000UL, 0x80000000UL + }; + + void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) { + + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; + Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; + Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1, B1_2, B1_3, B1_4, B1_5, start; + + address ONEHALF = (address)_ONEHALF_tan; + address MUL16 = (address)_MUL16; + address sign_mask = (address)_sign_mask_tan; + address PI32INV = (address)_PI32INV_tan; + address P_1 = (address)_P_1_tan; + address P_2 = (address)_P_2_tan; + address P_3 = (address)_P_3_tan; + address Ctable = (address)_Ctable_tan; + address MASK_35 = (address)_MASK_35_tan; + address Q_11 = (address)_Q_11_tan; + address Q_9 = (address)_Q_9_tan; + address Q_7 = (address)_Q_7_tan; + address Q_5 = (address)_Q_5_tan; + address Q_3 = (address)_Q_3_tan; + address PI_INV_TABLE = (address)_PI_INV_TABLE_tan; + address PI_4 = (address)_PI_4_tan; + address QQ_2 = (address)_QQ_2_tan; + address ONE = (address)_ONE_tan; + address TWO_POW_55 = (address)_TWO_POW_55_tan; + address TWO_POW_M55 = (address)_TWO_POW_M55_tan; + address NEG_ZERO = (address)_NEG_ZERO_tan; + + bind(start); + push(rbx); + subq(rsp, 16); + movsd(Address(rsp, 8), xmm0); + + bind(B1_2); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + subl(eax, 16314); + cmpl(eax, 270); + jcc(Assembler::above, L_2TAG_PACKET_0_0_1); + movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL + unpcklpd(xmm0, xmm0); + movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL + andpd(xmm4, xmm0); + movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL + mulpd(xmm1, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + movdqu(xmm7, xmm1); + unpckhpd(xmm7, xmm7); + cvttsd2sil(edx, xmm7); + cvttpd2dq(xmm1, xmm1); + cvtdq2pd(xmm1, xmm1); + mulpd(xmm1, xmm6); + movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL + movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL + addq(rdx, 469248); + movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL + mulpd(xmm3, xmm1); + andq(rdx, 31); + mulsd(xmm5, xmm1); + movq(rcx, rdx); + mulpd(xmm4, xmm1); + shlq(rcx, 1); + subpd(xmm0, xmm3); + mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL + addq(rdx, rcx); + shlq(rcx, 2); + addq(rdx, rcx); + addsd(xmm5, xmm0); + movdqu(xmm2, xmm0); + subpd(xmm0, xmm4); + movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + shlq(rdx, 4); + lea(rax, ExternalAddress(Ctable)); + andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL + movdqu(xmm3, xmm0); + addq(rax, rdx); + subpd(xmm2, xmm0); + unpckhpd(xmm0, xmm0); + divsd(xmm6, xmm5); + subpd(xmm2, xmm4); + movdqu(xmm7, Address(rax, 16)); + subsd(xmm3, xmm5); + mulpd(xmm7, xmm0); + subpd(xmm2, xmm1); + movdqu(xmm1, Address(rax, 48)); + mulpd(xmm1, xmm0); + movdqu(xmm4, Address(rax, 96)); + mulpd(xmm4, xmm0); + addsd(xmm2, xmm3); + movdqu(xmm3, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm7, Address(rax, 0)); + addpd(xmm1, Address(rax, 32)); + mulpd(xmm1, xmm0); + addpd(xmm4, Address(rax, 80)); + addpd(xmm7, xmm1); + movdqu(xmm1, Address(rax, 112)); + mulpd(xmm1, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm4, xmm1); + movdqu(xmm1, Address(rax, 64)); + mulpd(xmm1, xmm0); + addpd(xmm7, xmm1); + movdqu(xmm1, xmm3); + mulpd(xmm3, xmm0); + mulsd(xmm0, xmm0); + mulpd(xmm1, Address(rax, 144)); + mulpd(xmm4, xmm3); + movdqu(xmm3, xmm1); + addpd(xmm7, xmm4); + movdqu(xmm4, xmm1); + mulsd(xmm0, xmm7); + unpckhpd(xmm7, xmm7); + addsd(xmm0, xmm7); + unpckhpd(xmm1, xmm1); + addsd(xmm3, xmm1); + subsd(xmm4, xmm3); + addsd(xmm1, xmm4); + movdqu(xmm4, xmm2); + movq(xmm7, Address(rax, 144)); + unpckhpd(xmm2, xmm2); + addsd(xmm7, Address(rax, 152)); + mulsd(xmm7, xmm2); + addsd(xmm7, Address(rax, 136)); + addsd(xmm7, xmm1); + addsd(xmm0, xmm7); + movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + mulsd(xmm4, xmm6); + movq(xmm2, Address(rax, 168)); + andpd(xmm2, xmm6); + mulsd(xmm5, xmm2); + mulsd(xmm6, Address(rax, 160)); + subsd(xmm7, xmm5); + subsd(xmm2, Address(rax, 128)); + subsd(xmm7, xmm4); + mulsd(xmm7, xmm6); + movdqu(xmm4, xmm3); + subsd(xmm3, xmm2); + addsd(xmm2, xmm3); + subsd(xmm4, xmm2); + addsd(xmm0, xmm4); + subsd(xmm0, xmm7); + addsd(xmm0, xmm3); + jmp(B1_4); + + bind(L_2TAG_PACKET_0_0_1); + jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + movl(edx, eax); + andl(eax, 32752); + jcc(Assembler::equal, L_2TAG_PACKET_2_0_1); + andl(edx, 32767); + cmpl(edx, 15904); + jcc(Assembler::below, L_2TAG_PACKET_3_0_1); + movdqu(xmm2, xmm0); + movdqu(xmm3, xmm0); + movq(xmm1, ExternalAddress(Q_11)); //0xb8fe4d77UL, 0x3f82609aUL + mulsd(xmm2, xmm0); + mulsd(xmm3, xmm2); + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_9)); //0xbf847a43UL, 0x3f9664a0UL + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_7)); //0x52c4c8abUL, 0x3faba1baUL + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_5)); //0x11092746UL, 0x3fc11111UL + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_3)); //0x55555612UL, 0x3fd55555UL + mulsd(xmm1, xmm3); + addsd(xmm0, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_3_0_1); + movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL + mulsd(xmm3, xmm0); + addsd(xmm0, xmm3); + mulsd(xmm0, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL + jmp(B1_4); + + bind(L_2TAG_PACKET_2_0_1); + movdqu(xmm1, xmm0); + mulsd(xmm1, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + andl(eax, 32752); + cmpl(eax, 32752); + jcc(Assembler::equal, L_2TAG_PACKET_4_0_1); + pextrw(ecx, xmm0, 3); + andl(ecx, 32752); + subl(ecx, 16224); + shrl(ecx, 7); + andl(ecx, 65532); + lea(r11, ExternalAddress(PI_INV_TABLE)); + addq(rcx, r11); + movdq(rax, xmm0); + movl(r10, Address(rcx, 20)); + movl(r8, Address(rcx, 24)); + movl(edx, eax); + shrq(rax, 21); + orl(eax, INT_MIN); + shrl(eax, 11); + movl(r9, r10); + imulq(r10, rdx); + imulq(r9, rax); + imulq(r8, rax); + movl(rsi, Address(rcx, 16)); + movl(rdi, Address(rcx, 12)); + movl(r11, r10); + shrq(r10, 32); + addq(r9, r10); + addq(r11, r8); + movl(r8, r11); + shrq(r11, 32); + addq(r9, r11); + movl(r10, rsi); + imulq(rsi, rdx); + imulq(r10, rax); + movl(r11, rdi); + imulq(rdi, rdx); + movl(rbx, rsi); + shrq(rsi, 32); + addq(r9, rbx); + movl(rbx, r9); + shrq(r9, 32); + addq(r10, rsi); + addq(r10, r9); + shlq(rbx, 32); + orq(r8, rbx); + imulq(r11, rax); + movl(r9, Address(rcx, 8)); + movl(rsi, Address(rcx, 4)); + movl(rbx, rdi); + shrq(rdi, 32); + addq(r10, rbx); + movl(rbx, r10); + shrq(r10, 32); + addq(r11, rdi); + addq(r11, r10); + movq(rdi, r9); + imulq(r9, rdx); + imulq(rdi, rax); + movl(r10, r9); + shrq(r9, 32); + addq(r11, r10); + movl(r10, r11); + shrq(r11, 32); + addq(rdi, r9); + addq(rdi, r11); + movq(r9, rsi); + imulq(rsi, rdx); + imulq(r9, rax); + shlq(r10, 32); + orq(r10, rbx); + movl(eax, Address(rcx, 0)); + movl(r11, rsi); + shrq(rsi, 32); + addq(rdi, r11); + movl(r11, rdi); + shrq(rdi, 32); + addq(r9, rsi); + addq(r9, rdi); + imulq(rdx, rax); + pextrw(rbx, xmm0, 3); + lea(rdi, ExternalAddress(PI_INV_TABLE)); + subq(rcx, rdi); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, 19); + movl(rsi, 32768); + andl(rsi, rbx); + shrl(rbx, 4); + andl(rbx, 2047); + subl(rbx, 1023); + subl(ecx, rbx); + addq(r9, rdx); + movl(edx, ecx); + addl(edx, 32); + cmpl(ecx, 0); + jcc(Assembler::less, L_2TAG_PACKET_5_0_1); + negl(ecx); + addl(ecx, 29); + shll(r9); + movl(rdi, r9); + andl(r9, 1073741823); + testl(r9, 536870912); + jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); + shrl(r9); + movl(rbx, 0); + shlq(r9, 32); + orq(r9, r11); + + bind(L_2TAG_PACKET_7_0_1); + + bind(L_2TAG_PACKET_8_0_1); + cmpq(r9, 0); + jcc(Assembler::equal, L_2TAG_PACKET_9_0_1); + + bind(L_2TAG_PACKET_10_0_1); + bsrq(r11, r9); + movl(ecx, 29); + subl(ecx, r11); + jcc(Assembler::lessEqual, L_2TAG_PACKET_11_0_1); + shlq(r9); + movq(rax, r10); + shlq(r10); + addl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shrq(rax); + shrq(r8); + orq(r9, rax); + orq(r10, r8); + + bind(L_2TAG_PACKET_12_0_1); + cvtsi2sdq(xmm0, r9); + shrq(r10, 1); + cvtsi2sdq(xmm3, r10); + xorpd(xmm4, xmm4); + shll(edx, 4); + negl(edx); + addl(edx, 16368); + orl(edx, rsi); + xorl(edx, rbx); + pinsrw(xmm4, edx, 3); + movq(xmm2, ExternalAddress(PI_4)); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL + movq(xmm7, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL + xorpd(xmm5, xmm5); + subl(edx, 1008); + pinsrw(xmm5, edx, 3); + mulsd(xmm0, xmm4); + shll(rsi, 16); + sarl(rsi, 31); + mulsd(xmm3, xmm5); + movdqu(xmm1, xmm0); + mulsd(xmm0, xmm2); + shrl(rdi, 30); + addsd(xmm1, xmm3); + mulsd(xmm3, xmm2); + addl(rdi, rsi); + xorl(rdi, rsi); + mulsd(xmm7, xmm1); + movl(eax, rdi); + addsd(xmm7, xmm3); + movdqu(xmm2, xmm0); + addsd(xmm0, xmm7); + subsd(xmm2, xmm0); + addsd(xmm7, xmm2); + movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL + if (VM_Version::supports_sse3()) { + movddup(xmm0, xmm0); + } else { + movlhps(xmm0, xmm0); + } + movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL + andpd(xmm4, xmm0); + mulpd(xmm1, xmm0); + if (VM_Version::supports_sse3()) { + movddup(xmm7, xmm7); + } else { + movlhps(xmm7, xmm7); + } + movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL + por(xmm5, xmm4); + addpd(xmm1, xmm5); + movdqu(xmm5, xmm1); + unpckhpd(xmm5, xmm5); + cvttsd2sil(edx, xmm5); + cvttpd2dq(xmm1, xmm1); + cvtdq2pd(xmm1, xmm1); + mulpd(xmm1, xmm6); + movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL + movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL + shll(eax, 4); + addl(edx, 469248); + movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL + mulpd(xmm3, xmm1); + addl(edx, eax); + andl(edx, 31); + mulsd(xmm5, xmm1); + movl(ecx, edx); + mulpd(xmm4, xmm1); + shll(ecx, 1); + subpd(xmm0, xmm3); + mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL + addl(edx, ecx); + shll(ecx, 2); + addl(edx, ecx); + addsd(xmm5, xmm0); + movdqu(xmm2, xmm0); + subpd(xmm0, xmm4); + movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + shll(edx, 4); + lea(rax, ExternalAddress(Ctable)); + andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL + movdqu(xmm3, xmm0); + addq(rax, rdx); + subpd(xmm2, xmm0); + unpckhpd(xmm0, xmm0); + divsd(xmm6, xmm5); + subpd(xmm2, xmm4); + subsd(xmm3, xmm5); + subpd(xmm2, xmm1); + movdqu(xmm1, Address(rax, 48)); + addpd(xmm2, xmm7); + movdqu(xmm7, Address(rax, 16)); + mulpd(xmm7, xmm0); + movdqu(xmm4, Address(rax, 96)); + mulpd(xmm1, xmm0); + mulpd(xmm4, xmm0); + addsd(xmm2, xmm3); + movdqu(xmm3, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm7, Address(rax, 0)); + addpd(xmm1, Address(rax, 32)); + mulpd(xmm1, xmm0); + addpd(xmm4, Address(rax, 80)); + addpd(xmm7, xmm1); + movdqu(xmm1, Address(rax, 112)); + mulpd(xmm1, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm4, xmm1); + movdqu(xmm1, Address(rax, 64)); + mulpd(xmm1, xmm0); + addpd(xmm7, xmm1); + movdqu(xmm1, xmm3); + mulpd(xmm3, xmm0); + mulsd(xmm0, xmm0); + mulpd(xmm1, Address(rax, 144)); + mulpd(xmm4, xmm3); + movdqu(xmm3, xmm1); + addpd(xmm7, xmm4); + movdqu(xmm4, xmm1); + mulsd(xmm0, xmm7); + unpckhpd(xmm7, xmm7); + addsd(xmm0, xmm7); + unpckhpd(xmm1, xmm1); + addsd(xmm3, xmm1); + subsd(xmm4, xmm3); + addsd(xmm1, xmm4); + movdqu(xmm4, xmm2); + movq(xmm7, Address(rax, 144)); + unpckhpd(xmm2, xmm2); + addsd(xmm7, Address(rax, 152)); + mulsd(xmm7, xmm2); + addsd(xmm7, Address(rax, 136)); + addsd(xmm7, xmm1); + addsd(xmm0, xmm7); + movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + mulsd(xmm4, xmm6); + movq(xmm2, Address(rax, 168)); + andpd(xmm2, xmm6); + mulsd(xmm5, xmm2); + mulsd(xmm6, Address(rax, 160)); + subsd(xmm7, xmm5); + subsd(xmm2, Address(rax, 128)); + subsd(xmm7, xmm4); + mulsd(xmm7, xmm6); + movdqu(xmm4, xmm3); + subsd(xmm3, xmm2); + addsd(xmm2, xmm3); + subsd(xmm4, xmm2); + addsd(xmm0, xmm4); + subsd(xmm0, xmm7); + addsd(xmm0, xmm3); + jmp(B1_4); + + bind(L_2TAG_PACKET_9_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + movl(r8, 0); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_1); + jmp(L_2TAG_PACKET_12_0_1); + + bind(L_2TAG_PACKET_11_0_1); + jcc(Assembler::equal, L_2TAG_PACKET_12_0_1); + negl(ecx); + shrq(r10); + movq(rax, r9); + shrq(r9); + subl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shlq(rax); + orq(r10, rax); + jmp(L_2TAG_PACKET_12_0_1); + + bind(L_2TAG_PACKET_5_0_1); + notl(ecx); + shlq(r9, 32); + orq(r9, r11); + shlq(r9); + movq(rdi, r9); + testl(r9, INT_MIN); + jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1); + shrl(r9); + movl(rbx, 0); + shrq(rdi, 2); + jmp(L_2TAG_PACKET_8_0_1); + + bind(L_2TAG_PACKET_6_0_1); + shrl(r9); + movl(rbx, 1073741824); + shrl(rbx); + shlq(r9, 32); + orq(r9, r11); + shlq(rbx, 32); + addl(rdi, 1073741824); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(rbx, 32768); + jmp(L_2TAG_PACKET_7_0_1); + + bind(L_2TAG_PACKET_13_0_1); + shrl(r9); + mov64(rbx, 0x100000000); + shrq(rbx); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(rbx, 32768); + shrq(rdi, 2); + addl(rdi, 1073741824); + jmp(L_2TAG_PACKET_8_0_1); + + bind(L_2TAG_PACKET_4_0_1); + movq(xmm0, Address(rsp, 8)); + mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL + movq(Address(rsp, 0), xmm0); + + bind(L_2TAG_PACKET_14_0_1); + + bind(B1_4); + addq(rsp, 16); + + }
< prev index next >