720 jmp(L_2TAG_PACKET_8_0_2); 721 bind(L_2TAG_PACKET_7_0_2); 722 xorpd(xmm1, xmm1); 723 xorpd(xmm0, xmm0); 724 movl(eax, 49136); 725 pinsrw(xmm0, eax, 3); 726 divsd(xmm0, xmm1); 727 movl(Address(rsp, 16), 2); 728 729 bind(L_2TAG_PACKET_8_0_2); 730 movq(Address(rsp, 8), xmm0); 731 732 bind(B1_3); 733 movq(xmm0, Address(rsp, 8)); 734 735 bind(B1_5); 736 addq(rsp, 24); 737 } 738 739 /******************************************************************************/ 740 // ALGORITHM DESCRIPTION - POW() 741 // --------------------- 742 // 743 // Let x=2^k * mx, mx in [1,2) 744 // 745 // log2(x) calculation: 746 // 747 // Get B~1/mx based on the output of rcpps instruction (B0) 748 // B = int((B0*LH*2^9+0.5))/2^9 749 // LH is a short approximation for log2(e) 750 // 751 // Reduced argument, scaled by LH: 752 // r=B*mx-LH (computed accurately in high and low parts) 753 // 754 // log2(x) result: k - log2(B) + p(r) 755 // p(r) is a degree 8 polynomial 756 // -log2(B) read from data table (high, low parts) 757 // log2(x) is formed from high and low parts 758 // For |x| in [1-1/32, 1+1/16), a slower but more accurate computation 759 // based on the same table design is performed. 
2925 movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL 2926 movq(xmm2, ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL 2927 andl(eax, 2147418112); 2928 subl(eax, 808452096); 2929 cmpl(eax, 281346048); 2930 jcc(Assembler::above, L_2TAG_PACKET_0_0_1); 2931 mulsd(xmm1, xmm0); 2932 movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL 2933 movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL 2934 pand(xmm4, xmm0); 2935 por(xmm5, xmm4); 2936 addpd(xmm1, xmm5); 2937 cvttsd2sil(edx, xmm1); 2938 cvtsi2sdl(xmm1, edx); 2939 movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL 2940 mov64(r8, 0x3fb921fb54400000); 2941 movdq(xmm3, r8); 2942 movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL 2943 pshufd(xmm4, xmm0, 68); 2944 mulsd(xmm3, xmm1); 2945 movddup(xmm1, xmm1); 2946 andl(edx, 63); 2947 shll(edx, 5); 2948 lea(rax, ExternalAddress(Ctable)); 2949 addq(rax, rdx); 2950 mulpd(xmm6, xmm1); 2951 mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL 2952 subsd(xmm4, xmm3); 2953 movq(xmm7, Address(rax, 8)); 2954 subsd(xmm0, xmm3); 2955 movddup(xmm3, xmm4); 2956 subsd(xmm4, xmm6); 2957 pshufd(xmm0, xmm0, 68); 2958 movdqu(xmm2, Address(rax, 0)); 2959 mulpd(xmm5, xmm0); 2960 subpd(xmm0, xmm6); 2961 mulsd(xmm7, xmm4); 2962 subsd(xmm3, xmm4); 2963 mulpd(xmm5, xmm0); 2964 mulpd(xmm0, xmm0); 2965 subsd(xmm3, xmm6); 2966 movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL 2967 subsd(xmm1, xmm3); 2968 movq(xmm3, Address(rax, 24)); 2969 addsd(xmm2, xmm3); 2970 subsd(xmm7, xmm2); 2971 mulsd(xmm2, xmm4); 2972 mulpd(xmm6, xmm0); 2973 mulsd(xmm3, xmm4); 2974 mulpd(xmm2, xmm0); 2975 mulpd(xmm0, xmm0); 3925 sbbq(r11, r10); 3926 sbbq(rbx, r9); 3927 movq(r8, rcx); 3928 movq(r10, r11); 3929 movq(r9, rbx); 3930 movl(rbx, 32768); 3931 shrq(rdi, 3); 3932 addl(rdi, 536870912); 3933 
jmp(L_2TAG_PACKET_6_0_1); 3934 3935 bind(L_2TAG_PACKET_2_0_1); 3936 movsd(xmm0, Address(rsp, 8)); 3937 mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL 3938 movq(Address(rsp, 0), xmm0); 3939 3940 bind(L_2TAG_PACKET_13_0_1); 3941 3942 bind(B1_4); 3943 addq(rsp, 16); 3944 pop(rbx); 3945 } | 720 jmp(L_2TAG_PACKET_8_0_2); 721 bind(L_2TAG_PACKET_7_0_2); 722 xorpd(xmm1, xmm1); 723 xorpd(xmm0, xmm0); 724 movl(eax, 49136); 725 pinsrw(xmm0, eax, 3); 726 divsd(xmm0, xmm1); 727 movl(Address(rsp, 16), 2); 728 729 bind(L_2TAG_PACKET_8_0_2); 730 movq(Address(rsp, 8), xmm0); 731 732 bind(B1_3); 733 movq(xmm0, Address(rsp, 8)); 734 735 bind(B1_5); 736 addq(rsp, 24); 737 } 738 739 /******************************************************************************/ 740 // ALGORITHM DESCRIPTION - LOG10() 741 // --------------------- 742 // 743 // Let x=2^k * mx, mx in [1,2) 744 // 745 // Get B~1/mx based on the output of rcpss instruction (B0) 746 // B = int((B0*LH*2^7+0.5))/2^7 747 // LH is a short approximation for log10(e) 748 // 749 // Reduced argument: r=B*mx-LH (computed accurately in high and low parts) 750 // 751 // Result: k*log10(2) - log(B) + p(r) 752 // p(r) is a degree 7 polynomial 753 // -log(B) read from data table (high, low parts) 754 // Result is formed from high and low parts 755 // 756 // Special cases: 757 // log10(0) = -INF with divide-by-zero exception raised 758 // log10(1) = +0 759 // log10(x) = NaN with invalid exception raised if x < -0, including -INF 760 // log10(+INF) = +INF 761 // 762 /******************************************************************************/ 763 764 ALIGNED_(16) juint _HIGHSIGMASK_log10[] = 765 { 766 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL 767 }; 768 769 ALIGNED_(16) juint _LOG10_E[] = 770 { 771 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL 772 }; 773 774 ALIGNED_(16) juint _L_tbl_log10[] = 775 { 776 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, 777 0x3fd32418UL, 
0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, 778 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, 779 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, 780 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, 781 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, 782 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, 783 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, 784 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, 785 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, 786 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, 787 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, 788 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, 789 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, 790 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, 791 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, 792 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, 793 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, 794 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, 795 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, 796 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, 797 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, 798 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, 799 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, 800 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, 801 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, 802 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, 803 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, 804 0x9a907000UL, 
0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, 805 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, 806 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, 807 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, 808 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, 809 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, 810 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, 811 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, 812 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, 813 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, 814 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, 815 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, 816 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, 817 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, 818 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, 819 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, 820 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, 821 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, 822 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, 823 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, 824 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, 825 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, 826 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, 827 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, 828 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, 829 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, 830 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, 831 0x3cc23f57UL, 
0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, 832 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, 833 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, 834 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, 835 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, 836 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, 837 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, 838 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, 839 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, 840 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, 841 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, 842 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, 843 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, 844 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, 845 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, 846 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, 847 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, 848 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, 849 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, 850 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, 851 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, 852 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, 853 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, 854 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, 855 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, 856 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, 857 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, 858 0x120a34d3UL, 
0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, 859 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, 860 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, 861 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, 862 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, 863 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, 864 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, 865 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, 866 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, 867 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, 868 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, 869 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, 870 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, 871 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, 872 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, 873 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, 874 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, 875 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, 876 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, 877 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, 878 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 879 0x00000000UL 880 }; 881 882 ALIGNED_(16) juint _log2_log10[] = 883 { 884 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL 885 }; 886 887 ALIGNED_(16) juint _coeff_log10[] = 888 { 889 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, 890 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 891 0xdc77b115UL, 0xbff27af2UL 892 }; 893 894 // Registers: 895 // input: xmm0 896 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, 
xmm6, xmm7 897 // rax, rdx, rcx, tmp - r11 898 899 // Code generated by Intel C compiler for LIBM library 900 901 void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) { 902 Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; 903 Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; 904 Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start; 905 906 assert_different_registers(r11, eax, ecx, edx); 907 908 address HIGHSIGMASK = (address)_HIGHSIGMASK_log10; 909 address LOG10_E = (address)_LOG10_E; 910 address L_tbl = (address)_L_tbl_log10; 911 address log2 = (address)_log2_log10; 912 address coeff = (address)_coeff_log10; 913 914 bind(start); 915 subq(rsp, 24); 916 movsd(Address(rsp, 0), xmm0); 917 918 bind(B1_2); 919 xorpd(xmm2, xmm2); 920 movl(eax, 16368); 921 pinsrw(xmm2, eax, 3); 922 movl(ecx, 1054736384); 923 movdl(xmm7, ecx); 924 xorpd(xmm3, xmm3); 925 movl(edx, 30704); 926 pinsrw(xmm3, edx, 3); 927 movdqu(xmm1, xmm0); 928 movl(edx, 32768); 929 movdl(xmm4, edx); 930 movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL 931 pextrw(eax, xmm0, 3); 932 por(xmm0, xmm2); 933 movl(ecx, 16352); 934 psrlq(xmm0, 27); 935 movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL 936 psrld(xmm0, 2); 937 rcpps(xmm0, xmm0); 938 psllq(xmm1, 12); 939 pshufd(xmm6, xmm5, 78); 940 psrlq(xmm1, 12); 941 subl(eax, 16); 942 cmpl(eax, 32736); 943 jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); 944 945 bind(L_2TAG_PACKET_1_0_2); 946 mulss(xmm0, xmm7); 947 por(xmm1, xmm3); 948 lea(r11, ExternalAddress(L_tbl)); 949 andpd(xmm5, xmm1); 950 paddd(xmm0, xmm4); 951 subsd(xmm1, xmm5); 952 movdl(edx, xmm0); 953 psllq(xmm0, 29); 954 
// ---------------------------------------------------------------------------
// fast_log10: orientation notes (main path, L_2TAG_PACKET_1_0_2, continues below)
//
// Frame: the routine reserves 24 bytes with subq(rsp, 24) and releases them at
// B1_5. The input is saved at rsp+0; the special-case exits spill their result
// to rsp+8 and write a small integer to rsp+16.
//
// Range check (before L_2TAG_PACKET_1_0_2): eax receives the top 16 bits of the
// input via pextrw(eax, xmm0, 3), i.e. sign, exponent and 4 mantissa bits.
// After subl(eax, 16), the unsigned test against 32736 sends every input whose
// top word is below 16 or at/above 32752 to L_2TAG_PACKET_0_0_2.
//
// State at this point of the main path:
//   eax  = exponent word (already reduced by 16 on the normal path)
//   ecx  = value subtracted from the masked exponent word below
//          (16352 on the normal path, 18416 when entered from L_2TAG_PACKET_3_0_2)
//   edx  = low 32 bits of xmm0 after mulss/paddd (source of the table index)
//   xmm0 = rcpps result after mulss by xmm7, paddd xmm4 and psllq 29
//   xmm5 = xmm1 masked with HIGHSIGMASK; xmm1 = xmm1 - xmm5 (low lane)
//   xmm6 = HIGHSIGMASK with its 64-bit halves swapped (pshufd 78)
//   r11  = address of L_tbl
//
// What follows on the next line:
//   * eax = (eax & 32752) - ecx is converted to double in xmm7 and multiplied
//     by the log2 constants.
//   * edx = (edx & 16711680) >> 12 is a multiple of 16 used as the byte offset
//     of the 16-byte load from r11 + rdx - 1504.
//   * The coeff constants are combined with the reduced argument and the
//     pieces are summed into xmm0, then control jumps to B1_5.
//   * L_2TAG_PACKET_0_0_2 reloads the input from rsp+0, restores eax with
//     addl(eax, 16) and dispatches: eax >= 32768 (sign bit set) goes to
//     L_2TAG_PACKET_2_0_2, eax < 16 goes to L_2TAG_PACKET_3_0_2, and anything
//     else falls into L_2TAG_PACKET_4_0_2, which produces x + x.
//   * L_2TAG_PACKET_5_0_2 is entered from L_2TAG_PACKET_2_0_2 and reuses the
//     flags of that block's cmpl(ecx, -2097152) in its first jcc.
// ---------------------------------------------------------------------------
andpd(xmm0, xmm6); 955 andl(eax, 32752); 956 subl(eax, ecx); 957 cvtsi2sdl(xmm7, eax); 958 mulpd(xmm5, xmm0); 959 mulsd(xmm1, xmm0); 960 movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL 961 movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL 962 subsd(xmm5, xmm2); 963 andl(edx, 16711680); 964 shrl(edx, 12); 965 movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504)); 966 movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL 967 addsd(xmm1, xmm5); 968 movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL 969 mulsd(xmm6, xmm7); 970 pshufd(xmm5, xmm1, 68); 971 mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL 972 mulsd(xmm3, xmm1); 973 addsd(xmm0, xmm6); 974 mulpd(xmm4, xmm5); 975 movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL 976 mulpd(xmm5, xmm5); 977 addpd(xmm4, xmm2); 978 mulpd(xmm3, xmm5); 979 pshufd(xmm2, xmm0, 228); 980 addsd(xmm0, xmm1); 981 mulsd(xmm4, xmm1); 982 subsd(xmm2, xmm0); 983 mulsd(xmm6, xmm1); 984 addsd(xmm1, xmm2); 985 pshufd(xmm2, xmm0, 238); 986 mulsd(xmm5, xmm5); 987 addsd(xmm7, xmm2); 988 addsd(xmm1, xmm6); 989 addpd(xmm4, xmm3); 990 addsd(xmm1, xmm7); 991 mulpd(xmm4, xmm5); 992 addsd(xmm1, xmm4); 993 pshufd(xmm5, xmm4, 238); 994 addsd(xmm1, xmm5); 995 addsd(xmm0, xmm1); 996 jmp(B1_5); 997 998 bind(L_2TAG_PACKET_0_0_2); 999 movq(xmm0, Address(rsp, 0)); 1000 movq(xmm1, Address(rsp, 0)); 1001 addl(eax, 16); 1002 cmpl(eax, 32768); 1003 jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); 1004 cmpl(eax, 16); 1005 jcc(Assembler::below, L_2TAG_PACKET_3_0_2); 1006 1007 bind(L_2TAG_PACKET_4_0_2); 1008 addsd(xmm0, xmm0); 1009 jmp(B1_5); 1010 1011 bind(L_2TAG_PACKET_5_0_2); 1012 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); 1013 cmpl(edx, 0); 1014 jcc(Assembler::above, L_2TAG_PACKET_4_0_2); 1015 jmp(L_2TAG_PACKET_6_0_2); 1016 1017 
// ---------------------------------------------------------------------------
// fast_log10: special-case paths (all on the next line)
//
// L_2TAG_PACKET_3_0_2 - exponent word below 16 (zero or subnormal input).
//   The input is copied to xmm1 and its two 32-bit halves are OR'ed together;
//   an all-zero pattern goes to L_2TAG_PACKET_7_0_2. Otherwise the input is
//   multiplied by a constant whose top word is 18416 (0x47F0, i.e. 2^128),
//   ecx is set to 18416, the reciprocal/mantissa setup from B1_2 is repeated
//   on the rescaled value (without the subl(eax, 16) step), and control
//   rejoins the main path at L_2TAG_PACKET_1_0_2.
//
// L_2TAG_PACKET_2_0_2 - exponent word at or above 32768 (sign bit set).
//   edx = low 32 bits, ecx = high 32 bits with the sign shifted out by
//   addl(ecx, ecx). If ecx >= 0xFFE00000 (exponent field all ones) the flags
//   are handed to L_2TAG_PACKET_5_0_2, which sends any pattern with nonzero
//   mantissa bits to L_2TAG_PACKET_4_0_2 (x + x) and the rest to
//   L_2TAG_PACKET_6_0_2. Otherwise, an all-zero remainder (negative zero)
//   goes to L_2TAG_PACKET_7_0_2 and everything else falls into
//   L_2TAG_PACKET_6_0_2.
//
// L_2TAG_PACKET_6_0_2 - multiplies 0 by a value whose top word is 32752
//   (0x7FF0, +Inf) and writes 9 to rsp+16.
// L_2TAG_PACKET_7_0_2 - divides a value whose top word is 49136 (0xBFF0, -1.0)
//   by 0 and writes 8 to rsp+16.
// L_2TAG_PACKET_8_0_2 / B1_3 - spill the result to rsp+8 and reload it.
// B1_5 - common exit: addq(rsp, 24) releases the frame; the value is in xmm0.
//
// NOTE(review): the integers written to rsp+16 are never read in this routine;
// presumably they are error codes left over from the LIBM original - confirm
// before relying on them.
// ---------------------------------------------------------------------------
bind(L_2TAG_PACKET_3_0_2); 1018 xorpd(xmm1, xmm1); 1019 addsd(xmm1, xmm0); 1020 movdl(edx, xmm1); 1021 psrlq(xmm1, 32); 1022 movdl(ecx, xmm1); 1023 orl(edx, ecx); 1024 cmpl(edx, 0); 1025 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); 1026 xorpd(xmm1, xmm1); 1027 movl(eax, 18416); 1028 pinsrw(xmm1, eax, 3); 1029 mulsd(xmm0, xmm1); 1030 xorpd(xmm2, xmm2); 1031 movl(eax, 16368); 1032 pinsrw(xmm2, eax, 3); 1033 movdqu(xmm1, xmm0); 1034 pextrw(eax, xmm0, 3); 1035 por(xmm0, xmm2); 1036 movl(ecx, 18416); 1037 psrlq(xmm0, 27); 1038 movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL 1039 psrld(xmm0, 2); 1040 rcpps(xmm0, xmm0); 1041 psllq(xmm1, 12); 1042 pshufd(xmm6, xmm5, 78); 1043 psrlq(xmm1, 12); 1044 jmp(L_2TAG_PACKET_1_0_2); 1045 1046 bind(L_2TAG_PACKET_2_0_2); 1047 movdl(edx, xmm1); 1048 psrlq(xmm1, 32); 1049 movdl(ecx, xmm1); 1050 addl(ecx, ecx); 1051 cmpl(ecx, -2097152); 1052 jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); 1053 orl(edx, ecx); 1054 cmpl(edx, 0); 1055 jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); 1056 1057 bind(L_2TAG_PACKET_6_0_2); 1058 xorpd(xmm1, xmm1); 1059 xorpd(xmm0, xmm0); 1060 movl(eax, 32752); 1061 pinsrw(xmm1, eax, 3); 1062 mulsd(xmm0, xmm1); 1063 movl(Address(rsp, 16), 9); 1064 jmp(L_2TAG_PACKET_8_0_2); 1065 1066 bind(L_2TAG_PACKET_7_0_2); 1067 xorpd(xmm1, xmm1); 1068 xorpd(xmm0, xmm0); 1069 movl(eax, 49136); 1070 pinsrw(xmm0, eax, 3); 1071 divsd(xmm0, xmm1); 1072 movl(Address(rsp, 16), 8); 1073 1074 bind(L_2TAG_PACKET_8_0_2); 1075 movq(Address(rsp, 8), xmm0); 1076 1077 bind(B1_3); 1078 movq(xmm0, Address(rsp, 8)); 1079 1080 bind(L_2TAG_PACKET_9_0_2); 1081 1082 bind(B1_5); 1083 addq(rsp, 24); 1084 1085 } 1086 1087 /******************************************************************************/ 1088 // ALGORITHM DESCRIPTION - POW() 1089 // --------------------- 1090 // 1091 // Let x=2^k * mx, mx in [1,2) 1092 // 1093 // log2(x) calculation: 1094 // 1095 // Get B~1/mx based on the output of rcpps 
instruction (B0) 1096 // B = int((B0*LH*2^9+0.5))/2^9 1097 // LH is a short approximation for log2(e) 1098 // 1099 // Reduced argument, scaled by LH: 1100 // r=B*mx-LH (computed accurately in high and low parts) 1101 // 1102 // log2(x) result: k - log2(B) + p(r) 1103 // p(r) is a degree 8 polynomial 1104 // -log2(B) read from data table (high, low parts) 1105 // log2(x) is formed from high and low parts 1106 // For |x| in [1-1/32, 1+1/16), a slower but more accurate computation 1107 // based on the same table design is performed. 3273 movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL 3274 movq(xmm2, ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL 3275 andl(eax, 2147418112); 3276 subl(eax, 808452096); 3277 cmpl(eax, 281346048); 3278 jcc(Assembler::above, L_2TAG_PACKET_0_0_1); 3279 mulsd(xmm1, xmm0); 3280 movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL 3281 movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL 3282 pand(xmm4, xmm0); 3283 por(xmm5, xmm4); 3284 addpd(xmm1, xmm5); 3285 cvttsd2sil(edx, xmm1); 3286 cvtsi2sdl(xmm1, edx); 3287 movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL 3288 mov64(r8, 0x3fb921fb54400000); 3289 movdq(xmm3, r8); 3290 movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL 3291 pshufd(xmm4, xmm0, 68); 3292 mulsd(xmm3, xmm1); 3293 if (VM_Version::supports_sse3()) { 3294 movddup(xmm1, xmm1); 3295 } else { 3296 movlhps(xmm1, xmm1); 3297 } 3298 andl(edx, 63); 3299 shll(edx, 5); 3300 lea(rax, ExternalAddress(Ctable)); 3301 addq(rax, rdx); 3302 mulpd(xmm6, xmm1); 3303 mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL 3304 subsd(xmm4, xmm3); 3305 movq(xmm7, Address(rax, 8)); 3306 subsd(xmm0, xmm3); 3307 if (VM_Version::supports_sse3()) { 3308 movddup(xmm3, xmm4); 3309 } else { 3310 movdqu(xmm3, xmm4); 3311 movlhps(xmm3, xmm3); 3312 } 3313 subsd(xmm4, xmm6); 3314 
pshufd(xmm0, xmm0, 68); 3315 movdqu(xmm2, Address(rax, 0)); 3316 mulpd(xmm5, xmm0); 3317 subpd(xmm0, xmm6); 3318 mulsd(xmm7, xmm4); 3319 subsd(xmm3, xmm4); 3320 mulpd(xmm5, xmm0); 3321 mulpd(xmm0, xmm0); 3322 subsd(xmm3, xmm6); 3323 movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL 3324 subsd(xmm1, xmm3); 3325 movq(xmm3, Address(rax, 24)); 3326 addsd(xmm2, xmm3); 3327 subsd(xmm7, xmm2); 3328 mulsd(xmm2, xmm4); 3329 mulpd(xmm6, xmm0); 3330 mulsd(xmm3, xmm4); 3331 mulpd(xmm2, xmm0); 3332 mulpd(xmm0, xmm0); 4282 sbbq(r11, r10); 4283 sbbq(rbx, r9); 4284 movq(r8, rcx); 4285 movq(r10, r11); 4286 movq(r9, rbx); 4287 movl(rbx, 32768); 4288 shrq(rdi, 3); 4289 addl(rdi, 536870912); 4290 jmp(L_2TAG_PACKET_6_0_1); 4291 4292 bind(L_2TAG_PACKET_2_0_1); 4293 movsd(xmm0, Address(rsp, 8)); 4294 mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL 4295 movq(Address(rsp, 0), xmm0); 4296 4297 bind(L_2TAG_PACKET_13_0_1); 4298 4299 bind(B1_4); 4300 addq(rsp, 16); 4301 pop(rbx); 4302 } 4303 4304 /******************************************************************************/ 4305 // ALGORITHM DESCRIPTION - TAN() 4306 // --------------------- 4307 // 4308 // Polynomials coefficients and other constants. 4309 // 4310 // Note that in this algorithm, there is a different polynomial for 4311 // each breakpoint, so there are 32 sets of polynomial coefficients 4312 // as well as 32 instances of the other constants. 4313 // 4314 // The polynomial coefficients and constants are offset from the start 4315 // of the main block as follows: 4316 // 4317 // 0: c8 | c0 4318 // 16: c9 | c1 4319 // 32: c10 | c2 4320 // 48: c11 | c3 4321 // 64: c12 | c4 4322 // 80: c13 | c5 4323 // 96: c14 | c6 4324 // 112: c15 | c7 4325 // 128: T_hi 4326 // 136: T_lo 4327 // 144: Sigma 4328 // 152: T_hl 4329 // 160: Tau 4330 // 168: Mask 4331 // 176: (end of block) 4332 // 4333 // The total table size is therefore 5632 bytes. 
4334 // 4335 // Note that c0 and c1 are always zero. We could try storing 4336 // other constants here, and just loading the low part of the 4337 // SIMD register in these cases, after ensuring the high part 4338 // is zero. 4339 // 4340 // The higher terms of the polynomial are computed in the *low* 4341 // part of the SIMD register. This is so we can overlap the 4342 // multiplication by r^8 and the unpacking of the other part. 4343 // 4344 // The constants are: 4345 // T_hi + T_lo = accurate constant term in power series 4346 // Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit) 4347 // Tau = multiplier for the reciprocal, always -1 or 0 4348 // 4349 // The basic reconstruction formula using these constants is: 4350 // 4351 // High = tau * recip_hi + t_hi 4352 // Med = (sgn * r + t_hl * r)_hi 4353 // Low = (sgn * r + t_hl * r)_lo + 4354 // tau * recip_lo + T_lo + (T_hl + sigma) * c + pol 4355 // 4356 // where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 4357 // 4358 // (c0 = c1 = 0, but using them keeps SIMD regularity) 4359 // 4360 // We then do a compensated sum High + Med, add the low parts together 4361 // and then do the final sum. 
4362 // 4363 // Here recip_hi + recip_lo is an accurate reciprocal of the remainder 4364 // modulo pi/2 4365 // 4366 // Special cases: 4367 // tan(NaN) = quiet NaN, and raise invalid exception 4368 // tan(INF) = NaN and raise invalid exception 4369 // tan(+/-0) = +/-0 4370 // 4371 /******************************************************************************/ 4372 4373 4374 ALIGNED_(16) juint _ONEHALF_tan[] = 4375 { 4376 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL 4377 }; 4378 4379 ALIGNED_(16) juint _MUL16[] = 4380 { 4381 0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL 4382 }; 4383 4384 ALIGNED_(16) juint _sign_mask_tan[] = 4385 { 4386 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL 4387 }; 4388 4389 ALIGNED_(16) juint _PI32INV_tan[] = 4390 { 4391 0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL 4392 }; 4393 4394 ALIGNED_(16) juint _P_1_tan[] = 4395 { 4396 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL 4397 }; 4398 4399 ALIGNED_(16) juint _P_2_tan[] = 4400 { 4401 0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL 4402 }; 4403 4404 ALIGNED_(16) juint _P_3_tan[] = 4405 { 4406 0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL 4407 }; 4408 4409 ALIGNED_(16) juint _Ctable_tan[] = 4410 { 4411 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL, 4412 0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4413 0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL, 4414 0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4415 0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL, 4416 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL, 4417 0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4418 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 4419 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, 4420 0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 
0x3f9b74eaUL, 4421 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL, 4422 0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, 4423 0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL, 4424 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL, 4425 0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, 4426 0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL, 4427 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, 4428 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL, 4429 0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL, 4430 0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL, 4431 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL, 4432 0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL, 4433 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL, 4434 0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL, 4435 0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL, 4436 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL, 4437 0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL, 4438 0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 4439 0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL, 4440 0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL, 4441 0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 4442 0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL, 4443 0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 4444 0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL, 4445 0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4446 0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL, 4447 0xaa563951UL, 0x3fc90125UL, 
0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL, 4448 0x3fc50533UL, 0x9bed9b2eUL, 0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL, 4449 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL, 4450 0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL, 4451 0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL, 4452 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL, 4453 0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL, 4454 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4455 0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 4456 0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL, 4457 0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 4458 0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL, 4459 0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 4460 0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 4461 0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL, 4462 0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 4463 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, 4464 0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, 4465 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL, 4466 0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, 4467 0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL, 4468 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL, 4469 0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, 4470 0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL, 4471 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, 4472 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL, 4473 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL, 4474 
0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL, 4475 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL, 4476 0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL, 4477 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL, 4478 0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL, 4479 0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL, 4480 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL, 4481 0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL, 4482 0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 4483 0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL, 4484 0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL, 4485 0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 4486 0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL, 4487 0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 4488 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 4489 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4490 0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL, 4491 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL, 4492 0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL, 4493 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL, 4494 0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL, 4495 0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL, 4496 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL, 4497 0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL, 4498 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 4499 0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 4500 0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 
0x0359f1beUL, 0xbf0139e4UL, 4501 0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 4502 0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL, 4503 0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 4504 0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 4505 0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL, 4506 0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 4507 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, 4508 0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, 4509 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL, 4510 0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, 4511 0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL, 4512 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL, 4513 0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, 4514 0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL, 4515 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, 4516 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL, 4517 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL, 4518 0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL, 4519 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL, 4520 0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL, 4521 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL, 4522 0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL, 4523 0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL, 4524 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL, 4525 0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL, 4526 0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 4527 0xb93820c8UL, 
0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL, 4528 0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL, 4529 0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 4530 0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL, 4531 0xa158e8bcUL, 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 4532 0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL, 4533 0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 4534 0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL, 4535 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL, 4536 0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL, 4537 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL, 4538 0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL, 4539 0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL, 4540 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL, 4541 0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL, 4542 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 4543 0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL, 4544 0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL, 4545 0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 4546 0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL, 4547 0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 4548 0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 4549 0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL, 4550 0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 4551 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, 4552 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL, 4553 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 
0x00000000UL, 4554 0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL, 4555 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL, 4556 0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL, 4557 0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL, 4558 0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4559 0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL, 4560 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL, 4561 0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL, 4562 0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL, 4563 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL, 4564 0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL, 4565 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL, 4566 0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL, 4567 0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL, 4568 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL, 4569 0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL, 4570 0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 4571 0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL, 4572 0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL, 4573 0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 4574 0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL, 4575 0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 4576 0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL, 4577 0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 4578 0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL, 4579 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL, 4580 0x3ee264d4UL, 0xbb6cbb18UL, 
0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL, 4581 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL, 4582 0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL, 4583 0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL, 4584 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL, 4585 0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL, 4586 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 4587 0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 4588 0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL, 4589 0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 4590 0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL, 4591 0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 4592 0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 4593 0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL, 4594 0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 4595 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, 4596 0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, 4597 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL, 4598 0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, 4599 0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL, 4600 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL, 4601 0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, 4602 0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL, 4603 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, 4604 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL, 4605 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL, 4606 0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL, 4607 
0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL, 4608 0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL, 4609 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL, 4610 0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL, 4611 0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL, 4612 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL, 4613 0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL, 4614 0x00000000UL, 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 4615 0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL, 4616 0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL, 4617 0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 4618 0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL, 4619 0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 4620 0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL, 4621 0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 4622 0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL, 4623 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL, 4624 0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL, 4625 0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL, 4626 0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL, 4627 0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL, 4628 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL, 4629 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL, 4630 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4631 0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 4632 0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL, 4633 0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 
0x4011aab0UL, 0x74c30018UL, 4634 0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL, 4635 0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 4636 0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL, 4637 0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL, 4638 0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 4639 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, 4640 0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, 4641 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL, 4642 0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, 4643 0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL, 4644 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL, 4645 0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, 4646 0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL, 4647 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, 4648 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL, 4649 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL, 4650 0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL, 4651 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL, 4652 0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL, 4653 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL, 4654 0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL, 4655 0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL, 4656 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL, 4657 0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL, 4658 0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 4659 0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL, 4660 0x3fc1f250UL, 
0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL, 4661 0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 4662 0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL, 4663 0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 4664 0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL, 4665 0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4666 0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL, 4667 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL, 4668 0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL, 4669 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 0x59f21a6dUL, 4670 0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL, 4671 0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL, 4672 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL, 4673 0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL, 4674 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 4675 0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL, 4676 0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL, 4677 0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 4678 0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL, 4679 0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 4680 0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 4681 0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL, 4682 0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 4683 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, 4684 0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, 4685 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL, 4686 0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 
0x3fd62ac9UL, 4687 0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL, 4688 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL, 4689 0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, 4690 0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL, 4691 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, 4692 0x00000000UL, 0x00000000UL, 0x00000000UL 4693 }; 4694 4695 ALIGNED_(16) juint _MASK_35_tan[] = 4696 { 4697 0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL 4698 }; 4699 4700 ALIGNED_(16) juint _Q_11_tan[] = 4701 { 4702 0xb8fe4d77UL, 0x3f82609aUL 4703 }; 4704 4705 ALIGNED_(16) juint _Q_9_tan[] = 4706 { 4707 0xbf847a43UL, 0x3f9664a0UL 4708 }; 4709 4710 ALIGNED_(16) juint _Q_7_tan[] = 4711 { 4712 0x52c4c8abUL, 0x3faba1baUL 4713 }; 4714 4715 ALIGNED_(16) juint _Q_5_tan[] = 4716 { 4717 0x11092746UL, 0x3fc11111UL 4718 }; 4719 4720 ALIGNED_(16) juint _Q_3_tan[] = 4721 { 4722 0x55555612UL, 0x3fd55555UL 4723 }; 4724 4725 ALIGNED_(16) juint _PI_INV_TABLE_tan[] = 4726 { 4727 0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL, 4728 0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL, 4729 0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL, 4730 0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL, 4731 0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL, 4732 0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL, 4733 0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL, 4734 0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL, 4735 0xf0cfbc21UL 4736 }; 4737 4738 ALIGNED_(8) juint _PI_4_tan[] = 4739 { 4740 0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL 4741 }; 4742 4743 ALIGNED_(8) juint _QQ_2_tan[] = 4744 { 4745 0x676733afUL, 0x3d32e7b9UL 4746 }; 4747 4748 ALIGNED_(8) juint _ONE_tan[] = 4749 { 4750 0x00000000UL, 0x3ff00000UL 4751 }; 4752 4753 
ALIGNED_(8) juint _TWO_POW_55_tan[] = 4754 { 4755 0x00000000UL, 0x43600000UL 4756 }; 4757 4758 ALIGNED_(4) juint _TWO_POW_M55_tan[] = 4759 { 4760 0x00000000UL, 0x3c800000UL 4761 }; 4762 4763 ALIGNED_(4) juint _NEG_ZERO_tan[] = 4764 { 4765 0x00000000UL, 0x80000000UL 4766 }; 4767 4768 void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) { 4769 4770 Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; 4771 Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; 4772 Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; 4773 Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1, B1_2, B1_3, B1_4, B1_5, start; 4774 4775 address ONEHALF = (address)_ONEHALF_tan; 4776 address MUL16 = (address)_MUL16; 4777 address sign_mask = (address)_sign_mask_tan; 4778 address PI32INV = (address)_PI32INV_tan; 4779 address P_1 = (address)_P_1_tan; 4780 address P_2 = (address)_P_2_tan; 4781 address P_3 = (address)_P_3_tan; 4782 address Ctable = (address)_Ctable_tan; 4783 address MASK_35 = (address)_MASK_35_tan; 4784 address Q_11 = (address)_Q_11_tan; 4785 address Q_9 = (address)_Q_9_tan; 4786 address Q_7 = (address)_Q_7_tan; 4787 address Q_5 = (address)_Q_5_tan; 4788 address Q_3 = (address)_Q_3_tan; 4789 address PI_INV_TABLE = (address)_PI_INV_TABLE_tan; 4790 address PI_4 = (address)_PI_4_tan; 4791 address QQ_2 = (address)_QQ_2_tan; 4792 address ONE = (address)_ONE_tan; 4793 address TWO_POW_55 = (address)_TWO_POW_55_tan; 4794 address TWO_POW_M55 = (address)_TWO_POW_M55_tan; 4795 address NEG_ZERO = (address)_NEG_ZERO_tan; 4796 4797 bind(start); 4798 push(rbx); 4799 subq(rsp, 16); 4800 movsd(Address(rsp, 8), xmm0); 4801 4802 bind(B1_2); 4803 pextrw(eax, 
xmm0, 3); 4804 andl(eax, 32767); 4805 subl(eax, 16314); 4806 cmpl(eax, 270); 4807 jcc(Assembler::above, L_2TAG_PACKET_0_0_1); 4808 movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL 4809 movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL 4810 unpcklpd(xmm0, xmm0); 4811 movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL 4812 andpd(xmm4, xmm0); 4813 movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL 4814 mulpd(xmm1, xmm0); 4815 por(xmm5, xmm4); 4816 addpd(xmm1, xmm5); 4817 movdqu(xmm7, xmm1); 4818 unpckhpd(xmm7, xmm7); 4819 cvttsd2sil(edx, xmm7); 4820 cvttpd2dq(xmm1, xmm1); 4821 cvtdq2pd(xmm1, xmm1); 4822 mulpd(xmm1, xmm6); 4823 movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL 4824 movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL 4825 addq(rdx, 469248); 4826 movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL 4827 mulpd(xmm3, xmm1); 4828 andq(rdx, 31); 4829 mulsd(xmm5, xmm1); 4830 movq(rcx, rdx); 4831 mulpd(xmm4, xmm1); 4832 shlq(rcx, 1); 4833 subpd(xmm0, xmm3); 4834 mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL 4835 addq(rdx, rcx); 4836 shlq(rcx, 2); 4837 addq(rdx, rcx); 4838 addsd(xmm5, xmm0); 4839 movdqu(xmm2, xmm0); 4840 subpd(xmm0, xmm4); 4841 movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL 4842 shlq(rdx, 4); 4843 lea(rax, ExternalAddress(Ctable)); 4844 andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL 4845 movdqu(xmm3, xmm0); 4846 addq(rax, rdx); 4847 subpd(xmm2, xmm0); 4848 unpckhpd(xmm0, xmm0); 4849 divsd(xmm6, xmm5); 4850 subpd(xmm2, xmm4); 4851 movdqu(xmm7, Address(rax, 16)); 4852 subsd(xmm3, xmm5); 4853 mulpd(xmm7, xmm0); 4854 subpd(xmm2, xmm1); 4855 movdqu(xmm1, 
Address(rax, 48)); 4856 mulpd(xmm1, xmm0); 4857 movdqu(xmm4, Address(rax, 96)); 4858 mulpd(xmm4, xmm0); 4859 addsd(xmm2, xmm3); 4860 movdqu(xmm3, xmm0); 4861 mulpd(xmm0, xmm0); 4862 addpd(xmm7, Address(rax, 0)); 4863 addpd(xmm1, Address(rax, 32)); 4864 mulpd(xmm1, xmm0); 4865 addpd(xmm4, Address(rax, 80)); 4866 addpd(xmm7, xmm1); 4867 movdqu(xmm1, Address(rax, 112)); 4868 mulpd(xmm1, xmm0); 4869 mulpd(xmm0, xmm0); 4870 addpd(xmm4, xmm1); 4871 movdqu(xmm1, Address(rax, 64)); 4872 mulpd(xmm1, xmm0); 4873 addpd(xmm7, xmm1); 4874 movdqu(xmm1, xmm3); 4875 mulpd(xmm3, xmm0); 4876 mulsd(xmm0, xmm0); 4877 mulpd(xmm1, Address(rax, 144)); 4878 mulpd(xmm4, xmm3); 4879 movdqu(xmm3, xmm1); 4880 addpd(xmm7, xmm4); 4881 movdqu(xmm4, xmm1); 4882 mulsd(xmm0, xmm7); 4883 unpckhpd(xmm7, xmm7); 4884 addsd(xmm0, xmm7); 4885 unpckhpd(xmm1, xmm1); 4886 addsd(xmm3, xmm1); 4887 subsd(xmm4, xmm3); 4888 addsd(xmm1, xmm4); 4889 movdqu(xmm4, xmm2); 4890 movq(xmm7, Address(rax, 144)); 4891 unpckhpd(xmm2, xmm2); 4892 addsd(xmm7, Address(rax, 152)); 4893 mulsd(xmm7, xmm2); 4894 addsd(xmm7, Address(rax, 136)); 4895 addsd(xmm7, xmm1); 4896 addsd(xmm0, xmm7); 4897 movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL 4898 mulsd(xmm4, xmm6); 4899 movq(xmm2, Address(rax, 168)); 4900 andpd(xmm2, xmm6); 4901 mulsd(xmm5, xmm2); 4902 mulsd(xmm6, Address(rax, 160)); 4903 subsd(xmm7, xmm5); 4904 subsd(xmm2, Address(rax, 128)); 4905 subsd(xmm7, xmm4); 4906 mulsd(xmm7, xmm6); 4907 movdqu(xmm4, xmm3); 4908 subsd(xmm3, xmm2); 4909 addsd(xmm2, xmm3); 4910 subsd(xmm4, xmm2); 4911 addsd(xmm0, xmm4); 4912 subsd(xmm0, xmm7); 4913 addsd(xmm0, xmm3); 4914 jmp(B1_4); 4915 4916 bind(L_2TAG_PACKET_0_0_1); 4917 jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); 4918 pextrw(eax, xmm0, 3); 4919 movl(edx, eax); 4920 andl(eax, 32752); 4921 jcc(Assembler::equal, L_2TAG_PACKET_2_0_1); 4922 andl(edx, 32767); 4923 cmpl(edx, 15904); 4924 jcc(Assembler::below, L_2TAG_PACKET_3_0_1); 4925 movdqu(xmm2, xmm0); 4926 movdqu(xmm3, 
xmm0); 4927 movq(xmm1, ExternalAddress(Q_11)); //0xb8fe4d77UL, 0x3f82609aUL 4928 mulsd(xmm2, xmm0); 4929 mulsd(xmm3, xmm2); 4930 mulsd(xmm1, xmm2); 4931 addsd(xmm1, ExternalAddress(Q_9)); //0xbf847a43UL, 0x3f9664a0UL 4932 mulsd(xmm1, xmm2); 4933 addsd(xmm1, ExternalAddress(Q_7)); //0x52c4c8abUL, 0x3faba1baUL 4934 mulsd(xmm1, xmm2); 4935 addsd(xmm1, ExternalAddress(Q_5)); //0x11092746UL, 0x3fc11111UL 4936 mulsd(xmm1, xmm2); 4937 addsd(xmm1, ExternalAddress(Q_3)); //0x55555612UL, 0x3fd55555UL 4938 mulsd(xmm1, xmm3); 4939 addsd(xmm0, xmm1); 4940 jmp(B1_4); 4941 4942 bind(L_2TAG_PACKET_3_0_1); 4943 movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL 4944 mulsd(xmm3, xmm0); 4945 addsd(xmm0, xmm3); 4946 mulsd(xmm0, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL 4947 jmp(B1_4); 4948 4949 bind(L_2TAG_PACKET_2_0_1); 4950 movdqu(xmm1, xmm0); 4951 mulsd(xmm1, xmm1); 4952 jmp(B1_4); 4953 4954 bind(L_2TAG_PACKET_1_0_1); 4955 pextrw(eax, xmm0, 3); 4956 andl(eax, 32752); 4957 cmpl(eax, 32752); 4958 jcc(Assembler::equal, L_2TAG_PACKET_4_0_1); 4959 pextrw(ecx, xmm0, 3); 4960 andl(ecx, 32752); 4961 subl(ecx, 16224); 4962 shrl(ecx, 7); 4963 andl(ecx, 65532); 4964 lea(r11, ExternalAddress(PI_INV_TABLE)); 4965 addq(rcx, r11); 4966 movdq(rax, xmm0); 4967 movl(r10, Address(rcx, 20)); 4968 movl(r8, Address(rcx, 24)); 4969 movl(edx, eax); 4970 shrq(rax, 21); 4971 orl(eax, INT_MIN); 4972 shrl(eax, 11); 4973 movl(r9, r10); 4974 imulq(r10, rdx); 4975 imulq(r9, rax); 4976 imulq(r8, rax); 4977 movl(rsi, Address(rcx, 16)); 4978 movl(rdi, Address(rcx, 12)); 4979 movl(r11, r10); 4980 shrq(r10, 32); 4981 addq(r9, r10); 4982 addq(r11, r8); 4983 movl(r8, r11); 4984 shrq(r11, 32); 4985 addq(r9, r11); 4986 movl(r10, rsi); 4987 imulq(rsi, rdx); 4988 imulq(r10, rax); 4989 movl(r11, rdi); 4990 imulq(rdi, rdx); 4991 movl(rbx, rsi); 4992 shrq(rsi, 32); 4993 addq(r9, rbx); 4994 movl(rbx, r9); 4995 shrq(r9, 32); 4996 addq(r10, rsi); 4997 addq(r10, r9); 4998 shlq(rbx, 32); 4999 
orq(r8, rbx); 5000 imulq(r11, rax); 5001 movl(r9, Address(rcx, 8)); 5002 movl(rsi, Address(rcx, 4)); 5003 movl(rbx, rdi); 5004 shrq(rdi, 32); 5005 addq(r10, rbx); 5006 movl(rbx, r10); 5007 shrq(r10, 32); 5008 addq(r11, rdi); 5009 addq(r11, r10); 5010 movq(rdi, r9); 5011 imulq(r9, rdx); 5012 imulq(rdi, rax); 5013 movl(r10, r9); 5014 shrq(r9, 32); 5015 addq(r11, r10); 5016 movl(r10, r11); 5017 shrq(r11, 32); 5018 addq(rdi, r9); 5019 addq(rdi, r11); 5020 movq(r9, rsi); 5021 imulq(rsi, rdx); 5022 imulq(r9, rax); 5023 shlq(r10, 32); 5024 orq(r10, rbx); 5025 movl(eax, Address(rcx, 0)); 5026 movl(r11, rsi); 5027 shrq(rsi, 32); 5028 addq(rdi, r11); 5029 movl(r11, rdi); 5030 shrq(rdi, 32); 5031 addq(r9, rsi); 5032 addq(r9, rdi); 5033 imulq(rdx, rax); 5034 pextrw(rbx, xmm0, 3); 5035 lea(rdi, ExternalAddress(PI_INV_TABLE)); 5036 subq(rcx, rdi); 5037 addl(ecx, ecx); 5038 addl(ecx, ecx); 5039 addl(ecx, ecx); 5040 addl(ecx, 19); 5041 movl(rsi, 32768); 5042 andl(rsi, rbx); 5043 shrl(rbx, 4); 5044 andl(rbx, 2047); 5045 subl(rbx, 1023); 5046 subl(ecx, rbx); 5047 addq(r9, rdx); 5048 movl(edx, ecx); 5049 addl(edx, 32); 5050 cmpl(ecx, 0); 5051 jcc(Assembler::less, L_2TAG_PACKET_5_0_1); 5052 negl(ecx); 5053 addl(ecx, 29); 5054 shll(r9); 5055 movl(rdi, r9); 5056 andl(r9, 1073741823); 5057 testl(r9, 536870912); 5058 jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); 5059 shrl(r9); 5060 movl(rbx, 0); 5061 shlq(r9, 32); 5062 orq(r9, r11); 5063 5064 bind(L_2TAG_PACKET_7_0_1); 5065 5066 bind(L_2TAG_PACKET_8_0_1); 5067 cmpq(r9, 0); 5068 jcc(Assembler::equal, L_2TAG_PACKET_9_0_1); 5069 5070 bind(L_2TAG_PACKET_10_0_1); 5071 bsrq(r11, r9); 5072 movl(ecx, 29); 5073 subl(ecx, r11); 5074 jcc(Assembler::lessEqual, L_2TAG_PACKET_11_0_1); 5075 shlq(r9); 5076 movq(rax, r10); 5077 shlq(r10); 5078 addl(edx, ecx); 5079 negl(ecx); 5080 addl(ecx, 64); 5081 shrq(rax); 5082 shrq(r8); 5083 orq(r9, rax); 5084 orq(r10, r8); 5085 5086 bind(L_2TAG_PACKET_12_0_1); 5087 cvtsi2sdq(xmm0, r9); 5088 shrq(r10, 1); 5089 
cvtsi2sdq(xmm3, r10); 5090 xorpd(xmm4, xmm4); 5091 shll(edx, 4); 5092 negl(edx); 5093 addl(edx, 16368); 5094 orl(edx, rsi); 5095 xorl(edx, rbx); 5096 pinsrw(xmm4, edx, 3); 5097 movq(xmm2, ExternalAddress(PI_4)); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL 5098 movq(xmm7, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL 5099 xorpd(xmm5, xmm5); 5100 subl(edx, 1008); 5101 pinsrw(xmm5, edx, 3); 5102 mulsd(xmm0, xmm4); 5103 shll(rsi, 16); 5104 sarl(rsi, 31); 5105 mulsd(xmm3, xmm5); 5106 movdqu(xmm1, xmm0); 5107 mulsd(xmm0, xmm2); 5108 shrl(rdi, 30); 5109 addsd(xmm1, xmm3); 5110 mulsd(xmm3, xmm2); 5111 addl(rdi, rsi); 5112 xorl(rdi, rsi); 5113 mulsd(xmm7, xmm1); 5114 movl(eax, rdi); 5115 addsd(xmm7, xmm3); 5116 movdqu(xmm2, xmm0); 5117 addsd(xmm0, xmm7); 5118 subsd(xmm2, xmm0); 5119 addsd(xmm7, xmm2); 5120 movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL 5121 if (VM_Version::supports_sse3()) { 5122 movddup(xmm0, xmm0); 5123 } else { 5124 movlhps(xmm0, xmm0); 5125 } 5126 movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL 5127 andpd(xmm4, xmm0); 5128 mulpd(xmm1, xmm0); 5129 if (VM_Version::supports_sse3()) { 5130 movddup(xmm7, xmm7); 5131 } else { 5132 movlhps(xmm7, xmm7); 5133 } 5134 movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL 5135 movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL 5136 por(xmm5, xmm4); 5137 addpd(xmm1, xmm5); 5138 movdqu(xmm5, xmm1); 5139 unpckhpd(xmm5, xmm5); 5140 cvttsd2sil(edx, xmm5); 5141 cvttpd2dq(xmm1, xmm1); 5142 cvtdq2pd(xmm1, xmm1); 5143 mulpd(xmm1, xmm6); 5144 movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL 5145 movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL 5146 shll(eax, 4); 5147 addl(edx, 469248); 5148 movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 
0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL 5149 mulpd(xmm3, xmm1); 5150 addl(edx, eax); 5151 andl(edx, 31); 5152 mulsd(xmm5, xmm1); 5153 movl(ecx, edx); 5154 mulpd(xmm4, xmm1); 5155 shll(ecx, 1); 5156 subpd(xmm0, xmm3); 5157 mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL 5158 addl(edx, ecx); 5159 shll(ecx, 2); 5160 addl(edx, ecx); 5161 addsd(xmm5, xmm0); 5162 movdqu(xmm2, xmm0); 5163 subpd(xmm0, xmm4); 5164 movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL 5165 shll(edx, 4); 5166 lea(rax, ExternalAddress(Ctable)); 5167 andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL 5168 movdqu(xmm3, xmm0); 5169 addq(rax, rdx); 5170 subpd(xmm2, xmm0); 5171 unpckhpd(xmm0, xmm0); 5172 divsd(xmm6, xmm5); 5173 subpd(xmm2, xmm4); 5174 subsd(xmm3, xmm5); 5175 subpd(xmm2, xmm1); 5176 movdqu(xmm1, Address(rax, 48)); 5177 addpd(xmm2, xmm7); 5178 movdqu(xmm7, Address(rax, 16)); 5179 mulpd(xmm7, xmm0); 5180 movdqu(xmm4, Address(rax, 96)); 5181 mulpd(xmm1, xmm0); 5182 mulpd(xmm4, xmm0); 5183 addsd(xmm2, xmm3); 5184 movdqu(xmm3, xmm0); 5185 mulpd(xmm0, xmm0); 5186 addpd(xmm7, Address(rax, 0)); 5187 addpd(xmm1, Address(rax, 32)); 5188 mulpd(xmm1, xmm0); 5189 addpd(xmm4, Address(rax, 80)); 5190 addpd(xmm7, xmm1); 5191 movdqu(xmm1, Address(rax, 112)); 5192 mulpd(xmm1, xmm0); 5193 mulpd(xmm0, xmm0); 5194 addpd(xmm4, xmm1); 5195 movdqu(xmm1, Address(rax, 64)); 5196 mulpd(xmm1, xmm0); 5197 addpd(xmm7, xmm1); 5198 movdqu(xmm1, xmm3); 5199 mulpd(xmm3, xmm0); 5200 mulsd(xmm0, xmm0); 5201 mulpd(xmm1, Address(rax, 144)); 5202 mulpd(xmm4, xmm3); 5203 movdqu(xmm3, xmm1); 5204 addpd(xmm7, xmm4); 5205 movdqu(xmm4, xmm1); 5206 mulsd(xmm0, xmm7); 5207 unpckhpd(xmm7, xmm7); 5208 addsd(xmm0, xmm7); 5209 unpckhpd(xmm1, xmm1); 5210 addsd(xmm3, xmm1); 5211 subsd(xmm4, xmm3); 5212 addsd(xmm1, xmm4); 5213 movdqu(xmm4, xmm2); 5214 movq(xmm7, Address(rax, 144)); 5215 unpckhpd(xmm2, xmm2); 5216 addsd(xmm7, Address(rax, 
152)); 5217 mulsd(xmm7, xmm2); 5218 addsd(xmm7, Address(rax, 136)); 5219 addsd(xmm7, xmm1); 5220 addsd(xmm0, xmm7); 5221 movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL 5222 mulsd(xmm4, xmm6); 5223 movq(xmm2, Address(rax, 168)); 5224 andpd(xmm2, xmm6); 5225 mulsd(xmm5, xmm2); 5226 mulsd(xmm6, Address(rax, 160)); 5227 subsd(xmm7, xmm5); 5228 subsd(xmm2, Address(rax, 128)); 5229 subsd(xmm7, xmm4); 5230 mulsd(xmm7, xmm6); 5231 movdqu(xmm4, xmm3); 5232 subsd(xmm3, xmm2); 5233 addsd(xmm2, xmm3); 5234 subsd(xmm4, xmm2); 5235 addsd(xmm0, xmm4); 5236 subsd(xmm0, xmm7); 5237 addsd(xmm0, xmm3); 5238 jmp(B1_4); 5239 5240 bind(L_2TAG_PACKET_9_0_1); 5241 addl(edx, 64); 5242 movq(r9, r10); 5243 movq(r10, r8); 5244 movl(r8, 0); 5245 cmpq(r9, 0); 5246 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_1); 5247 addl(edx, 64); 5248 movq(r9, r10); 5249 movq(r10, r8); 5250 cmpq(r9, 0); 5251 jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_1); 5252 jmp(L_2TAG_PACKET_12_0_1); 5253 5254 bind(L_2TAG_PACKET_11_0_1); 5255 jcc(Assembler::equal, L_2TAG_PACKET_12_0_1); 5256 negl(ecx); 5257 shrq(r10); 5258 movq(rax, r9); 5259 shrq(r9); 5260 subl(edx, ecx); 5261 negl(ecx); 5262 addl(ecx, 64); 5263 shlq(rax); 5264 orq(r10, rax); 5265 jmp(L_2TAG_PACKET_12_0_1); 5266 5267 bind(L_2TAG_PACKET_5_0_1); 5268 notl(ecx); 5269 shlq(r9, 32); 5270 orq(r9, r11); 5271 shlq(r9); 5272 movq(rdi, r9); 5273 testl(r9, INT_MIN); 5274 jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1); 5275 shrl(r9); 5276 movl(rbx, 0); 5277 shrq(rdi, 2); 5278 jmp(L_2TAG_PACKET_8_0_1); 5279 5280 bind(L_2TAG_PACKET_6_0_1); 5281 shrl(r9); 5282 movl(rbx, 1073741824); 5283 shrl(rbx); 5284 shlq(r9, 32); 5285 orq(r9, r11); 5286 shlq(rbx, 32); 5287 addl(rdi, 1073741824); 5288 movl(rcx, 0); 5289 movl(r11, 0); 5290 subq(rcx, r8); 5291 sbbq(r11, r10); 5292 sbbq(rbx, r9); 5293 movq(r8, rcx); 5294 movq(r10, r11); 5295 movq(r9, rbx); 5296 movl(rbx, 32768); 5297 jmp(L_2TAG_PACKET_7_0_1); 5298 5299 bind(L_2TAG_PACKET_13_0_1); 5300 shrl(r9); 5301 
mov64(rbx, 0x100000000); 5302 shrq(rbx); 5303 movl(rcx, 0); 5304 movl(r11, 0); 5305 subq(rcx, r8); 5306 sbbq(r11, r10); 5307 sbbq(rbx, r9); 5308 movq(r8, rcx); 5309 movq(r10, r11); 5310 movq(r9, rbx); 5311 movl(rbx, 32768); 5312 shrq(rdi, 2); 5313 addl(rdi, 1073741824); 5314 jmp(L_2TAG_PACKET_8_0_1); 5315 5316 bind(L_2TAG_PACKET_4_0_1); 5317 movq(xmm0, Address(rsp, 8)); 5318 mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL 5319 movq(Address(rsp, 0), xmm0); 5320 5321 bind(L_2TAG_PACKET_14_0_1); 5322 5323 bind(B1_4); 5324 addq(rsp, 16); 5325 5326 } |