/*
 * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * Intel Math Library (LIBM) Source Code
 * Copyright (c) 2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//       values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
// // P(y) is a minimax polynomial approximation of exp(x)-1 // on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V). // // To avoid problems with arithmetic overflow and underflow, // n n1 n2 // value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] // where BIAS is a value of exponent bias. // // Special cases: // exp(NaN) = NaN // exp(+INF) = +INF // exp(-INF) = 0 // exp(x) = 1 for subnormals // for finite argument, only exp(0)=1 is exact // For IEEE double // if x > 709.782712893383973096 then exp(x) overflow // if x < -745.133219101941108420 then exp(x) underflow // /******************************************************************************/ #include "precompiled.hpp" #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" #ifdef _LP64 //registers, // input: xmm0 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 // rax, rdx, rcx, tmp - r11 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; assert_different_registers(tmp, eax, ecx, edx); jmp(start); address cv = pc(); emit_int32(1697350398); emit_int32(1079448903); emit_int32(1697350398); emit_int32(1079448903); emit_int32(4277796864); emit_int32(1065758274); emit_int32(4277796864); emit_int32(1065758274); emit_int32(3164486458); emit_int32(1025308570); emit_int32(3164486458); emit_int32(1025308570); emit_int32(4294967294); emit_int32(1071644671); emit_int32(4294967294); emit_int32(1071644671); emit_int32(3811088480); emit_int32(1062650204); 
emit_int32(1432067621); emit_int32(1067799893); emit_int32(3230715663); emit_int32(1065423125); emit_int32(1431604129); emit_int32(1069897045); address Shifter = pc(); emit_int32(0); emit_int32(1127743488); emit_int32(0); emit_int32(1127743488); address mmask = pc(); emit_int32(4294967232); emit_int32(0); emit_int32(4294967232); emit_int32(0); address bias = pc(); emit_int32(65472); emit_int32(0); emit_int32(65472); emit_int32(0); address Tbl_addr = pc(); emit_int32(0); emit_int32(0); emit_int32(0); emit_int32(0); emit_int32(235107661); emit_int32(1018002367); emit_int32(1048019040); emit_int32(11418); emit_int32(896005651); emit_int32(1015861842); emit_int32(3541402996); emit_int32(22960); emit_int32(1642514529); emit_int32(1012987726); emit_int32(410360776); emit_int32(34629); emit_int32(1568897900); emit_int32(1016568486); emit_int32(1828292879); emit_int32(46424); emit_int32(1882168529); emit_int32(1010744893); emit_int32(852742562); emit_int32(58348); emit_int32(509852888); emit_int32(1017336174); emit_int32(3490863952); emit_int32(70401); emit_int32(653277307); emit_int32(1017431380); emit_int32(2930322911); emit_int32(82586); emit_int32(1649557430); emit_int32(1017729363); emit_int32(1014845818); emit_int32(94904); emit_int32(1058231231); emit_int32(1015777676); emit_int32(3949972341); emit_int32(107355); emit_int32(1044000607); emit_int32(1016786167); emit_int32(828946858); emit_int32(119943); emit_int32(1151779725); emit_int32(1015705409); emit_int32(2288159958); emit_int32(132667); emit_int32(3819481236); emit_int32(1016499965); emit_int32(1853186616); emit_int32(145530); emit_int32(2552227826); emit_int32(1015039787); emit_int32(1709341917); emit_int32(158533); emit_int32(1829350193); emit_int32(1015216097); emit_int32(4112506593); emit_int32(171677); emit_int32(1913391795); emit_int32(1015756674); emit_int32(2799960843); emit_int32(184965); emit_int32(1303423926); emit_int32(1015238005); emit_int32(171030293); emit_int32(198398); emit_int32(1574172746); 
emit_int32(1016061241); emit_int32(2992903935); emit_int32(211976); emit_int32(3424156969); emit_int32(1017196428); emit_int32(926591434); emit_int32(225703); emit_int32(1938513547); emit_int32(1017631273); emit_int32(887463926); emit_int32(239579); emit_int32(2804567149); emit_int32(1015390024); emit_int32(1276261410); emit_int32(253606); emit_int32(631083525); emit_int32(1017690182); emit_int32(569847337); emit_int32(267786); emit_int32(1623370770); emit_int32(1011049453); emit_int32(1617004845); emit_int32(282120); emit_int32(3667985273); emit_int32(1013894369); emit_int32(3049340112); emit_int32(296610); emit_int32(3145379760); emit_int32(1014403278); emit_int32(3577096743); emit_int32(311258); emit_int32(2603100681); emit_int32(1017152460); emit_int32(1990012070); emit_int32(326066); emit_int32(3249202951); emit_int32(1017448880); emit_int32(1453150081); emit_int32(341035); emit_int32(419288974); emit_int32(1016280325); emit_int32(917841882); emit_int32(356167); emit_int32(3793507337); emit_int32(1016095713); emit_int32(3712504873); emit_int32(371463); emit_int32(728023093); emit_int32(1016345318); emit_int32(363667784); emit_int32(386927); emit_int32(2582678538); emit_int32(1017123460); emit_int32(2956612996); emit_int32(402558); emit_int32(7592966); emit_int32(1016721543); emit_int32(2186617380); emit_int32(418360); emit_int32(228611441); emit_int32(1016696141); emit_int32(1719614412); emit_int32(434334); emit_int32(2261665670); emit_int32(1017457593); emit_int32(1013258798); emit_int32(450482); emit_int32(544148907); emit_int32(1017323666); emit_int32(3907805043); emit_int32(466805); emit_int32(2383914918); emit_int32(1017143586); emit_int32(1447192520); emit_int32(483307); emit_int32(1176412038); emit_int32(1017267372); emit_int32(1944781190); emit_int32(499988); emit_int32(2882956373); emit_int32(1013312481); emit_int32(919555682); emit_int32(516851); emit_int32(3154077648); emit_int32(1016528543); emit_int32(2571947538); emit_int32(533897); 
emit_int32(348651999); emit_int32(1016405780); emit_int32(2604962540); emit_int32(551129); emit_int32(3253791412); emit_int32(1015920431); emit_int32(1110089947); emit_int32(568549); emit_int32(1509121860); emit_int32(1014756995); emit_int32(2568320822); emit_int32(586158); emit_int32(2617649212); emit_int32(1017340090); emit_int32(2966275556); emit_int32(603959); emit_int32(553214634); emit_int32(1016457425); emit_int32(2682146383); emit_int32(621954); emit_int32(730975783); emit_int32(1014083580); emit_int32(2191782032); emit_int32(640145); emit_int32(1486499517); emit_int32(1016818996); emit_int32(2069751140); emit_int32(658534); emit_int32(2595788928); emit_int32(1016407932); emit_int32(2990417244); emit_int32(677123); emit_int32(1853053619); emit_int32(1015310724); emit_int32(1434058175); emit_int32(695915); emit_int32(2462790535); emit_int32(1015814775); emit_int32(2572866477); emit_int32(714911); emit_int32(3693944214); emit_int32(1017259110); emit_int32(3092190714); emit_int32(734114); emit_int32(2979333550); emit_int32(1017188654); emit_int32(4076559942); emit_int32(753526); emit_int32(174054861); emit_int32(1014300631); emit_int32(2420883922); emit_int32(773150); emit_int32(816778419); emit_int32(1014197934); emit_int32(3716502172); emit_int32(792987); emit_int32(3507050924); emit_int32(1015341199); emit_int32(777507147); emit_int32(813041); emit_int32(1821514088); emit_int32(1013410604); emit_int32(3706687593); emit_int32(833312); emit_int32(920623539); emit_int32(1016295433); emit_int32(1242007931); emit_int32(853805); emit_int32(2789017511); emit_int32(1014276997); emit_int32(3707479175); emit_int32(874520); emit_int32(3586233004); emit_int32(1015962192); emit_int32(64696965); emit_int32(895462); emit_int32(474650514); emit_int32(1016642419); emit_int32(863738718); emit_int32(916631); emit_int32(1614448851); emit_int32(1014281732); emit_int32(3884662774); emit_int32(938030); emit_int32(2450082086); emit_int32(1016164135); emit_int32(2728693977); 
emit_int32(959663); emit_int32(1101668360); emit_int32(1015989180); emit_int32(3999357479); emit_int32(981531); emit_int32(835814894); emit_int32(1015702697); emit_int32(1533953344); emit_int32(1003638); emit_int32(1301400989); emit_int32(1014466875); emit_int32(2174652632); emit_int32(1025985); address ALLONES = pc(); emit_int32(4294967295); emit_int32(4294967295); emit_int32(4294967295); address ebias = pc(); emit_int32(0); emit_int32(1072693248); emit_int32(0); emit_int32(1072693248); address XMAX = pc(); emit_int32(4294967295); emit_int32(2146435071); address XMIN = pc(); emit_int32(0); emit_int32(1048576); address INF = pc(); emit_int32(0); emit_int32(2146435072); address ZERO = pc(); emit_int32(0); emit_int32(0); address ONE_val = pc(); emit_int32(0); emit_int32(1072693248); bind(start); subq(rsp, 24); movsd(Address(rsp, 8), xmm0); unpcklpd(xmm0, xmm0); movdqu(xmm1, InternalAddress(cv)); movdqu(xmm6, InternalAddress(Shifter)); movdqu(xmm2, InternalAddress(16+cv)); movdqu(xmm3, InternalAddress(32+cv)); pextrw(eax, xmm0, 3); andl(eax, 32767); movl(edx, 16527); subl(edx, eax); subl(eax, 15504); orl(edx, eax); cmpl(edx, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); mulpd(xmm1, xmm0); addpd(xmm1, xmm6); movapd(xmm7, xmm1); subpd(xmm1, xmm6); mulpd(xmm2, xmm1); movdqu(xmm4, InternalAddress(64+cv)); mulpd(xmm3, xmm1); movdqu(xmm5, InternalAddress(80+cv)); subpd(xmm0, xmm2); movdl(eax, xmm7); movl(ecx, eax); andl(ecx, 63); shll(ecx, 4); sarl(eax, 6); movl(edx, eax); movdqu(xmm6, InternalAddress(mmask)); pand(xmm7, xmm6); movdqu(xmm6, InternalAddress(bias)); paddq(xmm7, xmm6); psllq(xmm7, 46); subpd(xmm0, xmm3); lea(tmp, InternalAddress(Tbl_addr)); movdqu(xmm2, Address(ecx,tmp)); mulpd(xmm4, xmm0); movapd(xmm6, xmm0); movapd(xmm1, xmm0); mulpd(xmm6, xmm6); mulpd(xmm0, xmm6); addpd(xmm5, xmm4); mulsd(xmm0, xmm6); mulpd(xmm6, InternalAddress(48+cv)); addsd(xmm1, xmm2); unpckhpd(xmm2, xmm2); mulpd(xmm0, xmm5); addsd(xmm1, xmm0); por(xmm2, xmm7); 
unpckhpd(xmm0, xmm0); addsd(xmm0, xmm1); addsd(xmm0, xmm6); addl(edx, 894); cmpl(edx, 1916); jcc (Assembler::above, L_2TAG_PACKET_1_0_2); mulsd(xmm0, xmm2); addsd(xmm0, xmm2); jmp (B1_5); bind(L_2TAG_PACKET_1_0_2); xorpd(xmm3, xmm3); movdqu(xmm4, InternalAddress(ALLONES)); movl(edx, -1022); subl(edx, eax); movdl(xmm5, edx); psllq(xmm4, xmm5); movl(ecx, eax); sarl(eax, 1); pinsrw(xmm3, eax, 3); movdqu(xmm6, InternalAddress(ebias)); psllq(xmm3, 4); psubd(xmm2, xmm3); mulsd(xmm0, xmm2); cmpl(edx, 52); jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); pand(xmm4, xmm2); paddd(xmm3, xmm6); subsd(xmm2, xmm4); addsd(xmm0, xmm2); cmpl(ecx, 1023); jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); pextrw(ecx, xmm0, 3); andl(ecx, 32768); orl(edx, ecx); cmpl(edx, 0); jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); movapd(xmm6, xmm0); addsd(xmm0, xmm4); mulsd(xmm0, xmm3); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 0); jcc(Assembler::equal, L_2TAG_PACKET_5_0_2); jmp(B1_5); bind(L_2TAG_PACKET_5_0_2); mulsd(xmm6, xmm3); mulsd(xmm4, xmm3); movdqu(xmm0, xmm6); pxor(xmm6, xmm4); psrad(xmm6, 31); pshufd(xmm6, xmm6, 85); psllq(xmm0, 1); psrlq(xmm0, 1); pxor(xmm0, xmm6); psrlq(xmm6, 63); paddq(xmm0, xmm6); paddq(xmm0, xmm4); movl(Address(rsp,0), 15); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_4_0_2); addsd(xmm0, xmm4); mulsd(xmm0, xmm3); jmp(B1_5); bind(L_2TAG_PACKET_3_0_2); addsd(xmm0, xmm4); mulsd(xmm0, xmm3); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 32752); jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); jmp(B1_5); bind(L_2TAG_PACKET_2_0_2); paddd(xmm3, xmm6); addpd(xmm0, xmm2); mulsd(xmm0, xmm3); movl(Address(rsp,0), 15); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_8_0_2); cmpl(eax, 2146435072); jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); movl(eax, Address(rsp,12)); cmpl(eax, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2); movsd(xmm0, InternalAddress(XMAX)); mulsd(xmm0, xmm0); bind(L_2TAG_PACKET_7_0_2); movl(Address(rsp,0), 14); jmp(L_2TAG_PACKET_6_0_2); 
bind(L_2TAG_PACKET_10_0_2); movsd(xmm0, InternalAddress(XMIN)); mulsd(xmm0, xmm0); movl(Address(rsp,0), 15); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_9_0_2); movl(edx, Address(rsp,8)); cmpl(eax, 2146435072); jcc(Assembler::above, L_2TAG_PACKET_11_0_2); cmpl(edx, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); movl(eax, Address(rsp,12)); cmpl(eax, 2146435072); jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2); movsd(xmm0, InternalAddress(INF)); jmp(B1_5); bind(L_2TAG_PACKET_12_0_2); movsd(xmm0, InternalAddress(ZERO)); jmp(B1_5); bind(L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(rsp, 8)); addsd(xmm0, xmm0); jmp(B1_5); bind(L_2TAG_PACKET_0_0_2); movl(eax, Address(rsp, 12)); andl(eax, 2147483647); cmpl(eax, 1083179008); jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); movsd(Address(rsp, 8), xmm0); addsd(xmm0, InternalAddress(ONE_val)); jmp(B1_5); bind(L_2TAG_PACKET_6_0_2); movq(Address(rsp, 16), xmm0); bind(B1_3); movq(xmm0, Address(rsp, 16)); bind(L_2TAG_PACKET_13_0_2); bind(B1_5); addq(rsp, 24); } #endif #ifndef _LP64 //registers, // input: (rbp + 8) // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 // rax, rdx, rcx, rbx (tmp) void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; assert_different_registers(tmp, eax, ecx, edx); jmp(start); address static_const_table = pc(); emit_int32(0); emit_int32(4293918720u); emit_int32(0); emit_int32(4293918720u); emit_int32(4294967232u); emit_int32(0); emit_int32(4294967232u); emit_int32(0); emit_int32(65472u); 
emit_int32(0); emit_int32(65472u); emit_int32(0); emit_int32(0); emit_int32(1127743488u); emit_int32(0); emit_int32(1127743488u); emit_int32(1697350398u); emit_int32(1079448903u); emit_int32(1697350398u); emit_int32(1079448903u); emit_int32(4277796864u); emit_int32(1065758274u); emit_int32(4277796864u); emit_int32(1065758274u); emit_int32(3164486458u); emit_int32(1025308570u); emit_int32(3164486458u); emit_int32(1025308570u); emit_int32(4294967294u); emit_int32(1071644671u); emit_int32(4294967294u); emit_int32(1071644671u); emit_int32(3811088480u); emit_int32(1062650204u); emit_int32(1432067621u); emit_int32(1067799893u); emit_int32(3230715663u); emit_int32(1065423125u); emit_int32(1431604129u); emit_int32(1069897045u); emit_int32(0); emit_int32(0); emit_int32(0); emit_int32(0); emit_int32(235107661u); emit_int32(1018002367u); emit_int32(1048019040u); emit_int32(11418u); emit_int32(896005651u); emit_int32(1015861842u); emit_int32(3541402996u); emit_int32(22960u); emit_int32(1642514529u); emit_int32(1012987726u); emit_int32(410360776u); emit_int32(34629u); emit_int32(1568897900u); emit_int32(1016568486u); emit_int32(1828292879u); emit_int32(46424u); emit_int32(1882168529u); emit_int32(1010744893u); emit_int32(852742562u); emit_int32(58348u); emit_int32(509852888u); emit_int32(1017336174u); emit_int32(3490863952u); emit_int32(70401u); emit_int32(653277307u); emit_int32(1017431380u); emit_int32(2930322911u); emit_int32(82586u); emit_int32(1649557430u); emit_int32(1017729363u); emit_int32(1014845818u); emit_int32(94904u); emit_int32(1058231231u); emit_int32(1015777676u); emit_int32(3949972341u); emit_int32(107355u); emit_int32(1044000607u); emit_int32(1016786167u); emit_int32(828946858u); emit_int32(119943u); emit_int32(1151779725u); emit_int32(1015705409u); emit_int32(2288159958u); emit_int32(132667u); emit_int32(3819481236u); emit_int32(1016499965u); emit_int32(1853186616u); emit_int32(145530u); emit_int32(2552227826u); emit_int32(1015039787u); 
emit_int32(1709341917u); emit_int32(158533u); emit_int32(1829350193u); emit_int32(1015216097u); emit_int32(4112506593u); emit_int32(171677u); emit_int32(1913391795u); emit_int32(1015756674u); emit_int32(2799960843u); emit_int32(184965u); emit_int32(1303423926u); emit_int32(1015238005u); emit_int32(171030293u); emit_int32(198398u); emit_int32(1574172746u); emit_int32(1016061241u); emit_int32(2992903935u); emit_int32(211976u); emit_int32(3424156969u); emit_int32(1017196428u); emit_int32(926591434u); emit_int32(225703u); emit_int32(1938513547u); emit_int32(1017631273u); emit_int32(887463926u); emit_int32(239579u); emit_int32(2804567149u); emit_int32(1015390024u); emit_int32(1276261410u); emit_int32(253606u); emit_int32(631083525u); emit_int32(1017690182u); emit_int32(569847337u); emit_int32(267786u); emit_int32(1623370770u); emit_int32(1011049453u); emit_int32(1617004845u); emit_int32(282120u); emit_int32(3667985273u); emit_int32(1013894369u); emit_int32(3049340112u); emit_int32(296610u); emit_int32(3145379760u); emit_int32(1014403278u); emit_int32(3577096743u); emit_int32(311258u); emit_int32(2603100681u); emit_int32(1017152460u); emit_int32(1990012070u); emit_int32(326066u); emit_int32(3249202951u); emit_int32(1017448880u); emit_int32(1453150081u); emit_int32(341035u); emit_int32(419288974u); emit_int32(1016280325u); emit_int32(917841882u); emit_int32(356167u); emit_int32(3793507337u); emit_int32(1016095713u); emit_int32(3712504873u); emit_int32(371463u); emit_int32(728023093u); emit_int32(1016345318u); emit_int32(363667784u); emit_int32(386927u); emit_int32(2582678538u); emit_int32(1017123460u); emit_int32(2956612996u); emit_int32(402558u); emit_int32(7592966u); emit_int32(1016721543u); emit_int32(2186617380u); emit_int32(418360u); emit_int32(228611441u); emit_int32(1016696141u); emit_int32(1719614412u); emit_int32(434334u); emit_int32(2261665670u); emit_int32(1017457593u); emit_int32(1013258798u); emit_int32(450482u); emit_int32(544148907u); 
emit_int32(1017323666u); emit_int32(3907805043u); emit_int32(466805u); emit_int32(2383914918u); emit_int32(1017143586u); emit_int32(1447192520u); emit_int32(483307u); emit_int32(1176412038u); emit_int32(1017267372u); emit_int32(1944781190u); emit_int32(499988u); emit_int32(2882956373u); emit_int32(1013312481u); emit_int32(919555682u); emit_int32(516851u); emit_int32(3154077648u); emit_int32(1016528543u); emit_int32(2571947538u); emit_int32(533897u); emit_int32(348651999u); emit_int32(1016405780u); emit_int32(2604962540u); emit_int32(551129u); emit_int32(3253791412u); emit_int32(1015920431u); emit_int32(1110089947u); emit_int32(568549u); emit_int32(1509121860u); emit_int32(1014756995u); emit_int32(2568320822u); emit_int32(586158u); emit_int32(2617649212u); emit_int32(1017340090u); emit_int32(2966275556u); emit_int32(603959u); emit_int32(553214634u); emit_int32(1016457425u); emit_int32(2682146383u); emit_int32(621954u); emit_int32(730975783u); emit_int32(1014083580u); emit_int32(2191782032u); emit_int32(640145u); emit_int32(1486499517u); emit_int32(1016818996u); emit_int32(2069751140u); emit_int32(658534u); emit_int32(2595788928u); emit_int32(1016407932u); emit_int32(2990417244u); emit_int32(677123u); emit_int32(1853053619u); emit_int32(1015310724u); emit_int32(1434058175u); emit_int32(695915u); emit_int32(2462790535u); emit_int32(1015814775u); emit_int32(2572866477u); emit_int32(714911u); emit_int32(3693944214u); emit_int32(1017259110u); emit_int32(3092190714u); emit_int32(734114u); emit_int32(2979333550u); emit_int32(1017188654u); emit_int32(4076559942u); emit_int32(753526u); emit_int32(174054861u); emit_int32(1014300631u); emit_int32(2420883922u); emit_int32(773150u); emit_int32(816778419u); emit_int32(1014197934u); emit_int32(3716502172u); emit_int32(792987u); emit_int32(3507050924u); emit_int32(1015341199u); emit_int32(777507147u); emit_int32(813041u); emit_int32(1821514088u); emit_int32(1013410604u); emit_int32(3706687593u); emit_int32(833312u); 
emit_int32(920623539u); emit_int32(1016295433u); emit_int32(1242007931u); emit_int32(853805u); emit_int32(2789017511u); emit_int32(1014276997u); emit_int32(3707479175u); emit_int32(874520u); emit_int32(3586233004u); emit_int32(1015962192u); emit_int32(64696965u); emit_int32(895462u); emit_int32(474650514u); emit_int32(1016642419u); emit_int32(863738718u); emit_int32(916631u); emit_int32(1614448851u); emit_int32(1014281732u); emit_int32(3884662774u); emit_int32(938030u); emit_int32(2450082086u); emit_int32(1016164135u); emit_int32(2728693977u); emit_int32(959663u); emit_int32(1101668360u); emit_int32(1015989180u); emit_int32(3999357479u); emit_int32(981531u); emit_int32(835814894u); emit_int32(1015702697u); emit_int32(1533953344u); emit_int32(1003638u); emit_int32(1301400989u); emit_int32(1014466875u); emit_int32(2174652632u); emit_int32(1025985u); emit_int32(0); emit_int32(1072693248u); emit_int32(0); emit_int32(2146435072u); emit_int32(0); emit_int32(0); emit_int32(4294967295u); emit_int32(2146435071u); emit_int32(0); emit_int32(1048576u); bind(start); subl(rsp, 120); movl(Address(rsp, 64), tmp); lea(tmp, InternalAddress(static_const_table)); movdqu(xmm0, Address(rsp, 128)); unpcklpd(xmm0, xmm0); movdqu(xmm1, Address(tmp, 64)); movdqu(xmm6, Address(tmp, 48)); movdqu(xmm2, Address(tmp, 80)); movdqu(xmm3, Address(tmp, 96)); pextrw(eax, xmm0, 3); andl(eax, 32767); movl(edx, 16527); subl(edx, eax); subl(eax, 15504); orl(edx, eax); cmpl(edx, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); mulpd(xmm1, xmm0); addpd(xmm1, xmm6); movapd(xmm7, xmm1); subpd(xmm1, xmm6); mulpd(xmm2, xmm1); movdqu(xmm4, Address(tmp, 128)); mulpd(xmm3, xmm1); movdqu(xmm5, Address(tmp, 144)); subpd(xmm0, xmm2); movdl(eax, xmm7); movl(ecx, eax); andl(ecx, 63); shll(ecx, 4); sarl(eax, 6); movl(edx, eax); movdqu(xmm6, Address(tmp, 16)); pand(xmm7, xmm6); movdqu(xmm6, Address(tmp, 32)); paddq(xmm7, xmm6); psllq(xmm7, 46); subpd(xmm0, xmm3); movdqu(xmm2, Address(tmp, ecx, Address::times_1, 
160)); mulpd(xmm4, xmm0); movapd(xmm6, xmm0); movapd(xmm1, xmm0); mulpd(xmm6, xmm6); mulpd(xmm0, xmm6); addpd(xmm5, xmm4); mulsd(xmm0, xmm6); mulpd(xmm6, Address(tmp, 112)); addsd(xmm1, xmm2); unpckhpd(xmm2, xmm2); mulpd(xmm0, xmm5); addsd(xmm1, xmm0); por(xmm2, xmm7); unpckhpd(xmm0, xmm0); addsd(xmm0, xmm1); addsd(xmm0, xmm6); addl(edx, 894); cmpl(edx, 1916); jcc (Assembler::above, L_2TAG_PACKET_1_0_2); mulsd(xmm0, xmm2); addsd(xmm0, xmm2); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_1_0_2); fnstcw(Address(rsp, 24)); movzwl(edx, Address(rsp, 24)); orl(edx, 768); movw(Address(rsp, 28), edx); fldcw(Address(rsp, 28)); movl(edx, eax); sarl(eax, 1); subl(edx, eax); movdqu(xmm6, Address(tmp, 0)); pandn(xmm6, xmm2); addl(eax, 1023); movdl(xmm3, eax); psllq(xmm3, 52); por(xmm6, xmm3); addl(edx, 1023); movdl(xmm4, edx); psllq(xmm4, 52); movsd(Address(rsp, 8), xmm0); fld_d(Address(rsp, 8)); movsd(Address(rsp, 16), xmm6); fld_d(Address(rsp, 16)); fmula(1); faddp(1); movsd(Address(rsp, 8), xmm4); fld_d(Address(rsp, 8)); fmulp(1); fstp_d(Address(rsp, 8)); movsd(xmm0,Address(rsp, 8)); fldcw(Address(rsp, 24)); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 32752); jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); cmpl(ecx, 0); jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); jmp(L_2TAG_PACKET_2_0_2); cmpl(ecx, INT_MIN); jcc(Assembler::less, L_2TAG_PACKET_3_0_2); cmpl(ecx, -1064950997); jcc(Assembler::less, L_2TAG_PACKET_2_0_2); jcc(Assembler::greater, L_2TAG_PACKET_4_0_2); movl(edx, Address(rsp, 128)); cmpl(edx ,-17155601); jcc(Assembler::less, L_2TAG_PACKET_2_0_2); jmp(L_2TAG_PACKET_4_0_2); bind(L_2TAG_PACKET_3_0_2); movl(edx, 14); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_4_0_2); movl(edx, 15); bind(L_2TAG_PACKET_5_0_2); movsd(Address(rsp, 0), xmm0); movsd(xmm0, Address(rsp, 128)); fld_d(Address(rsp, 0)); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_7_0_2); cmpl(eax, 2146435072); jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2); movl(eax, Address(rsp, 132)); cmpl(eax, 
INT_MIN); jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2); movsd(xmm0, Address(tmp, 1208)); mulsd(xmm0, xmm0); movl(edx, 14); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_9_0_2); movsd(xmm0, Address(tmp, 1216)); mulsd(xmm0, xmm0); movl(edx, 15); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_8_0_2); movl(edx, Address(rsp, 128)); cmpl(eax, 2146435072); jcc(Assembler::above, L_2TAG_PACKET_10_0_2); cmpl(edx, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); movl(eax, Address(rsp, 132)); cmpl(eax, 2146435072); jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(tmp, 1192)); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(tmp, 1200)); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_10_0_2); movsd(xmm0, Address(rsp, 128)); addsd(xmm0, xmm0); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_0_0_2); movl(eax, Address(rsp, 132)); andl(eax, 2147483647); cmpl(eax, 1083179008); jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); movsd(xmm0, Address(rsp, 128)); addsd(xmm0, Address(tmp, 1184)); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_2_0_2); movsd(Address(rsp, 48), xmm0); fld_d(Address(rsp, 48)); bind(L_2TAG_PACKET_6_0_2); movl(tmp, Address(rsp, 64)); } #endif