--- old/src/cpu/x86/vm/assembler_x86.cpp 2015-11-19 18:51:00.536795000 -0800 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2015-11-19 18:51:00.187760100 -0800 @@ -772,6 +772,7 @@ case 0x55: // andnps case 0x56: // orps case 0x57: // xorps + case 0x58: // addpd case 0x59: // mulpd case 0x6E: // movd case 0x7E: // movd @@ -1890,6 +1891,12 @@ emit_int8((unsigned char)(0xF0 | encode)); } +void Assembler::imull(Register src) { + int encode = prefix_and_encode(src->encoding()); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xE8 | encode)); +} + void Assembler::imull(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); @@ -3753,6 +3760,15 @@ emit_arith_b(0xF6, 0xC0, dst, imm8); } +void Assembler::testb(Address dst, int imm8) { + InstructionMark im(this); + prefix(dst); + emit_int8((unsigned char)0xF6); + emit_operand(rax, dst, 1); + emit_int8(imm8); + +} + void Assembler::testl(Register dst, int32_t imm32) { // not using emit_arith because test // doesn't support sign-extension of @@ -4083,6 +4099,16 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::addpd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x58); + emit_operand(dst, src); +} + void Assembler::addps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); --- old/src/cpu/x86/vm/assembler_x86.hpp 2015-11-19 18:51:03.005041800 -0800 +++ new/src/cpu/x86/vm/assembler_x86.hpp 
2015-11-19 18:51:02.675008800 -0800 @@ -1211,6 +1211,7 @@ void idivq(Register src); #endif + void imull(Register src); void imull(Register dst, Register src); void imull(Register dst, Register src, int value); void imull(Register dst, Address src); @@ -1692,6 +1693,7 @@ void subss(XMMRegister dst, XMMRegister src); void testb(Register dst, int imm8); + void testb(Address dst, int imm8); void testl(Register dst, int32_t imm32); void testl(Register dst, Register src); @@ -1764,6 +1766,7 @@ // Add Packed Floating-Point Values void addpd(XMMRegister dst, XMMRegister src); + void addpd(XMMRegister dst, Address src); void addps(XMMRegister dst, XMMRegister src); void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); --- old/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2015-11-19 18:51:05.126253900 -0800 +++ new/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2015-11-19 18:51:04.789220200 -0800 @@ -2364,15 +2364,6 @@ case lir_log10 : __ flog10() ; break; case lir_abs : __ fabs() ; break; case lir_sqrt : __ fsqrt(); break; - case lir_sin : - // Should consider not saving rbx, if not necessary - __ trigfunc('s', op->as_Op2()->fpu_stack_size()); - break; - case lir_cos : - // Should consider not saving rbx, if not necessary - assert(op->as_Op2()->fpu_stack_size() <= 6, "sin and cos need two free stack slots"); - __ trigfunc('c', op->as_Op2()->fpu_stack_size()); - break; case lir_tan : // Should consider not saving rbx, if not necessary __ trigfunc('t', op->as_Op2()->fpu_stack_size()); --- old/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp 2015-11-19 18:51:07.330474300 -0800 +++ new/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp 2015-11-19 18:51:07.000441300 -0800 @@ -809,7 +809,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type"); - if (x->id() == vmIntrinsics::_dexp || x->id() 
== vmIntrinsics::_dlog) { + if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || + x->id() == vmIntrinsics::_dcos || x->id() == vmIntrinsics::_dsin) { do_LibmIntrinsic(x); return; } @@ -818,13 +819,12 @@ bool use_fpu = false; if (UseSSE >= 2) { - switch(x->id()) { - case vmIntrinsics::_dsin: - case vmIntrinsics::_dcos: + switch (x->id()) { case vmIntrinsics::_dtan: case vmIntrinsics::_dlog10: case vmIntrinsics::_dpow: use_fpu = true; + break; } } else { value.set_destroys_register(); @@ -869,8 +869,6 @@ switch(x->id()) { case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break; case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break; - case vmIntrinsics::_dsin: __ sin (calc_input, calc_result, tmp1, tmp2); break; - case vmIntrinsics::_dcos: __ cos (calc_input, calc_result, tmp1, tmp2); break; case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break; case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break; case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break; @@ -909,11 +907,24 @@ case vmIntrinsics::_dlog: if (VM_Version::supports_sse2()) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); - } - else { + } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); } break; + case vmIntrinsics::_dsin: + if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) { + __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dcos: + if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) { + __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), 
result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); + } + break; default: ShouldNotReachHere(); } #else @@ -924,6 +935,20 @@ case vmIntrinsics::_dlog: __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); break; + case vmIntrinsics::_dsin: + if (StubRoutines::dsin() != NULL) { + __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dcos: + if (StubRoutines::dcos() != NULL) { + __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); + } + break; } #endif __ move(result_reg, calc_result); --- old/src/cpu/x86/vm/c1_LinearScan_x86.cpp 2015-11-19 18:51:09.442685500 -0800 +++ new/src/cpu/x86/vm/c1_LinearScan_x86.cpp 2015-11-19 18:51:09.104651700 -0800 @@ -811,9 +811,7 @@ } - case lir_tan: - case lir_sin: - case lir_cos: { + case lir_tan: { // sin, cos and exp need two temporary fpu stack slots, so there are two temporary // registers (stored in right and temp of the operation). 
// the stack allocator must guarantee that the stack slots are really free, --- old/src/cpu/x86/vm/c1_LinearScan_x86.hpp 2015-11-19 18:51:11.538895100 -0800 +++ new/src/cpu/x86/vm/c1_LinearScan_x86.hpp 2015-11-19 18:51:11.206861900 -0800 @@ -67,9 +67,7 @@ inline void LinearScan::pd_add_temps(LIR_Op* op) { switch (op->code()) { - case lir_tan: - case lir_sin: - case lir_cos: { + case lir_tan:{ // The slow path for these functions may need to save and // restore all live registers but we don't want to save and // restore everything all the time, so mark the xmms as being @@ -119,7 +117,6 @@ return false; } - class FpuStackAllocator VALUE_OBJ_CLASS_SPEC { private: Compilation* _compilation; --- old/src/cpu/x86/vm/globals_x86.hpp 2015-11-19 18:51:13.552096400 -0800 +++ new/src/cpu/x86/vm/globals_x86.hpp 2015-11-19 18:51:13.222063400 -0800 @@ -188,5 +188,11 @@ "Use BMI1 instructions") \ \ product(bool, UseBMI2Instructions, false, \ - "Use BMI2 instructions") + "Use BMI2 instructions") \ + \ + product(bool, UseLibmCosIntrinsic, false, \ + "Use Libm Cos Intrinsic") \ + \ + product(bool, UseLibmSinIntrinsic, false, \ + "Use Libm Sin Intrinsic") #endif // CPU_X86_VM_GLOBALS_X86_HPP --- old/src/cpu/x86/vm/interpreter_x86_32.cpp 2015-11-19 18:51:15.572298400 -0800 +++ new/src/cpu/x86/vm/interpreter_x86_32.cpp 2015-11-19 18:51:15.240265200 -0800 @@ -117,10 +117,24 @@ __ fld_d(Address(rsp, 1*wordSize)); switch (kind) { case Interpreter::java_lang_math_sin : - __ trigfunc('s'); + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin))); + } + __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_cos : - __ trigfunc('c'); + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2() && 
StubRoutines::dcos() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos))); + } + __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_tan : __ trigfunc('t'); @@ -136,8 +150,7 @@ __ fstp_d(Address(rsp, 0)); if (VM_Version::supports_sse2()) { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); - } - else { + } else { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog))); } __ addptr(rsp, 2 * wordSize); --- old/src/cpu/x86/vm/interpreter_x86_64.cpp 2015-11-19 18:51:17.592500400 -0800 +++ new/src/cpu/x86/vm/interpreter_x86_64.cpp 2015-11-19 18:51:17.262467400 -0800 @@ -256,15 +256,23 @@ } else if (kind == Interpreter::java_lang_math_log) { __ movdbl(xmm0, Address(rsp, wordSize)); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); + } else if (kind == Interpreter::java_lang_math_sin) { + __ movdbl(xmm0, Address(rsp, wordSize)); + if (StubRoutines::dsin() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin))); + } + } else if (kind == Interpreter::java_lang_math_cos) { + __ movdbl(xmm0, Address(rsp, wordSize)); + if (StubRoutines::dcos() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos))); + } } else { __ fld_d(Address(rsp, wordSize)); switch (kind) { - case Interpreter::java_lang_math_sin : - __ trigfunc('s'); - break; - case Interpreter::java_lang_math_cos : - __ trigfunc('c'); - break; case Interpreter::java_lang_math_tan : __ trigfunc('t'); break; --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-11-19 18:51:19.599701100 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2015-11-19 18:51:19.262667400 -0800 @@ -971,6 +971,15 @@ } } +void 
MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::addpd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::addpd(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::align(int modulus) { align(modulus, offset()); } --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-11-19 18:51:22.099951100 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2015-11-19 18:51:21.769918100 -0800 @@ -869,6 +869,7 @@ void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } void andpd(XMMRegister dst, AddressLiteral src); + void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); } void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } @@ -916,6 +917,26 @@ XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2)); + void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rbx LP64_ONLY(COMMA Register rcx), Register rdx + LP64_ONLY(COMMA Register tmp1) LP64_ONLY(COMMA Register tmp2) + LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4)); + + void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx NOT_LP64(COMMA Register tmp) + LP64_ONLY(COMMA Register r8) LP64_ONLY(COMMA Register r9) + LP64_ONLY(COMMA Register r10) LP64_ONLY(COMMA Register r11)); + +#ifndef _LP64 + void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, + Register edx, Register ebx, Register esi, Register edi, + Register ebp, Register esp); + void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register 
ebx, + Register esi, Register edi, Register ebp, Register esp); +#endif + void increase_precision(); void restore_precision(); @@ -953,6 +974,10 @@ void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, AddressLiteral src); + void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); } + void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); } + void addpd(XMMRegister dst, AddressLiteral src); + void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, AddressLiteral src); --- old/src/cpu/x86/vm/macroAssembler_x86_libm.cpp 2015-11-19 18:51:24.219163000 -0800 +++ new/src/cpu/x86/vm/macroAssembler_x86_libm.cpp 2015-11-19 18:51:23.881129200 -0800 @@ -1286,3 +1286,3262 @@ } #endif + +/******************************************************************************/ +// ALGORITHM DESCRIPTION - SIN() +// --------------------- +// +// 1. RANGE REDUCTION +// +// We perform an initial range reduction from X to r with +// +// X =~= N * pi/32 + r +// +// so that |r| <= pi/64 + epsilon. We restrict inputs to those +// where |N| <= 932560. Beyond this, the range reduction is +// insufficiently accurate. For extremely small inputs, +// denormalization can occur internally, impacting performance. +// This means that the main path is actually only taken for +// 2^-252 <= |X| < 90112. +// +// To avoid branches, we perform the range reduction to full +// accuracy each time. +// +// X - N * (P_1 + P_2 + P_3) +// +// where P_1 and P_2 are 32-bit numbers (so multiplication by N +// is exact) and P_3 is a 53-bit number. Together, these +// approximate pi well enough for all cases in the restricted +// range. 
+// +// The main reduction sequence is: +// +// y = 32/pi * x +// N = integer(y) +// (computed by adding and subtracting off SHIFTER) +// +// m_1 = N * P_1 +// m_2 = N * P_2 +// r_1 = x - m_1 +// r = r_1 - m_2 +// (this r can be used for most of the calculation) +// +// c_1 = r_1 - r +// m_3 = N * P_3 +// c_2 = c_1 - m_2 +// c = c_2 - m_3 +// +// 2. MAIN ALGORITHM +// +// The algorithm uses a table lookup based on B = M * pi / 32 +// where M = N mod 64. The stored values are: +// sigma closest power of 2 to cos(B) +// C_hl 53-bit cos(B) - sigma +// S_hi + S_lo 2 * 53-bit sin(B) +// +// The computation is organized as follows: +// +// sin(B + r + c) = [sin(B) + sigma * r] + +// r * (cos(B) - sigma) + +// sin(B) * [cos(r + c) - 1] + +// cos(B) * [sin(r + c) - r] +// +// which is approximately: +// +// [S_hi + sigma * r] + +// C_hl * r + +// S_lo + S_hi * [(cos(r) - 1) - r * c] + +// (C_hl + sigma) * [(sin(r) - r) + c] +// +// and this is what is actually computed. We separate this sum +// into four parts: +// +// hi + med + pols + corr +// +// where +// +// hi = S_hi + sigma * r +// med = C_hl * r +// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) +// corr = S_lo + c * ((C_hl + sigma) - S_hi * r) +// +// 3. POLYNOMIAL +// +// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * +// (sin(r) - r) can be rearranged freely, since it is quite +// small, so we exploit parallelism to the fullest. +// +// psc4 = SC_4 * r_1 +// msc4 = psc4 * r +// r2 = r * r +// msc2 = SC_2 * r2 +// r4 = r2 * r2 +// psc3 = SC_3 + msc4 +// psc1 = SC_1 + msc2 +// msc3 = r4 * psc3 +// sincospols = psc1 + msc3 +// pols = sincospols * +// <S_hi * r^2 | (C_hl + sigma) * r^3> +// +// 4. CORRECTION TERM +// +// This is where the "c" component of the range reduction is +// taken into account; recall that just "r" is used for most of +// the calculation. +// +// -c = m_3 - c_2 +// -d = S_hi * r - (C_hl + sigma) +// corr = -c * -d + S_lo +// +// 5. 
COMPENSATED SUMMATIONS +// +// The two successive compensated summations add up the high +// and medium parts, leaving just the low parts to add up at +// the end. +// +// rs = sigma * r +// res_int = S_hi + rs +// k_0 = S_hi - res_int +// k_2 = k_0 + rs +// med = C_hl * r +// res_hi = res_int + med +// k_1 = res_int - res_hi +// k_3 = k_1 + med +// +// 6. FINAL SUMMATION +// +// We now add up all the small parts: +// +// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 +// +// Now the overall result is just: +// +// res_hi + res_lo +// +// 7. SMALL ARGUMENTS +// +// If |x| < SNN (SNN meaning the smallest normal number), we +// simply perform 0.1111111 cdots 1111 * x. For SNN <= |x|, we +// do 2^-55 * (2^55 * x - x). +// +// Special cases: +// sin(NaN) = quiet NaN, and raise invalid exception +// sin(INF) = NaN and raise invalid exception +// sin(+/-0) = +/-0 +// +/******************************************************************************/ + +#ifdef _LP64 + +ALIGNED_(16) juint _ONEHALF[] = +{ + 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL +}; + +ALIGNED_(16) juint _P_2[] = +{ + 0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL +}; + +ALIGNED_(16) juint _SC_4[] = +{ + 0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL +}; + +ALIGNED_(16) juint _Ctable[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, + 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, + 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, + 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, + 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, + 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, + 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, + 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, + 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 
0x5bc57974UL, 0xbfc59267UL, + 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, + 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, + 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, + 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, + 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, + 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, + 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, + 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, + 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, + 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, + 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, + 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, + 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, + 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, + 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, + 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, + 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, + 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, + 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, + 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, + 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, + 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, + 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, + 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, + 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, + 0xbfc133ccUL, 
0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, + 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, + 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, + 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, + 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, + 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, + 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, + 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, + 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, + 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, + 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, + 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, + 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, + 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, + 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, + 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, + 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, + 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, + 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, + 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, + 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, + 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, + 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, + 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, + 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, + 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, + 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 
+ 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, + 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, + 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, + 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, + 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, + 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, + 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, + 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, + 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, + 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, + 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, + 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, + 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, + 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, + 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, + 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, + 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, + 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, + 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, + 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, + 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, + 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, + 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, + 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, + 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, + 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 
0x3ff00000UL, 0x5bc57974UL, + 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, + 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, + 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, + 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, + 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, + 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, + 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, + 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, + 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, + 0x00000000UL, 0x3ff00000UL +}; + +ALIGNED_(16) juint _SC_2[] = +{ + 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL +}; + +ALIGNED_(16) juint _SC_3[] = +{ + 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL +}; + +ALIGNED_(16) juint _SC_1[] = +{ + 0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL +}; + +ALIGNED_(16) juint _PI_INV_TABLE[] = +{ + 0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL, + 0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL, + 0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL, + 0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL, + 0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL, + 0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL, + 0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL, + 0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL, + 0xf0cfbc21UL +}; + +ALIGNED_(8) juint _PI_4[] = +{ + 0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL +}; + +ALIGNED_(8) juint _PI32INV[] = +{ + 0x6dc9c883UL, 0x40245f30UL +}; + +ALIGNED_(8) juint _SHIFTER[] = +{ + 0x00000000UL, 0x43380000UL +}; + +ALIGNED_(8) juint _SIGN_MASK[] = +{ + 0x00000000UL, 0x80000000UL +}; + +ALIGNED_(8) juint _P_3[] = +{ + 0x2e037073UL, 
0x3b63198aUL +}; + +ALIGNED_(8) juint _ALL_ONES[] = +{ + 0xffffffffUL, 0x3fefffffUL +}; + +ALIGNED_(8) juint _TWO_POW_55[] = +{ + 0x00000000UL, 0x43600000UL +}; + +ALIGNED_(8) juint _TWO_POW_M55[] = +{ + 0x00000000UL, 0x3c800000ULL +}; + +ALIGNED_(8) juint _P_1[] = +{ + 0x54400000UL, 0x3fb921fbUL +}; + +ALIGNED_(8) juint _NEG_ZERO[] = +{ + 0x00000000UL, 0x80000000UL +}; + +void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ebx, Register ecx, Register edx, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; + Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; + Label L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1; + Label L_2TAG_PACKET_12_0_1, B1_1, B1_2, B1_4, start; + + assert_different_registers(tmp1, tmp2, tmp3, tmp4, eax, ebx, ecx, edx); + address ONEHALF = (address)_ONEHALF; + address P_2 = (address)_P_2; + address SC_4 = (address)_SC_4; + address Ctable = (address)_Ctable; + address SC_2 = (address)_SC_2; + address SC_3 = (address)_SC_3; + address SC_1 = (address)_SC_1; + address PI_INV_TABLE = (address)_PI_INV_TABLE; + address PI_4 = (address)_PI_4; + address PI32INV = (address)_PI32INV; + address SHIFTER = (address)_SHIFTER; + address SIGN_MASK = (address)_SIGN_MASK; + address P_3 = (address)_P_3; + address ALL_ONES = (address)_ALL_ONES; + address TWO_POW_55 = (address)_TWO_POW_55; + address TWO_POW_M55 = (address)_TWO_POW_M55; + address P_1 = (address)_P_1; + address NEG_ZERO = (address)_NEG_ZERO; + + bind(start); + push(rbx); + subq(rsp, 16); + movsd(Address(rsp, 8), xmm0); + movl(eax, Address(rsp, 12)); + movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL + movq(xmm2, 
ExternalAddress(SHIFTER)); //0x00000000UL, 0x43380000UL + andl(eax, 2147418112); + subl(eax, 808452096); + cmpl(eax, 281346048); + jcc(Assembler::above, L_2TAG_PACKET_0_0_1); + mulsd(xmm1, xmm0); + movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL + pand(xmm4, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + cvttsd2sil(edx, xmm1); + cvtsi2sdl(xmm1, edx); + movdqu(xmm6, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL + mov64(r8, 0x3fb921fb54400000); + movdq(xmm3, r8); + movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL + pshufd(xmm4, xmm0, 68); + mulsd(xmm3, xmm1); + movddup(xmm1, xmm1); + andl(edx, 63); + shll(edx, 5); + lea(rax, ExternalAddress(Ctable)); + addq(rax, rdx); + mulpd(xmm6, xmm1); + mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL + subsd(xmm4, xmm3); + movq(xmm7, Address(rax, 8)); + subsd(xmm0, xmm3); + movddup(xmm3, xmm4); + subsd(xmm4, xmm6); + pshufd(xmm0, xmm0, 68); + movdqu(xmm2, Address(rax, 0)); + mulpd(xmm5, xmm0); + subpd(xmm0, xmm6); + mulsd(xmm7, xmm4); + subsd(xmm3, xmm4); + mulpd(xmm5, xmm0); + mulpd(xmm0, xmm0); + subsd(xmm3, xmm6); + movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL + subsd(xmm1, xmm3); + movq(xmm3, Address(rax, 24)); + addsd(xmm2, xmm3); + subsd(xmm7, xmm2); + mulsd(xmm2, xmm4); + mulpd(xmm6, xmm0); + mulsd(xmm3, xmm4); + mulpd(xmm2, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL + mulsd(xmm4, Address(rax, 0)); + addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL + mulpd(xmm5, xmm0); + movdqu(xmm0, xmm3); + addsd(xmm3, Address(rax, 8)); + mulpd(xmm1, xmm7); + movdqu(xmm7, xmm4); + addsd(xmm4, xmm3); + addpd(xmm6, xmm5); + movq(xmm5, Address(rax, 8)); + 
subsd(xmm5, xmm3); + subsd(xmm3, xmm4); + addsd(xmm1, Address(rax, 16)); + mulpd(xmm6, xmm2); + addsd(xmm5, xmm0); + addsd(xmm3, xmm7); + addsd(xmm1, xmm5); + addsd(xmm1, xmm3); + addsd(xmm1, xmm6); + unpckhpd(xmm6, xmm6); + movdqu(xmm0, xmm4); + addsd(xmm1, xmm6); + addsd(xmm0, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_0_0_1); + jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); + shrl(eax, 20); + cmpl(eax, 3325); + jcc(Assembler::notEqual, L_2TAG_PACKET_2_0_1); + mulsd(xmm0, ExternalAddress(ALL_ONES)); //0xffffffffUL, 0x3fefffffUL + jmp(B1_4); + + bind(L_2TAG_PACKET_2_0_1); + movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL + mulsd(xmm3, xmm0); + subsd(xmm3, xmm0); + mulsd(xmm3, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL + jmp(B1_4); + + bind(L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + andl(eax, 32752); + cmpl(eax, 32752); + jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); + pextrw(ecx, xmm0, 3); + andl(ecx, 32752); + subl(ecx, 16224); + shrl(ecx, 7); + andl(ecx, 65532); + lea(r11, ExternalAddress(PI_INV_TABLE)); + addq(rcx, r11); + movdq(rax, xmm0); + movl(r10, Address(rcx, 20)); + movl(r8, Address(rcx, 24)); + movl(edx, eax); + shrq(rax, 21); + orl(eax, INT_MIN); + shrl(eax, 11); + movl(r9, r10); + imulq(r10, rdx); + imulq(r9, rax); + imulq(r8, rax); + movl(rsi, Address(rcx, 16)); + movl(rdi, Address(rcx, 12)); + movl(r11, r10); + shrq(r10, 32); + addq(r9, r10); + addq(r11, r8); + movl(r8, r11); + shrq(r11, 32); + addq(r9, r11); + movl(r10, rsi); + imulq(rsi, rdx); + imulq(r10, rax); + movl(r11, rdi); + imulq(rdi, rdx); + movl(ebx, rsi); + shrq(rsi, 32); + addq(r9, rbx); + movl(ebx, r9); + shrq(r9, 32); + addq(r10, rsi); + addq(r10, r9); + shlq(rbx, 32); + orq(r8, rbx); + imulq(r11, rax); + movl(r9, Address(rcx, 8)); + movl(rsi, Address(rcx, 4)); + movl(ebx, rdi); + shrq(rdi, 32); + addq(r10, rbx); + movl(ebx, r10); + shrq(r10, 32); + addq(r11, rdi); + addq(r11, r10); + movq(rdi, r9); + imulq(r9, rdx); + imulq(rdi, rax); + 
movl(r10, r9); + shrq(r9, 32); + addq(r11, r10); + movl(r10, r11); + shrq(r11, 32); + addq(rdi, r9); + addq(rdi, r11); + movq(r9, rsi); + imulq(rsi, rdx); + imulq(r9, rax); + shlq(r10, 32); + orq(r10, rbx); + movl(eax, Address(rcx, 0)); + movl(r11, rsi); + shrq(rsi, 32); + addq(rdi, r11); + movl(r11, rdi); + shrq(rdi, 32); + addq(r9, rsi); + addq(r9, rdi); + imulq(rdx, rax); + pextrw(ebx, xmm0, 3); + lea(rdi, ExternalAddress(PI_INV_TABLE)); + subq(rcx, rdi); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, 19); + movl(rsi, 32768); + andl(rsi, ebx); + shrl(ebx, 4); + andl(ebx, 2047); + subl(ebx, 1023); + subl(ecx, ebx); + addq(r9, rdx); + movl(edx, ecx); + addl(edx, 32); + cmpl(ecx, 1); + jcc(Assembler::less, L_2TAG_PACKET_4_0_1); + negl(ecx); + addl(ecx, 29); + shll(r9); + movl(rdi, r9); + andl(r9, 536870911); + testl(r9, 268435456); + jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); + shrl(r9); + movl(ebx, 0); + shlq(r9, 32); + orq(r9, r11); + + bind(L_2TAG_PACKET_6_0_1); + + bind(L_2TAG_PACKET_7_0_1); + + cmpq(r9, 0); + jcc(Assembler::equal, L_2TAG_PACKET_8_0_1); + + bind(L_2TAG_PACKET_9_0_1); + bsrq(r11, r9); + movl(ecx, 29); + subl(ecx, r11); + jcc(Assembler::lessEqual, L_2TAG_PACKET_10_0_1); + shlq(r9); + movq(rax, r10); + shlq(r10); + addl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shrq(rax); + shrq(r8); + orq(r9, rax); + orq(r10, r8); + + bind(L_2TAG_PACKET_11_0_1); + cvtsi2sdq(xmm0, r9); + shrq(r10, 1); + cvtsi2sdq(xmm3, r10); + xorpd(xmm4, xmm4); + shll(edx, 4); + negl(edx); + addl(edx, 16368); + orl(edx, rsi); + xorl(edx, ebx); + pinsrw(xmm4, edx, 3); + movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL + movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL + xorpd(xmm5, xmm5); + subl(edx, 1008); + pinsrw(xmm5, edx, 3); + mulsd(xmm0, xmm4); + shll(rsi, 16); + sarl(rsi, 31); + mulsd(xmm3, xmm5); + movdqu(xmm1, xmm0); + mulsd(xmm0, xmm2); + shrl(rdi, 29); + addsd(xmm1, 
xmm3); + mulsd(xmm3, xmm2); + addl(rdi, rsi); + xorl(rdi, rsi); + mulsd(xmm6, xmm1); + movl(eax, rdi); + addsd(xmm6, xmm3); + movdqu(xmm2, xmm0); + addsd(xmm0, xmm6); + subsd(xmm2, xmm0); + addsd(xmm6, xmm2); + + bind(L_2TAG_PACKET_12_0_1); + movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL + mulsd(xmm1, xmm0); + movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL + pand(xmm4, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + cvttsd2sil(edx, xmm1); + cvtsi2sdl(xmm1, edx); + movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL + movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL + mulsd(xmm3, xmm1); + unpcklpd(xmm1, xmm1); + shll(eax, 3); + addl(edx, 1865216); + movdqu(xmm4, xmm0); + addl(edx, eax); + andl(edx, 63); + movdqu(xmm5, ExternalAddress(SC_4)); //0x54400000UL, 0x3fb921fbUL + lea(rax, ExternalAddress(Ctable)); + shll(edx, 5); + addq(rax, rdx); + mulpd(xmm2, xmm1); + subsd(xmm0, xmm3); + mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL + subsd(xmm4, xmm3); + movq(xmm7, Address(rax, 8)); + unpcklpd(xmm0, xmm0); + movdqu(xmm3, xmm4); + subsd(xmm4, xmm2); + mulpd(xmm5, xmm0); + subpd(xmm0, xmm2); + mulsd(xmm7, xmm4); + subsd(xmm3, xmm4); + mulpd(xmm5, xmm0); + mulpd(xmm0, xmm0); + subsd(xmm3, xmm2); + movdqu(xmm2, Address(rax, 0)); + subsd(xmm1, xmm3); + movq(xmm3, Address(rax, 24)); + addsd(xmm2, xmm3); + subsd(xmm7, xmm2); + subsd(xmm1, xmm6); + movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL + mulsd(xmm2, xmm4); + mulpd(xmm6, xmm0); + mulsd(xmm3, xmm4); + mulpd(xmm2, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL + mulsd(xmm4, Address(rax, 0)); + addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL + 
mulpd(xmm5, xmm0); + movdqu(xmm0, xmm3); + addsd(xmm3, Address(rax, 8)); + mulpd(xmm1, xmm7); + movdqu(xmm7, xmm4); + addsd(xmm4, xmm3); + addpd(xmm6, xmm5); + movq(xmm5, Address(rax, 8)); + subsd(xmm5, xmm3); + subsd(xmm3, xmm4); + addsd(xmm1, Address(rax, 16)); + mulpd(xmm6, xmm2); + addsd(xmm5, xmm0); + addsd(xmm3, xmm7); + addsd(xmm1, xmm5); + addsd(xmm1, xmm3); + addsd(xmm1, xmm6); + unpckhpd(xmm6, xmm6); + movdqu(xmm0, xmm4); + addsd(xmm1, xmm6); + addsd(xmm0, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_8_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + movl(r8, 0); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_1); + xorpd(xmm0, xmm0); + xorpd(xmm6, xmm6); + jmp(L_2TAG_PACKET_12_0_1); + + bind(L_2TAG_PACKET_10_0_1); + jcc(Assembler::equal, L_2TAG_PACKET_11_0_1); + negl(ecx); + shrq(r10); + movq(rax, r9); + shrq(r9); + subl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shlq(rax); + orq(r10, rax); + jmp(L_2TAG_PACKET_11_0_1); + + bind(L_2TAG_PACKET_4_0_1); + negl(ecx); + shlq(r9, 32); + orq(r9, r11); + shlq(r9); + movq(rdi, r9); + testl(r9, INT_MIN); + jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1); + shrl(r9); + movl(ebx, 0); + shrq(rdi, 3); + jmp(L_2TAG_PACKET_7_0_1); + + bind(L_2TAG_PACKET_5_0_1); + shrl(r9); + movl(ebx, 536870912); + shrl(ebx); + shlq(r9, 32); + orq(r9, r11); + shlq(rbx, 32); + addl(rdi, 536870912); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(ebx, 32768); + jmp(L_2TAG_PACKET_6_0_1); + + bind(L_2TAG_PACKET_13_0_1); + shrl(r9); + mov64(rbx, 0x100000000); + shrq(rbx); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(ebx, 32768); + shrq(rdi, 3); + addl(rdi, 536870912); + jmp(L_2TAG_PACKET_7_0_1); + + 
bind(L_2TAG_PACKET_3_0_1); + movq(xmm0, Address(rsp, 8)); + mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL + movq(Address(rsp, 0), xmm0); + + bind(L_2TAG_PACKET_14_0_1); + + bind(B1_4); + addq(rsp, 16); + pop(rbx); +} + +#endif + +#ifndef _LP64 + +ALIGNED_(8) juint _zero_none[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL +}; + +ALIGNED_(4) juint __4onpi_d[] = +{ + 0x6dc9c883UL, 0x3ff45f30UL +}; + +ALIGNED_(4) juint _TWO_32H[] = +{ + 0x00000000UL, 0x41f80000UL +}; + +ALIGNED_(4) juint _pi04_3d[] = +{ + 0x54442d00UL, 0x3fe921fbUL, 0x98cc5180UL, 0x3ce84698UL, 0xcbb5bf6cUL, + 0xb9dfc8f8UL +}; + +ALIGNED_(4) juint _pi04_5d[] = +{ + 0x54400000UL, 0x3fe921fbUL, 0x1a600000UL, 0x3dc0b461UL, 0x2e000000UL, + 0x3b93198aUL, 0x25200000UL, 0x396b839aUL, 0x533e63a0UL, 0x37027044UL +}; + +ALIGNED_(4) juint _SCALE[] = +{ + 0x00000000UL, 0x32600000UL +}; + +ALIGNED_(4) juint _zeros[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL +}; + +ALIGNED_(4) juint _pi04_2d[] = +{ + 0x54400000UL, 0x3fe921fbUL, 0x1a626331UL, 0x3dc0b461UL +}; + +ALIGNED_(4) juint _TWO_12H[] = +{ + 0x00000000UL, 0x40b80000UL +}; + +ALIGNED_(2) jushort __4onpi_31l[] = +{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x836e, 0xa2f9, + 0x40d8, 0x0000, 0x0000, 0x0000, 0x2a50, 0x9c88, 0x40b7, 0x0000, 0x0000, 0x0000, + 0xabe8, 0xfe13, 0x4099, 0x0000, 0x0000, 0x0000, 0x6ee0, 0xfa9a, 0x4079, 0x0000, + 0x0000, 0x0000, 0x9580, 0xdb62, 0x4058, 0x0000, 0x0000, 0x0000, 0x1c82, 0xc9e2, + 0x403d, 0x0000, 0x0000, 0x0000, 0xb1c0, 0xff28, 0x4019, 0x0000, 0x0000, 0x0000, + 0xef14, 0xaf7a, 0x3ffe, 0x0000, 0x0000, 0x0000, 0x48dc, 0xc36e, 0x3fdf, 0x0000, + 0x0000, 0x0000, 0x3740, 0xe909, 0x3fbe, 0x0000, 0x0000, 0x0000, 0x924a, 0xb801, + 0x3fa2, 0x0000, 0x0000, 0x0000, 0x3a32, 0xdd41, 0x3f83, 0x0000, 0x0000, 0x0000, + 0x8778, 0x873f, 0x3f62, 0x0000, 0x0000, 0x0000, 0x1298, 0xb1cb, 0x3f44, 0x0000, + 0x0000, 0x0000, 0xa208, 0x9cfb, 0x3f26, 0x0000, 0x0000, 0x0000, 
0xbaec, 0xd7d4, + 0x3f06, 0x0000, 0x0000, 0x0000, 0xd338, 0x8909, 0x3ee7, 0x0000, 0x0000, 0x0000, + 0x68b8, 0xe04d, 0x3ec7, 0x0000, 0x0000, 0x0000, 0x4e64, 0xdf90, 0x3eaa, 0x0000, + 0x0000, 0x0000, 0xc1a8, 0xeb1c, 0x3e89, 0x0000, 0x0000, 0x0000, 0x2720, 0xce7d, + 0x3e6a, 0x0000, 0x0000, 0x0000, 0x77b8, 0x8bf1, 0x3e4b, 0x0000, 0x0000, 0x0000, + 0xec7e, 0xe4a0, 0x3e2e, 0x0000, 0x0000, 0x0000, 0xffbc, 0xf12f, 0x3e0f, 0x0000, + 0x0000, 0x0000, 0xfdc0, 0xb301, 0x3deb, 0x0000, 0x0000, 0x0000, 0xc5ac, 0x9788, + 0x3dd1, 0x0000, 0x0000, 0x0000, 0x47da, 0x829b, 0x3db2, 0x0000, 0x0000, 0x0000, + 0xd9e4, 0xa6cf, 0x3d93, 0x0000, 0x0000, 0x0000, 0x36e8, 0xf961, 0x3d73, 0x0000, + 0x0000, 0x0000, 0xf668, 0xf463, 0x3d54, 0x0000, 0x0000, 0x0000, 0x5168, 0xf2ff, + 0x3d35, 0x0000, 0x0000, 0x0000, 0x758e, 0xea4f, 0x3d17, 0x0000, 0x0000, 0x0000, + 0xf17a, 0xebe5, 0x3cf8, 0x0000, 0x0000, 0x0000, 0x9cfa, 0x9e83, 0x3cd9, 0x0000, + 0x0000, 0x0000, 0xa4ba, 0xe294, 0x3cba, 0x0000, 0x0000, 0x0000, 0xd7ec, 0x9afe, + 0x3c9a, 0x0000, 0x0000, 0x0000, 0xae80, 0x8fc6, 0x3c79, 0x0000, 0x0000, 0x0000, + 0x3304, 0x8560, 0x3c5c, 0x0000, 0x0000, 0x0000, 0x6d70, 0xdf8f, 0x3c3b, 0x0000, + 0x0000, 0x0000, 0x3ef0, 0xafc3, 0x3c1e, 0x0000, 0x0000, 0x0000, 0xd0d8, 0x826b, + 0x3bfe, 0x0000, 0x0000, 0x0000, 0x1c80, 0xed4f, 0x3bdd, 0x0000, 0x0000, 0x0000, + 0x730c, 0xb0af, 0x3bc1, 0x0000, 0x0000, 0x0000, 0x6660, 0xc219, 0x3ba2, 0x0000, + 0x0000, 0x0000, 0x940c, 0xabe2, 0x3b83, 0x0000, 0x0000, 0x0000, 0xdffc, 0x8408, + 0x3b64, 0x0000, 0x0000, 0x0000, 0x6b98, 0xc402, 0x3b45, 0x0000, 0x0000, 0x0000, + 0x1818, 0x9cc4, 0x3b26, 0x0000, 0x0000, 0x0000, 0x5390, 0xaab6, 0x3b05, 0x0000, + 0x0000, 0x0000, 0xb070, 0xd464, 0x3ae9, 0x0000, 0x0000, 0x0000, 0x231a, 0x9ef0, + 0x3aca, 0x0000, 0x0000, 0x0000, 0x0670, 0xd1f1, 0x3aaa, 0x0000, 0x0000, 0x0000, + 0x7738, 0xd9f3, 0x3a8a, 0x0000, 0x0000, 0x0000, 0xa834, 0x8092, 0x3a6c, 0x0000, + 0x0000, 0x0000, 0xb45c, 0xce23, 0x3a4d, 0x0000, 0x0000, 0x0000, 0x36e8, 0xb0e5, + 0x3a2d, 
0x0000, 0x0000, 0x0000, 0xd156, 0xaf44, 0x3a10, 0x0000, 0x0000, 0x0000, + 0x9f52, 0x8c82, 0x39f1, 0x0000, 0x0000, 0x0000, 0x829c, 0xff83, 0x39d1, 0x0000, + 0x0000, 0x0000, 0x7d06, 0xefc6, 0x39b3, 0x0000, 0x0000, 0x0000, 0x93e0, 0xb0b7, + 0x3992, 0x0000, 0x0000, 0x0000, 0xedde, 0xc193, 0x3975, 0x0000, 0x0000, 0x0000, + 0xbbc0, 0xcf49, 0x3952, 0x0000, 0x0000, 0x0000, 0xbdf0, 0xd63c, 0x3937, 0x0000, + 0x0000, 0x0000, 0x1f34, 0x9f3a, 0x3918, 0x0000, 0x0000, 0x0000, 0x3f8e, 0xe579, + 0x38f9, 0x0000, 0x0000, 0x0000, 0x90c8, 0xc3f8, 0x38d9, 0x0000, 0x0000, 0x0000, + 0x48c0, 0xf8f8, 0x38b7, 0x0000, 0x0000, 0x0000, 0xed56, 0xafa6, 0x389c, 0x0000, + 0x0000, 0x0000, 0x8218, 0xb969, 0x387d, 0x0000, 0x0000, 0x0000, 0x1852, 0xec57, + 0x385e, 0x0000, 0x0000, 0x0000, 0x670c, 0xd674, 0x383e, 0x0000, 0x0000, 0x0000, + 0xad40, 0xc2c4, 0x3820, 0x0000, 0x0000, 0x0000, 0x2e80, 0xa696, 0x3801, 0x0000, + 0x0000, 0x0000, 0xd800, 0xc467, 0x37dc, 0x0000, 0x0000, 0x0000, 0x3c72, 0xc5ae, + 0x37c3, 0x0000, 0x0000, 0x0000, 0xb006, 0xac69, 0x37a4, 0x0000, 0x0000, 0x0000, + 0x34a0, 0x8cdf, 0x3782, 0x0000, 0x0000, 0x0000, 0x9ed2, 0xd25e, 0x3766, 0x0000, + 0x0000, 0x0000, 0x6fec, 0xaaaa, 0x3747, 0x0000, 0x0000, 0x0000, 0x6040, 0xfb5c, + 0x3726, 0x0000, 0x0000, 0x0000, 0x764c, 0xa3fc, 0x3708, 0x0000, 0x0000, 0x0000, + 0xb254, 0x954e, 0x36e9, 0x0000, 0x0000, 0x0000, 0x3e1c, 0xf5dc, 0x36ca, 0x0000, + 0x0000, 0x0000, 0x7b06, 0xc635, 0x36ac, 0x0000, 0x0000, 0x0000, 0xa8ba, 0xd738, + 0x368d, 0x0000, 0x0000, 0x0000, 0x06cc, 0xb24e, 0x366d, 0x0000, 0x0000, 0x0000, + 0x7108, 0xac76, 0x364f, 0x0000, 0x0000, 0x0000, 0x2324, 0xa7cb, 0x3630, 0x0000, + 0x0000, 0x0000, 0xac40, 0xef15, 0x360f, 0x0000, 0x0000, 0x0000, 0xae46, 0xd516, + 0x35f2, 0x0000, 0x0000, 0x0000, 0x615e, 0xe003, 0x35d3, 0x0000, 0x0000, 0x0000, + 0x0cf0, 0xefe7, 0x35b1, 0x0000, 0x0000, 0x0000, 0xfb50, 0xf98c, 0x3595, 0x0000, + 0x0000, 0x0000, 0x0abc, 0xf333, 0x3575, 0x0000, 0x0000, 0x0000, 0xdd60, 0xca3f, + 0x3555, 0x0000, 0x0000, 0x0000, 0x7eb6, 
0xd87f, 0x3538, 0x0000, 0x0000, 0x0000, + 0x44f4, 0xb291, 0x3519, 0x0000, 0x0000, 0x0000, 0xff80, 0xc982, 0x34f6, 0x0000, + 0x0000, 0x0000, 0x9de0, 0xd9b8, 0x34db, 0x0000, 0x0000, 0x0000, 0xcd42, 0x9366, + 0x34bc, 0x0000, 0x0000, 0x0000, 0xbef0, 0xfaee, 0x349d, 0x0000, 0x0000, 0x0000, + 0xdac4, 0xb6f1, 0x347d, 0x0000, 0x0000, 0x0000, 0xf140, 0x94de, 0x345d, 0x0000, + 0x0000, 0x0000, 0xa218, 0x8b4b, 0x343e, 0x0000, 0x0000, 0x0000, 0x6380, 0xa135, + 0x341e, 0x0000, 0x0000, 0x0000, 0xb184, 0x8cb2, 0x3402, 0x0000, 0x0000, 0x0000, + 0x196e, 0xdc61, 0x33e3, 0x0000, 0x0000, 0x0000, 0x0c00, 0xde05, 0x33c4, 0x0000, + 0x0000, 0x0000, 0xef9a, 0xbd38, 0x33a5, 0x0000, 0x0000, 0x0000, 0xc1a0, 0xdf00, + 0x3385, 0x0000, 0x0000, 0x0000, 0x1090, 0x9973, 0x3365, 0x0000, 0x0000, 0x0000, + 0x4882, 0x8301, 0x3348, 0x0000, 0x0000, 0x0000, 0x7abe, 0xadc7, 0x3329, 0x0000, + 0x0000, 0x0000, 0x7cba, 0xec2b, 0x330a, 0x0000, 0x0000, 0x0000, 0xa520, 0x8f21, + 0x32e9, 0x0000, 0x0000, 0x0000, 0x710c, 0x8d36, 0x32cc, 0x0000, 0x0000, 0x0000, + 0x5212, 0xc6ed, 0x32ad, 0x0000, 0x0000, 0x0000, 0x7308, 0xfd76, 0x328d, 0x0000, + 0x0000, 0x0000, 0x5014, 0xd548, 0x326f, 0x0000, 0x0000, 0x0000, 0xd3f2, 0xb499, + 0x3250, 0x0000, 0x0000, 0x0000, 0x7f74, 0xa606, 0x3230, 0x0000, 0x0000, 0x0000, + 0xf0a8, 0xd720, 0x3212, 0x0000, 0x0000, 0x0000, 0x185c, 0xe20f, 0x31f2, 0x0000, + 0x0000, 0x0000, 0xa5a8, 0x8738, 0x31d4, 0x0000, 0x0000, 0x0000, 0xdd74, 0xcafb, + 0x31b4, 0x0000, 0x0000, 0x0000, 0x98b6, 0xbd8e, 0x3196, 0x0000, 0x0000, 0x0000, + 0xe9de, 0x977f, 0x3177, 0x0000, 0x0000, 0x0000, 0x67c0, 0x818d, 0x3158, 0x0000, + 0x0000, 0x0000, 0xe52a, 0x9322, 0x3139, 0x0000, 0x0000, 0x0000, 0xe568, 0x9b6c, + 0x3119, 0x0000, 0x0000, 0x0000, 0x2358, 0xaa0a, 0x30fa, 0x0000, 0x0000, 0x0000, + 0xe480, 0xe13b, 0x30d9, 0x0000, 0x0000, 0x0000, 0x3024, 0x90a1, 0x30bd, 0x0000, + 0x0000, 0x0000, 0x9620, 0xda30, 0x309d, 0x0000, 0x0000, 0x0000, 0x898a, 0xb388, + 0x307f, 0x0000, 0x0000, 0x0000, 0xb24c, 0xc891, 0x3060, 0x0000, 0x0000, 
0x0000, + 0x8056, 0xf98b, 0x3041, 0x0000, 0x0000, 0x0000, 0x72a4, 0xa1ea, 0x3021, 0x0000, + 0x0000, 0x0000, 0x6af8, 0x9488, 0x3001, 0x0000, 0x0000, 0x0000, 0xe00c, 0xdfcb, + 0x2fe4, 0x0000, 0x0000, 0x0000, 0xeeec, 0xc941, 0x2fc4, 0x0000, 0x0000, 0x0000, + 0x53e0, 0xe70f, 0x2fa4, 0x0000, 0x0000, 0x0000, 0x8f60, 0x9c07, 0x2f85, 0x0000, + 0x0000, 0x0000, 0xb328, 0xc3e7, 0x2f68, 0x0000, 0x0000, 0x0000, 0x9404, 0xf8c7, + 0x2f48, 0x0000, 0x0000, 0x0000, 0x38e0, 0xc99f, 0x2f29, 0x0000, 0x0000, 0x0000, + 0x9778, 0xd984, 0x2f09, 0x0000, 0x0000, 0x0000, 0xe700, 0xd142, 0x2eea, 0x0000, + 0x0000, 0x0000, 0xd904, 0x9443, 0x2ecd, 0x0000, 0x0000, 0x0000, 0xd4ba, 0xae7e, + 0x2eae, 0x0000, 0x0000, 0x0000, 0x8e5e, 0x8524, 0x2e8f, 0x0000, 0x0000, 0x0000, + 0xb550, 0xc9ed, 0x2e6e, 0x0000, 0x0000, 0x0000, 0x53b8, 0x8648, 0x2e51, 0x0000, + 0x0000, 0x0000, 0xdae4, 0x87f9, 0x2e32, 0x0000, 0x0000, 0x0000, 0x2942, 0xd966, + 0x2e13, 0x0000, 0x0000, 0x0000, 0x4f28, 0xcf3c, 0x2df3, 0x0000, 0x0000, 0x0000, + 0xfa40, 0xc4ef, 0x2dd1, 0x0000, 0x0000, 0x0000, 0x4424, 0xbca7, 0x2db5, 0x0000, + 0x0000, 0x0000, 0x2e62, 0xcdc5, 0x2d97, 0x0000, 0x0000, 0x0000, 0xed88, 0x996b, + 0x2d78, 0x0000, 0x0000, 0x0000, 0x7c30, 0xd97d, 0x2d56, 0x0000, 0x0000, 0x0000, + 0xed26, 0xbf6e, 0x2d3a, 0x0000, 0x0000, 0x0000, 0x2918, 0x921b, 0x2d1a, 0x0000, + 0x0000, 0x0000, 0x4e24, 0xe84e, 0x2cfb, 0x0000, 0x0000, 0x0000, 0x6dc0, 0x92ec, + 0x2cdd, 0x0000, 0x0000, 0x0000, 0x4f2c, 0xacf8, 0x2cbd, 0x0000, 0x0000, 0x0000, + 0xc634, 0xf094, 0x2c9e, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe5d3, 0x2c7e, 0x0000, + 0x0000, 0x0000, 0x2180, 0xa600, 0x2c5b, 0x0000, 0x0000, 0x0000, 0x8480, 0xd680, + 0x2c3c, 0x0000, 0x0000, 0x0000, 0x8b24, 0xd63b, 0x2c22, 0x0000, 0x0000, 0x0000, + 0x02e0, 0xaa47, 0x2c00, 0x0000, 0x0000, 0x0000, 0x9ad0, 0xee84, 0x2be3, 0x0000, + 0x0000, 0x0000, 0xf7dc, 0xf699, 0x2bc6, 0x0000, 0x0000, 0x0000, 0xddde, 0xe490, + 0x2ba7, 0x0000, 0x0000, 0x0000, 0x34a0, 0xb4fd, 0x2b85, 0x0000, 0x0000, 0x0000, + 0x91b4, 0x8ef6, 
0x2b68, 0x0000, 0x0000, 0x0000, 0xa3e0, 0xa2a7, 0x2b47, 0x0000, + 0x0000, 0x0000, 0xcce4, 0x82b3, 0x2b2a, 0x0000, 0x0000, 0x0000, 0xe4be, 0x8207, + 0x2b0c, 0x0000, 0x0000, 0x0000, 0x1d92, 0xab43, 0x2aed, 0x0000, 0x0000, 0x0000, + 0xe818, 0xf9f6, 0x2acd, 0x0000, 0x0000, 0x0000, 0xff12, 0xba80, 0x2aaf, 0x0000, + 0x0000, 0x0000, 0x5254, 0x8529, 0x2a90, 0x0000, 0x0000, 0x0000, 0x1b88, 0xe032, + 0x2a71, 0x0000, 0x0000, 0x0000, 0x3248, 0xd86d, 0x2a50, 0x0000, 0x0000, 0x0000, + 0x3140, 0xc9d5, 0x2a2e, 0x0000, 0x0000, 0x0000, 0x14e6, 0xbd47, 0x2a14, 0x0000, + 0x0000, 0x0000, 0x5c10, 0xe544, 0x29f4, 0x0000, 0x0000, 0x0000, 0x9f50, 0x90b6, + 0x29d4, 0x0000, 0x0000, 0x0000, 0x9850, 0xab55, 0x29b6, 0x0000, 0x0000, 0x0000, + 0x2750, 0x9d07, 0x2998, 0x0000, 0x0000, 0x0000, 0x6700, 0x8bbb, 0x2973, 0x0000, + 0x0000, 0x0000, 0x5dba, 0xed31, 0x295a, 0x0000, 0x0000, 0x0000, 0x61dc, 0x85fe, + 0x293a, 0x0000, 0x0000, 0x0000, 0x9ba2, 0xd6b4, 0x291c, 0x0000, 0x0000, 0x0000, + 0x2d30, 0xe3a5, 0x28fb, 0x0000, 0x0000, 0x0000, 0x6630, 0xb566, 0x28dd, 0x0000, + 0x0000, 0x0000, 0x5ad4, 0xa829, 0x28bf, 0x0000, 0x0000, 0x0000, 0x89d8, 0xe290, + 0x28a0, 0x0000, 0x0000, 0x0000, 0x3916, 0xc428, 0x2881, 0x0000, 0x0000, 0x0000, + 0x0490, 0xbea4, 0x2860, 0x0000, 0x0000, 0x0000, 0xee06, 0x80ee, 0x2843, 0x0000, + 0x0000, 0x0000, 0xfc00, 0xf327, 0x2820, 0x0000, 0x0000, 0x0000, 0xea40, 0xa871, + 0x2800, 0x0000, 0x0000, 0x0000, 0x63d8, 0x9c26, 0x27e4, 0x0000, 0x0000, 0x0000, + 0x07ba, 0xc0c9, 0x27c7, 0x0000, 0x0000, 0x0000, 0x3fa2, 0x9797, 0x27a8, 0x0000, + 0x0000, 0x0000, 0x21c6, 0xfeca, 0x2789, 0x0000, 0x0000, 0x0000, 0xde40, 0x860d, + 0x2768, 0x0000, 0x0000, 0x0000, 0x9cc8, 0x98ce, 0x2749, 0x0000, 0x0000, 0x0000, + 0x3778, 0xa31c, 0x272a, 0x0000, 0x0000, 0x0000, 0xe778, 0xf6e2, 0x270b, 0x0000, + 0x0000, 0x0000, 0x59b8, 0xf841, 0x26ed, 0x0000, 0x0000, 0x0000, 0x02e0, 0xad04, + 0x26cd, 0x0000, 0x0000, 0x0000, 0x5a92, 0x9380, 0x26b0, 0x0000, 0x0000, 0x0000, + 0xc740, 0x8886, 0x268d, 0x0000, 0x0000, 0x0000, 
0x0680, 0xfaf8, 0x266c, 0x0000, + 0x0000, 0x0000, 0xfb60, 0x897f, 0x2653, 0x0000, 0x0000, 0x0000, 0x8760, 0xf903, + 0x2634, 0x0000, 0x0000, 0x0000, 0xad2a, 0xc2c8, 0x2615, 0x0000, 0x0000, 0x0000, + 0x2d86, 0x8aef, 0x25f6, 0x0000, 0x0000, 0x0000, 0x1ef4, 0xe627, 0x25d6, 0x0000, + 0x0000, 0x0000, 0x09e4, 0x8020, 0x25b7, 0x0000, 0x0000, 0x0000, 0x7548, 0xd227, + 0x2598, 0x0000, 0x0000, 0x0000, 0x75dc, 0xfb5b, 0x2579, 0x0000, 0x0000, 0x0000, + 0xea84, 0xc8b6, 0x255a, 0x0000, 0x0000, 0x0000, 0xe4d0, 0x8145, 0x253b, 0x0000, + 0x0000, 0x0000, 0x3640, 0x9768, 0x251c, 0x0000, 0x0000, 0x0000, 0x246a, 0xccec, + 0x24fe, 0x0000, 0x0000, 0x0000, 0x51d0, 0xa075, 0x24dd, 0x0000, 0x0000, 0x0000, + 0x4638, 0xa385, 0x24bf, 0x0000, 0x0000, 0x0000, 0xd788, 0xd776, 0x24a1, 0x0000, + 0x0000, 0x0000, 0x1370, 0x8997, 0x2482, 0x0000, 0x0000, 0x0000, 0x1e88, 0x9b67, + 0x2462, 0x0000, 0x0000, 0x0000, 0x6c08, 0xd975, 0x2444, 0x0000, 0x0000, 0x0000, + 0xfdb0, 0xcfc0, 0x2422, 0x0000, 0x0000, 0x0000, 0x3100, 0xc026, 0x2406, 0x0000, + 0x0000, 0x0000, 0xc5b4, 0xae64, 0x23e6, 0x0000, 0x0000, 0x0000, 0x2280, 0xf687, + 0x23c3, 0x0000, 0x0000, 0x0000, 0x2de0, 0x9006, 0x23a9, 0x0000, 0x0000, 0x0000, + 0x24bc, 0xf631, 0x238a, 0x0000, 0x0000, 0x0000, 0xb8d4, 0xa975, 0x236b, 0x0000, + 0x0000, 0x0000, 0xd9a4, 0xb949, 0x234b, 0x0000, 0x0000, 0x0000, 0xb54e, 0xbd39, + 0x232d, 0x0000, 0x0000, 0x0000, 0x4aac, 0x9a52, 0x230e, 0x0000, 0x0000, 0x0000, + 0xbbbc, 0xd085, 0x22ef, 0x0000, 0x0000, 0x0000, 0xdf18, 0xc633, 0x22cf, 0x0000, + 0x0000, 0x0000, 0x16d0, 0xeca5, 0x22af, 0x0000, 0x0000, 0x0000, 0xf2a0, 0xdf6f, + 0x228e, 0x0000, 0x0000, 0x0000, 0x8c44, 0xe86b, 0x2272, 0x0000, 0x0000, 0x0000, + 0x35c0, 0xbbf4, 0x2253, 0x0000, 0x0000, 0x0000, 0x0c40, 0xdafb, 0x2230, 0x0000, + 0x0000, 0x0000, 0x92dc, 0x9935, 0x2216, 0x0000, 0x0000, 0x0000, 0x0ca0, 0xbda6, + 0x21f3, 0x0000, 0x0000, 0x0000, 0x5958, 0xa6fd, 0x21d6, 0x0000, 0x0000, 0x0000, + 0xa3dc, 0x9d7f, 0x21b9, 0x0000, 0x0000, 0x0000, 0x79dc, 0xfcb5, 0x2199, 0x0000, 
+ 0x0000, 0x0000, 0xf264, 0xcebb, 0x217b, 0x0000, 0x0000, 0x0000, 0x0abe, 0x8308, + 0x215c, 0x0000, 0x0000, 0x0000, 0x30ae, 0xb463, 0x213d, 0x0000, 0x0000, 0x0000, + 0x6228, 0xb040, 0x211c, 0x0000, 0x0000, 0x0000, 0xc9b2, 0xf43b, 0x20ff, 0x0000, + 0x0000, 0x0000, 0x3d8e, 0xa4b3, 0x20e0, 0x0000, 0x0000, 0x0000, 0x84e6, 0x8dab, + 0x20c1, 0x0000, 0x0000, 0x0000, 0xa124, 0x9b74, 0x20a1, 0x0000, 0x0000, 0x0000, + 0xc276, 0xd497, 0x2083, 0x0000, 0x0000, 0x0000, 0x6354, 0xa466, 0x2063, 0x0000, + 0x0000, 0x0000, 0x8654, 0xaf0a, 0x2044, 0x0000, 0x0000, 0x0000, 0x1d20, 0xfa5c, + 0x2024, 0x0000, 0x0000, 0x0000, 0xbcd0, 0xf3f0, 0x2004, 0x0000, 0x0000, 0x0000, + 0xedf0, 0xf0b6, 0x1fe7, 0x0000, 0x0000, 0x0000, 0x45bc, 0x9182, 0x1fc9, 0x0000, + 0x0000, 0x0000, 0xe254, 0xdc85, 0x1faa, 0x0000, 0x0000, 0x0000, 0xb898, 0xe9b1, + 0x1f8a, 0x0000, 0x0000, 0x0000, 0x0ebe, 0xe6f0, 0x1f6c, 0x0000, 0x0000, 0x0000, + 0xa9b8, 0xf584, 0x1f4c, 0x0000, 0x0000, 0x0000, 0x12e8, 0xdf6b, 0x1f2e, 0x0000, + 0x0000, 0x0000, 0x9f9e, 0xcd55, 0x1f0f, 0x0000, 0x0000, 0x0000, 0x05a0, 0xec3a, + 0x1eef, 0x0000, 0x0000, 0x0000, 0xd8e0, 0x96f8, 0x1ed1, 0x0000, 0x0000, 0x0000, + 0x3bd4, 0xccc6, 0x1eb1, 0x0000, 0x0000, 0x0000, 0x4910, 0xb87b, 0x1e93, 0x0000, + 0x0000, 0x0000, 0xbefc, 0xd40b, 0x1e73, 0x0000, 0x0000, 0x0000, 0x317e, 0xa406, + 0x1e55, 0x0000, 0x0000, 0x0000, 0x6bb2, 0xc2b2, 0x1e36, 0x0000, 0x0000, 0x0000, + 0xb87e, 0xbb78, 0x1e17, 0x0000, 0x0000, 0x0000, 0xa03c, 0xdbbd, 0x1df7, 0x0000, + 0x0000, 0x0000, 0x5b6c, 0xe3c8, 0x1dd9, 0x0000, 0x0000, 0x0000, 0x8968, 0xca8e, + 0x1dba, 0x0000, 0x0000, 0x0000, 0xc024, 0xe6ab, 0x1d9a, 0x0000, 0x0000, 0x0000, + 0x4110, 0xd4eb, 0x1d7a, 0x0000, 0x0000, 0x0000, 0xa168, 0xbdb5, 0x1d5d, 0x0000, + 0x0000, 0x0000, 0x012e, 0xa5fa, 0x1d3e, 0x0000, 0x0000, 0x0000, 0x6838, 0x9c1f, + 0x1d1e, 0x0000, 0x0000, 0x0000, 0xa158, 0xaa76, 0x1d00, 0x0000, 0x0000, 0x0000, + 0x090a, 0xbd95, 0x1ce1, 0x0000, 0x0000, 0x0000, 0xf73e, 0x8b6d, 0x1cc2, 0x0000, + 0x0000, 0x0000, 0x5fda, 
0xbcbf, 0x1ca3, 0x0000, 0x0000, 0x0000, 0xdbe8, 0xb89f, + 0x1c84, 0x0000, 0x0000, 0x0000, 0x6e4c, 0x96c7, 0x1c64, 0x0000, 0x0000, 0x0000, + 0x19c2, 0xf2a4, 0x1c46, 0x0000, 0x0000, 0x0000, 0xb800, 0xf855, 0x1c1e, 0x0000, + 0x0000, 0x0000, 0x87fc, 0x85ff, 0x1c08, 0x0000, 0x0000, 0x0000, 0x1418, 0x839f, + 0x1be9, 0x0000, 0x0000, 0x0000, 0x6186, 0xd9d8, 0x1bca, 0x0000, 0x0000, 0x0000, + 0xf500, 0xabaa, 0x1ba6, 0x0000, 0x0000, 0x0000, 0x7b36, 0xdafe, 0x1b8c, 0x0000, + 0x0000, 0x0000, 0xf394, 0xe6d8, 0x1b6c, 0x0000, 0x0000, 0x0000, 0x6efc, 0x9e55, + 0x1b4e, 0x0000, 0x0000, 0x0000, 0x5e10, 0xc523, 0x1b2e, 0x0000, 0x0000, 0x0000, + 0x8210, 0xb6f9, 0x1b0d, 0x0000, 0x0000, 0x0000, 0x9ab0, 0x96e3, 0x1af1, 0x0000, + 0x0000, 0x0000, 0x3864, 0x92e7, 0x1ad1, 0x0000, 0x0000, 0x0000, 0x9878, 0xdc65, + 0x1ab1, 0x0000, 0x0000, 0x0000, 0xfa20, 0xd6cb, 0x1a94, 0x0000, 0x0000, 0x0000, + 0x6c00, 0xa4e4, 0x1a70, 0x0000, 0x0000, 0x0000, 0xab40, 0xb41b, 0x1a53, 0x0000, + 0x0000, 0x0000, 0x43a4, 0x8ede, 0x1a37, 0x0000, 0x0000, 0x0000, 0x22e0, 0x9314, + 0x1a15, 0x0000, 0x0000, 0x0000, 0x6170, 0xb949, 0x19f8, 0x0000, 0x0000, 0x0000, + 0x6b00, 0xe056, 0x19d8, 0x0000, 0x0000, 0x0000, 0x9ba8, 0xa94c, 0x19b9, 0x0000, + 0x0000, 0x0000, 0xfaa0, 0xaa16, 0x199b, 0x0000, 0x0000, 0x0000, 0x899a, 0xf627, + 0x197d, 0x0000, 0x0000, 0x0000, 0x9f20, 0xfb70, 0x195d, 0x0000, 0x0000, 0x0000, + 0xa4b8, 0xc176, 0x193e, 0x0000, 0x0000, 0x0000, 0xb21c, 0x85c3, 0x1920, 0x0000, + 0x0000, 0x0000, 0x50d2, 0x9b19, 0x1901, 0x0000, 0x0000, 0x0000, 0xd4b0, 0xb708, + 0x18e0, 0x0000, 0x0000, 0x0000, 0xfb88, 0xf510, 0x18c1, 0x0000, 0x0000, 0x0000, + 0x31ec, 0xdc8d, 0x18a3, 0x0000, 0x0000, 0x0000, 0x3c00, 0xbff9, 0x1885, 0x0000, + 0x0000, 0x0000, 0x5020, 0xc30b, 0x1862, 0x0000, 0x0000, 0x0000, 0xd4f0, 0xda0c, + 0x1844, 0x0000, 0x0000, 0x0000, 0x20d2, 0x99a5, 0x1828, 0x0000, 0x0000, 0x0000, + 0x852e, 0xd159, 0x1809, 0x0000, 0x0000, 0x0000, 0x7cd8, 0x97a1, 0x17e9, 0x0000, + 0x0000, 0x0000, 0x423a, 0x997b, 0x17cb, 0x0000, 0x0000, 
0x0000, 0xc1c0, 0xbe7d, + 0x17a8, 0x0000, 0x0000, 0x0000, 0xe8bc, 0xdcdd, 0x178d, 0x0000, 0x0000, 0x0000, + 0x8b28, 0xae06, 0x176e, 0x0000, 0x0000, 0x0000, 0x102e, 0xb8d4, 0x174f, 0x0000, + 0x0000, 0x0000, 0xaa00, 0xaa5c, 0x172f, 0x0000, 0x0000, 0x0000, 0x51f0, 0x9fc0, + 0x170e, 0x0000, 0x0000, 0x0000, 0xf858, 0xe181, 0x16f2, 0x0000, 0x0000, 0x0000, + 0x91a8, 0x8162, 0x16d3, 0x0000, 0x0000, 0x0000, 0x5f40, 0xcb6f, 0x16b1, 0x0000, + 0x0000, 0x0000, 0xbb50, 0xe55f, 0x1693, 0x0000, 0x0000, 0x0000, 0xacd2, 0xd895, + 0x1676, 0x0000, 0x0000, 0x0000, 0xef30, 0x97bf, 0x1654, 0x0000, 0x0000, 0x0000, + 0xf700, 0xb3d7, 0x1633, 0x0000, 0x0000, 0x0000, 0x3454, 0xa7b5, 0x1619, 0x0000, + 0x0000, 0x0000, 0x6b00, 0xa929, 0x15f6, 0x0000, 0x0000, 0x0000, 0x9f04, 0x89f7, + 0x15db, 0x0000, 0x0000, 0x0000, 0xad78, 0xd985, 0x15bc, 0x0000, 0x0000, 0x0000, + 0xa46a, 0xae3f, 0x159d, 0x0000, 0x0000, 0x0000, 0x63a0, 0xd0da, 0x157c, 0x0000, + 0x0000, 0x0000, 0x5e90, 0x817d, 0x155e, 0x0000, 0x0000, 0x0000, 0x1494, 0xb13f, + 0x1540, 0x0000, 0x0000, 0x0000, 0x0090, 0x9c40, 0x1521, 0x0000, 0x0000, 0x0000, + 0xdd70, 0xcc86, 0x1500, 0x0000, 0x0000, 0x0000, 0x64f8, 0xdb6f, 0x14e1, 0x0000, + 0x0000, 0x0000, 0xe22c, 0xac17, 0x14c3, 0x0000, 0x0000, 0x0000, 0x60e0, 0xa9ad, + 0x14a3, 0x0000, 0x0000, 0x0000, 0x4640, 0xd658, 0x1481, 0x0000, 0x0000, 0x0000, + 0x6490, 0xa181, 0x1467, 0x0000, 0x0000, 0x0000, 0x1df4, 0xaaa2, 0x1447, 0x0000, + 0x0000, 0x0000, 0xb94a, 0x8f61, 0x1429, 0x0000, 0x0000, 0x0000, 0x5198, 0x9d83, + 0x1409, 0x0000, 0x0000, 0x0000, 0x0f7a, 0xa818, 0x13eb, 0x0000, 0x0000, 0x0000, + 0xc45e, 0xc06c, 0x13cc, 0x0000, 0x0000, 0x0000, 0x4ec0, 0xfa29, 0x13a8, 0x0000, + 0x0000, 0x0000, 0x6418, 0x8cad, 0x138c, 0x0000, 0x0000, 0x0000, 0xbcc8, 0xe7d1, + 0x136f, 0x0000, 0x0000, 0x0000, 0xc934, 0xf9b0, 0x134f, 0x0000, 0x0000, 0x0000, + 0x6ce0, 0x98df, 0x1331, 0x0000, 0x0000, 0x0000, 0x3516, 0xe5e9, 0x1312, 0x0000, + 0x0000, 0x0000, 0xc6c0, 0xef8b, 0x12ef, 0x0000, 0x0000, 0x0000, 0xaf02, 0x913d, + 
0x12d4, 0x0000, 0x0000, 0x0000, 0xd230, 0xe1d5, 0x12b5, 0x0000, 0x0000, 0x0000, + 0xfba8, 0xc232, 0x1295, 0x0000, 0x0000, 0x0000, 0x7ba4, 0xabeb, 0x1277, 0x0000, + 0x0000, 0x0000, 0x6e5c, 0xc692, 0x1258, 0x0000, 0x0000, 0x0000, 0x76a2, 0x9756, + 0x1239, 0x0000, 0x0000, 0x0000, 0xe180, 0xe423, 0x1214, 0x0000, 0x0000, 0x0000, + 0x8c3c, 0x90f8, 0x11fb, 0x0000, 0x0000, 0x0000, 0x9f3c, 0x9fd2, 0x11dc, 0x0000, + 0x0000, 0x0000, 0x53e0, 0xb73e, 0x11bd, 0x0000, 0x0000, 0x0000, 0x45be, 0x88d6, + 0x119e, 0x0000, 0x0000, 0x0000, 0x111a, 0x8bc0, 0x117f, 0x0000, 0x0000, 0x0000, + 0xe26a, 0xd7ff, 0x1160, 0x0000, 0x0000, 0x0000, 0xfb60, 0xdd8d, 0x113f, 0x0000, + 0x0000, 0x0000, 0x9370, 0xc108, 0x1120, 0x0000, 0x0000, 0x0000, 0x9654, 0x8baf, + 0x1103, 0x0000, 0x0000, 0x0000, 0xd6ec, 0xd6b9, 0x10e4, 0x0000, 0x0000, 0x0000, + 0x23e4, 0xd7b7, 0x10c4, 0x0000, 0x0000, 0x0000, 0x1aa6, 0xa847, 0x10a6, 0x0000, + 0x0000, 0x0000, 0xbee6, 0x9fef, 0x1087, 0x0000, 0x0000, 0x0000, 0x26d0, 0xa6eb, + 0x1066, 0x0000, 0x0000, 0x0000, 0x5b86, 0xa880, 0x1049, 0x0000, 0x0000, 0x0000, + 0x125c, 0xd971, 0x1029, 0x0000, 0x0000, 0x0000, 0x1f78, 0x9d18, 0x100a, 0x0000, + 0x0000, 0x0000, 0x0e84, 0xb15b, 0x0feb, 0x0000, 0x0000, 0x0000, 0xd0c0, 0xc150, + 0x0fcc, 0x0000, 0x0000, 0x0000, 0xa330, 0xc40c, 0x0fad, 0x0000, 0x0000, 0x0000, + 0x5202, 0xfc2c, 0x0f8f, 0x0000, 0x0000, 0x0000, 0x3f7c, 0xecf5, 0x0f6f, 0x0000, + 0x0000, 0x0000, 0xef44, 0xfdfd, 0x0f50, 0x0000, 0x0000, 0x0000, 0x3f6c, 0xab1b, + 0x0f31, 0x0000, 0x0000, 0x0000, 0xf658, 0x89ec, 0x0f11, 0x0000, 0x0000, 0x0000, + 0xbfc8, 0x9ba8, 0x0ef4, 0x0000, 0x0000, 0x0000, 0x3d40, 0xbe21, 0x0ed5, 0x0000, + 0x0000, 0x0000, 0xbbc4, 0xc70d, 0x0eb6, 0x0000, 0x0000, 0x0000, 0x5158, 0xdb16, + 0x0e96, 0x0000, 0x0000, 0x0000, 0xb5a8, 0xa8d8, 0x0e78, 0x0000, 0x0000, 0x0000, + 0xcccc, 0xb40e, 0x0e58, 0x0000, 0x0000, 0x0000, 0x448c, 0xcb62, 0x0e3a, 0x0000, + 0x0000, 0x0000, 0xf12a, 0x8aed, 0x0e1b, 0x0000, 0x0000, 0x0000, 0x79d0, 0xc59c, + 0x0dfb, 0x0000, 0x0000, 0x0000, 
0x06b4, 0xcdc9, 0x0ddd, 0x0000, 0x0000, 0x0000, + 0xae70, 0xa979, 0x0dbe, 0x0000, 0x0000, 0x0000, 0x317c, 0xa8fb, 0x0d9e, 0x0000, + 0x0000, 0x0000, 0x5fe0, 0x8a50, 0x0d7d, 0x0000, 0x0000, 0x0000, 0x70b6, 0xfdfa, + 0x0d61, 0x0000, 0x0000, 0x0000, 0x1640, 0x9dc7, 0x0d41, 0x0000, 0x0000, 0x0000, + 0x9a9c, 0xdc50, 0x0d23, 0x0000, 0x0000, 0x0000, 0x4fcc, 0x9a9b, 0x0d04, 0x0000, + 0x0000, 0x0000, 0x7e48, 0x8f77, 0x0ce5, 0x0000, 0x0000, 0x0000, 0x84e4, 0xd4b9, + 0x0cc6, 0x0000, 0x0000, 0x0000, 0x84e0, 0xbd10, 0x0ca6, 0x0000, 0x0000, 0x0000, + 0x1b0a, 0xc8d9, 0x0c88, 0x0000, 0x0000, 0x0000, 0x6a48, 0xfc81, 0x0c68, 0x0000, + 0x0000, 0x0000, 0x070a, 0xbef6, 0x0c4a, 0x0000, 0x0000, 0x0000, 0x8a70, 0xf096, + 0x0c2b, 0x0000, 0x0000, 0x0000, 0xecc2, 0xc994, 0x0c0c, 0x0000, 0x0000, 0x0000, + 0x1540, 0x9537, 0x0bea, 0x0000, 0x0000, 0x0000, 0x1b02, 0xab5b, 0x0bce, 0x0000, + 0x0000, 0x0000, 0x5dc0, 0xb0c8, 0x0bad, 0x0000, 0x0000, 0x0000, 0xc928, 0xe034, + 0x0b8f, 0x0000, 0x0000, 0x0000, 0x2d12, 0xb4b0, 0x0b71, 0x0000, 0x0000, 0x0000, + 0x8fc2, 0xbb94, 0x0b52, 0x0000, 0x0000, 0x0000, 0xe236, 0xe22f, 0x0b33, 0x0000, + 0x0000, 0x0000, 0xb97c, 0xbe9e, 0x0b13, 0x0000, 0x0000, 0x0000, 0xe1a6, 0xe16d, + 0x0af5, 0x0000, 0x0000, 0x0000, 0xd330, 0xbaf0, 0x0ad6, 0x0000, 0x0000, 0x0000, + 0xc0bc, 0xbbd0, 0x0ab7, 0x0000, 0x0000, 0x0000, 0x8e66, 0xdd9b, 0x0a98, 0x0000, + 0x0000, 0x0000, 0xc95c, 0xf799, 0x0a79, 0x0000, 0x0000, 0x0000, 0xdac0, 0xbe4c, + 0x0a55, 0x0000, 0x0000, 0x0000, 0xafc0, 0xc378, 0x0a37, 0x0000, 0x0000, 0x0000, + 0xa880, 0xe341, 0x0a19, 0x0000, 0x0000, 0x0000, 0xc242, 0x81f6, 0x09fd, 0x0000, + 0x0000, 0x0000, 0x7470, 0xc777, 0x09de, 0x0000, 0x0000, 0x0000, 0x62bc, 0xb684, + 0x09be, 0x0000, 0x0000, 0x0000, 0x43ac, 0x8c58, 0x099f, 0x0000, 0x0000, 0x0000, + 0xcc3c, 0xf9ac, 0x0981, 0x0000, 0x0000, 0x0000, 0x1526, 0xb670, 0x0962, 0x0000, + 0x0000, 0x0000, 0xc9fe, 0xdf50, 0x0943, 0x0000, 0x0000, 0x0000, 0x6ae6, 0xc065, + 0x0924, 0x0000, 0x0000, 0x0000, 0xb114, 0xcf29, 0x0905, 0x0000, 
0x0000, 0x0000, + 0xd388, 0x922a, 0x08e4, 0x0000, 0x0000, 0x0000, 0xcf54, 0xb926, 0x08c7, 0x0000, + 0x0000, 0x0000, 0x3826, 0xe855, 0x08a8, 0x0000, 0x0000, 0x0000, 0xe7c8, 0x829b, + 0x0888, 0x0000, 0x0000, 0x0000, 0x546c, 0xa903, 0x086a, 0x0000, 0x0000, 0x0000, + 0x8768, 0x99cc, 0x0849, 0x0000, 0x0000, 0x0000, 0x00ac, 0xf529, 0x082b, 0x0000, + 0x0000, 0x0000, 0x2658, 0x9f0b, 0x080c, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x9e21, + 0x07ee, 0x0000, 0x0000, 0x0000, 0x6da2, 0x9910, 0x07cf, 0x0000, 0x0000, 0x0000, + 0x9220, 0xf9b3, 0x07b0, 0x0000, 0x0000, 0x0000, 0x3d90, 0xa541, 0x0791, 0x0000, + 0x0000, 0x0000, 0x6e4c, 0xe7cc, 0x0771, 0x0000, 0x0000, 0x0000, 0xa8fa, 0xe80a, + 0x0753, 0x0000, 0x0000, 0x0000, 0x4e14, 0xc3a7, 0x0734, 0x0000, 0x0000, 0x0000, + 0xf7e0, 0xbad9, 0x0712, 0x0000, 0x0000, 0x0000, 0xfea0, 0xeff2, 0x06f5, 0x0000, + 0x0000, 0x0000, 0xcef6, 0xbd48, 0x06d7, 0x0000, 0x0000, 0x0000, 0x7544, 0xf559, + 0x06b7, 0x0000, 0x0000, 0x0000, 0x2388, 0xf655, 0x0698, 0x0000, 0x0000, 0x0000, + 0xe900, 0xad56, 0x0676, 0x0000, 0x0000, 0x0000, 0x2cc0, 0x8437, 0x0659, 0x0000, + 0x0000, 0x0000, 0x3068, 0xc544, 0x063b, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe73c, + 0x061b, 0x0000, 0x0000, 0x0000, 0xee50, 0x9d49, 0x05fc, 0x0000, 0x0000, 0x0000, + 0x93d2, 0x81f6, 0x05df, 0x0000, 0x0000, 0x0000, 0x941c, 0xadff, 0x05bf, 0x0000, + 0x0000, 0x0000, 0x2ce2, 0x8e45, 0x05a1, 0x0000, 0x0000, 0x0000, 0x4a60, 0x95fd, + 0x0581, 0x0000, 0x0000, 0x0000, 0x79f8, 0xb83a, 0x0563, 0x0000, 0x0000, 0x0000, + 0xcb58, 0xa1f5, 0x0543, 0x0000, 0x0000, 0x0000, 0x2a3a, 0xdc36, 0x0525, 0x0000, + 0x0000, 0x0000, 0x14ee, 0x890e, 0x0506, 0x0000, 0x0000, 0x0000, 0x8f20, 0xc432, + 0x04e3, 0x0000, 0x0000, 0x0000, 0x8440, 0xb21d, 0x04c6, 0x0000, 0x0000, 0x0000, + 0x5430, 0xf698, 0x04a7, 0x0000, 0x0000, 0x0000, 0x04ae, 0x8b20, 0x048a, 0x0000, + 0x0000, 0x0000, 0x04d0, 0xe872, 0x046b, 0x0000, 0x0000, 0x0000, 0xc78e, 0x8893, + 0x044c, 0x0000, 0x0000, 0x0000, 0x0f78, 0x9895, 0x042b, 0x0000, 0x0000, 0x0000, + 0x11d4, 
0xdf2e, 0x040d, 0x0000, 0x0000, 0x0000, 0xe84c, 0x89d5, 0x03ef, 0x0000, + 0x0000, 0x0000, 0xf7be, 0x8a67, 0x03d0, 0x0000, 0x0000, 0x0000, 0x95d0, 0xc906, + 0x03b1, 0x0000, 0x0000, 0x0000, 0x64ce, 0xd96c, 0x0392, 0x0000, 0x0000, 0x0000, + 0x97ba, 0xa16f, 0x0373, 0x0000, 0x0000, 0x0000, 0x463c, 0xc51a, 0x0354, 0x0000, + 0x0000, 0x0000, 0xef0a, 0xe93e, 0x0335, 0x0000, 0x0000, 0x0000, 0x526a, 0xa466, + 0x0316, 0x0000, 0x0000, 0x0000, 0x4140, 0xa94d, 0x02f5, 0x0000, 0x0000, 0x0000, + 0xb4ec, 0xce68, 0x02d8, 0x0000, 0x0000, 0x0000, 0x4fa2, 0x8490, 0x02b9, 0x0000, + 0x0000, 0x0000, 0x4e60, 0xca98, 0x0298, 0x0000, 0x0000, 0x0000, 0x08dc, 0xe09c, + 0x027a, 0x0000, 0x0000, 0x0000, 0x2b90, 0xc7e3, 0x025c, 0x0000, 0x0000, 0x0000, + 0x5a7c, 0xf8ef, 0x023c, 0x0000, 0x0000, 0x0000, 0x5022, 0x9d58, 0x021e, 0x0000, + 0x0000, 0x0000, 0x553a, 0xe242, 0x01ff, 0x0000, 0x0000, 0x0000, 0x7e6e, 0xb54d, + 0x01e0, 0x0000, 0x0000, 0x0000, 0xd2d4, 0xa88c, 0x01c1, 0x0000, 0x0000, 0x0000, + 0x75b6, 0xfe6d, 0x01a2, 0x0000, 0x0000, 0x0000, 0x3bb2, 0xf04c, 0x0183, 0x0000, + 0x0000, 0x0000, 0xc2d0, 0xc046, 0x0163, 0x0000, 0x0000, 0x0000, 0x250c, 0xf9d6, + 0x0145, 0x0000, 0x0000, 0x0000, 0xb7b4, 0x8a0d, 0x0126, 0x0000, 0x0000, 0x0000, + 0x1a72, 0xe4f5, 0x0107, 0x0000, 0x0000, 0x0000, 0x825c, 0xa9b8, 0x00e8, 0x0000, + 0x0000, 0x0000, 0x6c90, 0xc9ad, 0x00c6, 0x0000, 0x0000, 0x0000, 0x4d00, 0xd1bb, + 0x00aa, 0x0000, 0x0000, 0x0000, 0xa4a0, 0xee01, 0x0087, 0x0000, 0x0000, 0x0000, + 0x89a8, 0xbe9f, 0x006b, 0x0000, 0x0000, 0x0000, 0x038e, 0xc80c, 0x004d, 0x0000, + 0x0000, 0x0000, 0xfe26, 0x8384, 0x002e, 0x0000, 0x0000, 0x0000, 0xcd90, 0xca57, + 0x000e, 0x0000 +}; + +void MacroAssembler::libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { + Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; + Label B1_13, B1_14, B1_15; + + assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); + + 
address zero_none = (address)_zero_none; + address _4onpi_d = (address)__4onpi_d; + address TWO_32H = (address)_TWO_32H; + address pi04_3d = (address)_pi04_3d; + address pi04_5d = (address)_pi04_5d; + address SCALE = (address)_SCALE; + address zeros = (address)_zeros; + address pi04_2d = (address)_pi04_2d; + address TWO_12H = (address)_TWO_12H; + address _4onpi_31l = (address)__4onpi_31l; + + bind(B1_1); + push(ebp); + movl(ebp, esp); + andl(esp, -16); + push(esi); + push(edi); + push(ebx); + subl(esp, 20); + movzwl(ebx, Address(ebp, 16)); + andl(ebx, 32767); + movl(eax, Address(ebp, 20)); + cmpl(ebx, 16413); + movl(esi, Address(ebp, 24)); + movl(Address(esp, 4), eax); + jcc(Assembler::greaterEqual, B1_8); + + bind(B1_2); + fld_x(Address(ebp, 8)); + fld_d(ExternalAddress(_4onpi_d)); //0x6dc9c883UL, 0x3ff45f30UL + fmul(1); + fstp_x(Address(esp, 8)); + movzwl(ecx, Address(esp, 16)); + negl(ecx); + addl(ecx, 30); + movl(eax, Address(esp, 12)); + shrl(eax); + cmpl(Address(esp, 4), 0); + jcc(Assembler::notEqual, B1_4); + + bind(B1_3); + lea(ecx, Address(eax, 1)); + andl(ecx, -2); + jmp(B1_5); + + bind(B1_4); + movl(ecx, eax); + addl(eax, Address(esp, 4)); + movl(edx, eax); + andl(edx, 1); + addl(ecx, edx); + + bind(B1_5); + fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL + cmpl(ebx, 16400); + movl(Address(esp, 0), ecx); + fild_s(Address(esp, 0)); + jcc(Assembler::greaterEqual, B1_7); + + bind(B1_6); + fld_d(ExternalAddress(pi04_3d)); //0x54442d00UL, 0x3fe921fbUL + fmul(1); + fsubp(3); + fxch(1); + fmul(2); + fld_s(2); + fadd(1); + fsubrp(1); + fld_s(0); + fxch(1); + fsuba(3); + fld_d(ExternalAddress(8 + pi04_3d)); //0x98cc5180UL, 0x3ce84698UL + fmul(3); + fsuba(2); + fxch(1); + fsub(2); + fsubrp(1); + faddp(3); + fld_d(ExternalAddress(16 + pi04_3d)); //0xcbb5bf6cUL, 0xb9dfc8f8UL + fmulp(2); + fld_s(1); + fsubr(1); + fsuba(1); + fxch(2); + fsubp(1); + faddp(2); + fxch(1); + jmp(B1_15); + + bind(B1_7); + fld_d(ExternalAddress(pi04_5d)); //0x54400000UL, 
0x3fe921fbUL + fmul(1); + fsubp(3); + fxch(1); + fmul(2); + fld_s(2); + fadd(1); + fsubrp(1); + fld_s(0); + fxch(1); + fsuba(3); + fld_d(ExternalAddress(8 + pi04_5d)); //0x1a600000UL, 0x3dc0b461UL + fmul(3); + fsuba(2); + fxch(1); + fsub(2); + fsubrp(1); + faddp(3); + fld_d(ExternalAddress(16 + pi04_5d)); //0x2e000000UL, 0x3b93198aUL + fmul(2); + fld_s(0); + fsubr(2); + fsuba(2); + fxch(1); + fsubp(2); + fxch(1); + faddp(3); + fld_d(ExternalAddress(24 + pi04_5d)); //0x25200000UL, 0x396b839aUL + fmul(2); + fld_s(0); + fsubr(2); + fsuba(2); + fxch(1); + fsubp(2); + fxch(1); + faddp(3); + fld_d(ExternalAddress(32 + pi04_5d)); //0x533e63a0UL, 0x37027044UL + fmulp(2); + fld_s(1); + fsubr(1); + fsuba(1); + fxch(2); + fsubp(1); + faddp(2); + fxch(1); + jmp(B1_15); + + bind(B1_8); + fld_x(Address(ebp, 8)); + addl(ebx, -16417); + fmul_d(as_Address(ExternalAddress(SCALE))); //0x00000000UL, 0x32600000UL + movl(eax, -2078209981); + imull(ebx); + addl(edx, ebx); + movl(ecx, ebx); + sarl(edx, 4); + sarl(ecx, 31); + subl(edx, ecx); + movl(eax, edx); + shll(eax, 5); + fstp_x(Address(ebp, 8)); + fld_x(Address(ebp, 8)); + subl(eax, edx); + movl(Address(ebp, 8), 0); + subl(ebx, eax); + fld_x(Address(ebp, 8)); + cmpl(ebx, 17); + fsuba(1); + jcc(Assembler::less, B1_10); + + bind(B1_9); + lea(eax, Address(noreg, edx, Address::times_8)); + lea(ecx, Address(eax, edx, Address::times_4)); + incl(edx); + fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmul(2); + fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmul(2); + fld_s(0); + fadd(2); + fsuba(2); + fxch(1); + faddp(2); + fld_s(1); + fadd(1); + fstp_x(Address(esp, 8)); + andl(Address(esp, 8), -16777216); + fld_x(Address(esp, 8)); + fsubp(1); + jmp(B1_11); + + bind(B1_10); + fld_d(ExternalAddress(zeros)); //0x00000000UL, 0x00000000UL + fld_s(0); + + bind(B1_11); + fld_s(0); + lea(eax, Address(noreg, edx, Address::times_8)); + fld_s(3); + lea(edx, 
Address(eax, edx, Address::times_4)); + fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); + fmul(6); + movl(Address(esp, 0), edx); + fadda(2); + fxch(2); + fsuba(3); + fxch(2); + faddp(3); + fxch(2); + faddp(3); + fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); + fmula(2); + fld_s(2); + fadd(2); + fld_s(0); + fxch(1); + fsubra(3); + fxch(3); + fchs(); + faddp(4); + fxch(3); + faddp(4); + fxch(2); + fadd(3); + fxch(2); + fmul(5); + fadda(2); + fld_s(4); + fld_x(Address(24 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); + fmula(1); + fxch(1); + fadda(4); + fxch(4); + fstp_x(Address(esp, 8)); + movzwl(ebx, Address(esp, 16)); + andl(ebx, 32767); + cmpl(ebx, 16415); + jcc(Assembler::greaterEqual, B1_13); + + bind(B1_12); + negl(ebx); + addl(ebx, 30); + movl(ecx, ebx); + movl(eax, Address(esp, 12)); + shrl(eax); + shll(eax); + movl(Address(esp, 12), eax); + movl(Address(esp, 8), 0); + shrl(eax); + jmp(B1_14); + + bind(B1_13); + negl(ebx); + addl(ebx, 30); + movl(ecx, ebx); + movl(edx, Address(esp, 8)); + shrl(edx); + shll(edx); + negl(ecx); + movl(eax, Address(esp, 12)); + shll(eax); + movl(ecx, ebx); + movl(Address(esp, 8), edx); + shrl(edx); + orl(eax, edx); + + bind(B1_14); + fld_x(Address(esp, 8)); + addl(eax, Address(esp, 4)); + fsubp(3); + fmul(6); + fld_s(4); + movl(edx, eax); + andl(edx, 1); + fadd(3); + movl(ecx, Address(esp, 0)); + fsuba(3); + fxch(3); + faddp(5); + fld_s(1); + fxch(3); + fadd_d(Address(zero_none, RelocationHolder::none).plus_disp(edx, Address::times_8)); + fadda(3); + fsub(3); + faddp(2); + fxch(1); + faddp(4); + fld_s(2); + fadd(2); + fsuba(2); + fxch(3); + faddp(2); + fxch(1); + faddp(3); + fld_s(0); + fadd(2); + fsuba(2); + fxch(1); + faddp(2); + fxch(1); + faddp(2); + fld_s(2); + fld_x(Address(36 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmula(1); + fld_s(1); + fadd(3); + fsuba(3); + fxch(2); + faddp(3); + 
fxch(2); + faddp(3); + fxch(1); + fmul(4); + fld_s(0); + fadd(2); + fsuba(2); + fxch(1); + faddp(2); + fxch(1); + faddp(2); + fld_s(2); + fld_x(Address(48 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmula(1); + fld_s(1); + fadd(3); + fsuba(3); + fxch(2); + faddp(3); + fxch(2); + faddp(3); + fld_s(3); + fxch(2); + fmul(5); + fld_x(Address(60 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmula(3); + fxch(3); + faddp(1); + fld_s(0); + fadd(2); + fsuba(2); + fxch(1); + faddp(2); + fxch(1); + faddp(3); + fld_s(3); + fxch(2); + fmul(5); + fld_x(Address(72 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmula(3); + fxch(3); + faddp(1); + fld_s(0); + fadd(2); + fsuba(2); + fxch(1); + faddp(2); + fxch(1); + faddp(3); + fxch(1); + fmulp(4); + fld_x(Address(84 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); + fmulp(3); + fxch(2); + faddp(3); + fld_s(2); + fadd(2); + fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL + fmul(1); + fadda(1); + fsubp(1); + fsuba(2); + fxch(3); + faddp(2); + faddp(1); + fld_d(ExternalAddress(pi04_2d)); //0x54400000UL, 0x3fe921fbUL + fld_s(0); + fmul(2); + fxch(2); + fadd(3); + fxch(1); + fmulp(3); + fmul_d(as_Address(ExternalAddress(8 + pi04_2d))); //0x1a626331UL, 0x3dc0b461UL + faddp(1); + + bind(B1_15); + fld_d(ExternalAddress(TWO_12H)); //0x00000000UL, 0x40b80000UL + fld_s(2); + fadd(2); + fmula(1); + fstp_x(Address(esp, 8)); + fld_x(Address(esp, 8)); + fadd(1); + fsubrp(1); + fst_d(Address(esi, 0)); + fsubp(2); + faddp(1); + fstp_d(Address(esi, 8)); + addl(esp, 20); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); +} + +ALIGNED_(16) juint _L_2il0floatpacket_0[] = +{ + 0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL +}; + +ALIGNED_(16) juint _Pi4Inv[] = +{ + 0x6dc9c883UL, 0x3ff45f30UL +}; + +ALIGNED_(16) juint _Pi4x3[] = +{ + 0x54443000UL, 0xbfe921fbUL, 0x3b39a000UL, 0x3d373dcbUL, 
0xe0e68948UL, + 0xba845c06UL +}; + +ALIGNED_(16) juint _Pi4x4[] = +{ + 0x54400000UL, 0xbfe921fbUL, 0x1a600000UL, 0xbdc0b461UL, 0x2e000000UL, + 0xbb93198aUL, 0x252049c1UL, 0xb96b839aUL +}; + +ALIGNED_(16) jushort _SP[] = +{ + 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffc, 0x0000, 0x8887, 0x8888, 0x8888, 0x8888, + 0x3ff8, 0x0000, 0xc527, 0x0d00, 0x00d0, 0xd00d, 0xbff2, 0x0000, 0x45f6, 0xb616, + 0x1d2a, 0xb8ef, 0x3fec, 0x0000, 0x825b, 0x3997, 0x2b3f, 0xd732, 0xbfe5, 0x0000, + 0xbf33, 0x8bb4, 0x2fda, 0xb092, 0x3fde, 0x0000, 0x44a6, 0xed1a, 0x29ef, 0xd73e, + 0xbfd6, 0x0000, 0x8610, 0x307f, 0x62a1, 0xc921, 0x3fce, 0x0000 +}; + +ALIGNED_(16) jushort _CP[] = +{ + 0x0000, 0x0000, 0x0000, 0x8000, 0xbffe, 0x0000, 0xaaa5, 0xaaaa, 0xaaaa, 0xaaaa, + 0x3ffa, 0x0000, 0x9c2f, 0x0b60, 0x60b6, 0xb60b, 0xbff5, 0x0000, 0xf024, 0x0cac, + 0x00d0, 0xd00d, 0x3fef, 0x0000, 0x03fe, 0x3f65, 0x7dbb, 0x93f2, 0xbfe9, 0x0000, + 0xd84d, 0xadee, 0xc698, 0x8f76, 0x3fe2, 0x0000, 0xdaba, 0xfe79, 0xea36, 0xc9c9, + 0xbfda, 0x0000, 0x3ac6, 0x0ba0, 0x07ce, 0xd585, 0x3fd2, 0x0000 +}; + +ALIGNED_(16) juint _ones[] = +{ + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xbff00000UL +}; + +void MacroAssembler::libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { + Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; + Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23; + Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; + Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_41, B1_42, B1_43, B1_44, B1_45, B1_46; + + assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); + + address L_2il0floatpacket_0 = (address)_L_2il0floatpacket_0; + address Pi4Inv = (address)_Pi4Inv; + address Pi4x3 = (address)_Pi4x3; + address Pi4x4 = (address)_Pi4x4; + address ones = (address)_ones; + address CP = (address)_CP; + address 
SP = (address)_SP; + + bind(B1_1); + push(ebp); + movl(ebp, esp); + andl(esp, -64); + push(esi); + push(edi); + push(ebx); + subl(esp, 52); + movl(eax, Address(ebp, 16)); + movl(edx, Address(ebp, 20)); + movl(Address(esp, 32), eax); + movl(Address(esp, 36), edx); + + bind(B1_2); + fnstcw(Address(esp, 30)); + + bind(B1_3); + movsd(xmm1, Address(ebp, 8)); + movl(esi, Address(ebp, 12)); + movl(eax, esi); + andl(eax, 2147483647); + andps(xmm1, ExternalAddress(L_2il0floatpacket_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL + shrl(esi, 31); + movl(Address(esp, 40), eax); + cmpl(eax, 1104150528); + movsd(Address(ebp, 8), xmm1); + jcc(Assembler::aboveEqual, B1_11); + + bind(B1_4); + movsd(xmm0, ExternalAddress(Pi4Inv)); //0x6dc9c883UL, 0x3ff45f30UL + mulsd(xmm0, xmm1); + movzwl(edx, Address(esp, 30)); + movl(eax, edx); + andl(eax, 768); + movsd(Address(esp, 0), xmm0); + cmpl(eax, 768); + jcc(Assembler::equal, B1_42); + + bind(B1_5); + orl(edx, -64768); + movw(Address(esp, 28), edx); + + bind(B1_6); + fldcw(Address(esp, 28)); + + bind(B1_7); + movsd(xmm1, Address(ebp, 8)); + movl(ebx, 1); + + bind(B1_8); + movl(Address(esp, 12), ebx); + movl(ebx, Address(esp, 4)); + movl(eax, ebx); + movl(Address(esp, 8), esi); + movl(esi, ebx); + shrl(esi, 20); + andl(eax, 1048575); + movl(ecx, esi); + orl(eax, 1048576); + negl(ecx); + movl(edx, eax); + addl(ecx, 19); + addl(esi, 13); + movl(Address(esp, 24), ecx); + shrl(edx); + movl(ecx, esi); + shll(eax); + movl(ecx, Address(esp, 24)); + movl(esi, Address(esp, 0)); + shrl(esi); + orl(eax, esi); + cmpl(ebx, 1094713344); + movsd(Address(esp, 16), xmm1); + fld_d(Address(esp, 16)); + cmov32(Assembler::below, eax, edx); + movl(esi, Address(esp, 8)); + lea(edx, Address(eax, 1)); + movl(ebx, edx); + andl(ebx, -2); + movl(Address(esp, 16), ebx); + fild_s(Address(esp, 16)); + movl(ebx, Address(esp, 12)); + cmpl(Address(esp, 40), 1094713344); + jcc(Assembler::aboveEqual, B1_10); + + bind(B1_9); + fld_d(ExternalAddress(Pi4x3)); 
//0x54443000UL, 0xbfe921fbUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(8 + Pi4x3)); //0x3b39a000UL, 0x3d373dcbUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(16 + Pi4x3)); //0xe0e68948UL, 0xba845c06UL + fmulp(1); + faddp(1); + jmp(B1_17); + + bind(B1_10); + fld_d(ExternalAddress(Pi4x4)); //0x54400000UL, 0xbfe921fbUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(8 + Pi4x4)); //0x1a600000UL, 0xbdc0b461UL + fmul(1); + faddp(2); + fld_d(ExternalAddress(16 + Pi4x4)); //0x2e000000UL, 0xbb93198aUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(24 + Pi4x4)); //0x252049c1UL, 0xb96b839aUL + fmulp(1); + faddp(1); + jmp(B1_17); + + bind(B1_11); + movzwl(edx, Address(esp, 30)); + movl(eax, edx); + andl(eax, 768); + cmpl(eax, 768); + jcc(Assembler::equal, B1_43); + bind(B1_12); + orl(edx, -64768); + movw(Address(esp, 28), edx); + + bind(B1_13); + fldcw(Address(esp, 28)); + + bind(B1_14); + movsd(xmm1, Address(ebp, 8)); + movl(ebx, 1); + + bind(B1_15); + movsd(Address(esp, 16), xmm1); + fld_d(Address(esp, 16)); + addl(esp, -32); + lea(eax, Address(esp, 32)); + fstp_x(Address(esp, 0)); + movl(Address(esp, 12), 0); + movl(Address(esp, 16), eax); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); + + bind(B1_46); + addl(esp, 32); + + bind(B1_16); + fld_d(Address(esp, 0)); + lea(edx, Address(eax, 1)); + fld_d(Address(esp, 8)); + faddp(1); + + bind(B1_17); + movl(ecx, edx); + addl(eax, 3); + shrl(ecx, 2); + andl(ecx, 1); + shrl(eax, 2); + xorl(esi, ecx); + movl(ecx, Address(esp, 36)); + andl(eax, 1); + andl(ecx, 3); + cmpl(ecx, 3); + jcc(Assembler::notEqual, B1_25); + + bind(B1_18); + fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 + fld_s(1); + fmul((2)); + testb(edx, 2); + fmula((1)); + fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 + faddp(2); + fmula(1); + fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f + faddp(2); + fmula(1); + fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b + faddp(2); + fmula(1); + 
fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d + faddp(2); + fmula(1); + fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 + faddp(2); + fmula(1); + fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 + faddp(2); + fmula(1); + fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa + faddp(2); + fmula(1); + fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 + fmul(1); + fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea + faddp(1); + fmul(1); + fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 (byte offset 60: _CP entries are 12 bytes apart) + faddp(1); + fmul(1); + fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d + faddp(1); + fmul(1); + fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 + faddp(1); + fmul(1); + fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 + faddp(1); + fmul(1); + fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa + faddp(1); + fmul(1); + fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 + faddp(1); + fmulp(1); + fld_d(Address(ones, RelocationHolder::none).plus_disp(esi, Address::times_8)); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + jcc(Assembler::equal, B1_22); + + bind(B1_19); + fmulp(4); + testl(ebx, ebx); + fxch(2); + fmul(3); + movl(eax, Address(esp, 32)); // pointer spilled to esp+32 in B1_1; same slot B1_22 reloads + faddp(3); + fxch(2); + fstp_d(Address(eax, 0)); + fmula(1); + faddp(1); + fstp_d(Address(eax, 8)); + jcc(Assembler::equal, B1_21); + + bind(B1_20); + fldcw(Address(esp, 30)); + + bind(B1_21); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_22); + fxch(1); + fmulp(4); + testl(ebx, ebx); + fxch(2); + fmul(3); + movl(eax, Address(esp, 32)); + faddp(3); + fxch(2); + fstp_d(Address(eax, 8)); + fmula(1); + faddp(1); + fstp_d(Address(eax, 0)); + jcc(Assembler::equal, B1_24); + + bind(B1_23); + fldcw(Address(esp, 30)); + + bind(B1_24); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_25); + testb(Address(esp, 36), 2); +
jcc(Assembler::equal, B1_33); + + bind(B1_26); + fld_s(0); + testb(edx, 2); + fmul(1); + fld_s(0); + fmul(1); + jcc(Assembler::equal, B1_30); + + bind(B1_27); + fstp_d(2); + fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 + testl(ebx, ebx); + fmul(2); + fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea + fmul(3); + fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 + movl(eax, Address(esp, 32)); // use the esp parameter like every other stack access in this function + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa + faddp(2); + fxch(1); + fmulp(3); + fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 + faddp(1); + fmulp(1); + faddp(1); + fld_d(Address(ones, RelocationHolder::none).plus_disp(esi, Address::times_8)); + fmula(1); + faddp(1); + fstp_d(Address(eax, 8)); + jcc(Assembler::equal, B1_29); + + bind(B1_28); + fldcw(Address(esp, 30)); + + bind(B1_29); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_30); + fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 + testl(ebx, ebx); + fmul(1); + fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 + fmul(2); + fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f + movl(eax, Address(esp, 32)); + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 + faddp(2); + fxch(1); + fmulp(2); + fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa + faddp(1); + fmulp(2); + faddp(1); +
fld_d(Address(ones, RelocationHolder::none).plus_disp(esi, Address::times_8)); + fmulp(2); + fmul(1); + faddp(1); + fstp_d(Address(eax, 8)); + jcc(Assembler::equal, B1_32); + + bind(B1_31); + fldcw(Address(esp, 30)); + + bind(B1_32); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_33); + testb(Address(esp, 36), 1); + jcc(Assembler::equal, B1_41); + + bind(B1_34); + fld_s(0); + testb(edx, 2); + fmul(1); + fld_s(0); + fmul(1); + jcc(Assembler::equal, B1_38); + + bind(B1_35); + fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 + testl(ebx, ebx); + fmul(1); + fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 + fmul(2); + fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 + faddp(2); + fxch(1); + fmulp(2); + fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa + faddp(1); + fmulp(2); + faddp(1); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmulp(2); + fmul(1); + movl(eax, Address(esp, 32)); + faddp(1); + fstp_d(Address(eax, 0)); + jcc(Assembler::equal, B1_37); + + bind(B1_36); + fldcw(Address(esp, 30)); + + bind(B1_37); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_38); + fstp_d(2); + fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 + testl(ebx, ebx); + fmul(2); + fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea + fmul(3); + fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(36
+ CP)); //0xf024, 0x0cac, 0x00 + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 + faddp(2); + fxch(1); + fmul(3); + fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa + faddp(2); + fxch(1); + fmulp(3); + fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 + faddp(1); + fmulp(1); + faddp(1); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(1); + movl(eax, Address(esp, 32)); + faddp(1); + fstp_d(Address(eax, 0)); + jcc(Assembler::equal, B1_40); + + bind(B1_39); + fldcw(Address(esp, 30)); + bind(B1_40); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + bind(B1_41); + fstp_d(0); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + bind(B1_42); + xorl(ebx, ebx); + jmp(B1_8); + bind(B1_43); + xorl(ebx, ebx); + jmp(B1_15); +} + +ALIGNED_(16) juint _static_const_table_sin[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, + 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, + 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, + 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, + 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, + 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, + 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, + 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, + 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, + 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, + 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, + 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, + 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, + 0x00000000UL, 0x3fe00000UL, 
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, + 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, + 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, + 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, + 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, + 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, + 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, + 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, + 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, + 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, + 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, + 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, + 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, + 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, + 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, + 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, + 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, + 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, + 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, + 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, + 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, + 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, + 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, + 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, + 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, + 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, + 
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, + 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, + 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, + 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, + 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, + 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, + 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, + 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, + 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, + 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, + 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, + 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, + 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, + 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, + 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, + 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, + 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, + 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, + 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, + 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, + 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, + 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, + 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, + 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, + 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, + 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, + 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 
0x3c76e0b1UL, + 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, + 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, + 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, + 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, + 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, + 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, + 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, + 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, + 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, + 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, + 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, + 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, + 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, + 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, + 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, + 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, + 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, + 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, + 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, + 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, + 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, + 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, + 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, + 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, + 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, + 0x3c672cedUL, 0x00000000UL, 
0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, + 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, + 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, + 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, + 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, + 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, + 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, + 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, + 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, + 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, + 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, + 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL, + 0x00000000UL, 0xffffffffUL, 0x3fefffffUL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x80000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x3fe00000UL, + 0x00000000UL, 0x3fe00000UL +}; + +void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ebx, Register edx) { + + Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; + Label L_2TAG_PACKET_4_0_2, start; + assert_different_registers(eax, ebx, edx); + address static_const_table_sin = (address)_static_const_table_sin; + + bind(start); + subl(rsp, 120); + movl(Address(rsp, 56), ebx); + lea(ebx, ExternalAddress(static_const_table_sin)); + movsd(xmm0, Address(rsp, 128)); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + subl(eax, 12336); + cmpl(eax, 4293); + jcc(Assembler::above, L_2TAG_PACKET_0_0_2); + movsd(xmm1, 
Address(ebx, 2160)); + mulsd(xmm1, xmm0); + movsd(xmm5, Address(ebx, 2272)); + movdqu(xmm4, Address(ebx, 2256)); + pand(xmm4, xmm0); + por(xmm5, xmm4); + movsd(xmm3, Address(ebx, 2128)); + movdqu(xmm2, Address(ebx, 2112)); + addpd(xmm1, xmm5); + cvttsd2sil(edx, xmm1); + cvtsi2sdl(xmm1, edx); + mulsd(xmm3, xmm1); + unpcklpd(xmm1, xmm1); + addl(edx, 1865216); + movdqu(xmm4, xmm0); + andl(edx, 63); + movdqu(xmm5, Address(ebx, 2096)); + lea(eax, Address(ebx, 0)); + shll(edx, 5); + addl(eax, edx); + mulpd(xmm2, xmm1); + subsd(xmm0, xmm3); + mulsd(xmm1, Address(ebx, 2144)); + subsd(xmm4, xmm3); + movsd(xmm7, Address(eax, 8)); + unpcklpd(xmm0, xmm0); + movapd(xmm3, xmm4); + subsd(xmm4, xmm2); + mulpd(xmm5, xmm0); + subpd(xmm0, xmm2); + movdqu(xmm6, Address(ebx, 2064)); + mulsd(xmm7, xmm4); + subsd(xmm3, xmm4); + mulpd(xmm5, xmm0); + mulpd(xmm0, xmm0); + subsd(xmm3, xmm2); + movdqu(xmm2, Address(eax, 0)); + subsd(xmm1, xmm3); + movsd(xmm3, Address(eax, 24)); + addsd(xmm2, xmm3); + subsd(xmm7, xmm2); + mulsd(xmm2, xmm4); + mulpd(xmm6, xmm0); + mulsd(xmm3, xmm4); + mulpd(xmm2, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm5, Address(ebx, 2080)); + mulsd(xmm4, Address(eax, 0)); + addpd(xmm6, Address(ebx, 2048)); + mulpd(xmm5, xmm0); + movapd(xmm0, xmm3); + addsd(xmm3, Address(eax, 8)); + mulpd(xmm1, xmm7); + movapd(xmm7, xmm4); + addsd(xmm4, xmm3); + addpd(xmm6, xmm5); + movsd(xmm5, Address(eax, 8)); + subsd(xmm5, xmm3); + subsd(xmm3, xmm4); + addsd(xmm1, Address(eax, 16)); + mulpd(xmm6, xmm2); + addsd(xmm5, xmm0); + addsd(xmm3, xmm7); + addsd(xmm1, xmm5); + addsd(xmm1, xmm3); + addsd(xmm1, xmm6); + unpckhpd(xmm6, xmm6); + addsd(xmm1, xmm6); + addsd(xmm4, xmm1); + movsd(Address(rsp, 0), xmm4); + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_0_0_2); + jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); + shrl(eax, 4); + cmpl(eax, 268434685); + jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2); + movsd(Address(rsp, 0), xmm0); + fld_d(Address(rsp, 0)); + 
jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_3_0_2); + movsd(xmm3, Address(ebx, 2192)); + mulsd(xmm3, xmm0); + subsd(xmm3, xmm0); + mulsd(xmm3, Address(ebx, 2208)); + movsd(Address(rsp, 0), xmm0); + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_2_0_2); + movl(eax, Address(rsp, 132)); + andl(eax, 2146435072); + cmpl(eax, 2146435072); + jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); + subl(rsp, 32); + movsd(Address(rsp, 0), xmm0); + lea(eax, Address(rsp, 40)); + movl(Address(rsp, 8), eax); + movl(eax, 2); + movl(Address(rsp, 12), eax); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); + addl(rsp, 32); + fld_d(Address(rsp, 16)); + jmp(L_2TAG_PACKET_1_0_2); + bind(L_2TAG_PACKET_4_0_2); + fld_d(Address(rsp, 128)); + fmul_d(Address(ebx, 2240)); + bind(L_2TAG_PACKET_1_0_2); + movl(ebx, Address(rsp, 56)); +} + +#endif + +/******************************************************************************/ +// ALGORITHM DESCRIPTION - COS() +// --------------------- +// +// 1. RANGE REDUCTION +// +// We perform an initial range reduction from X to r with +// +// X =~= N * pi/32 + r +// +// so that |r| <= pi/64 + epsilon. We restrict inputs to those +// where |N| <= 932560. Beyond this, the range reduction is +// insufficiently accurate. For extremely small inputs, +// denormalization can occur internally, impacting performance. +// This means that the main path is actually only taken for +// 2^-252 <= |X| < 90112. +// +// To avoid branches, we perform the range reduction to full +// accuracy each time. +// +// X - N * (P_1 + P_2 + P_3) +// +// where P_1 and P_2 are 32-bit numbers (so multiplication by N +// is exact) and P_3 is a 53-bit number. Together, these +// approximate pi well enough for all cases in the restricted +// range. 
+// +// The main reduction sequence is: +// +// y = 32/pi * x +// N = integer(y) +// (computed by adding and subtracting off SHIFTER) +// +// m_1 = N * P_1 +// m_2 = N * P_2 +// r_1 = x - m_1 +// r = r_1 - m_2 +// (this r can be used for most of the calculation) +// +// c_1 = r_1 - r +// m_3 = N * P_3 +// c_2 = c_1 - m_2 +// c = c_2 - m_3 +// +// 2. MAIN ALGORITHM +// +// The algorithm uses a table lookup based on B = M * pi / 32 +// where M = N mod 64. The stored values are: +// sigma closest power of 2 to cos(B) +// C_hl 53-bit cos(B) - sigma +// S_hi + S_lo 2 * 53-bit sin(B) +// +// The computation is organized as follows: +// +// sin(B + r + c) = [sin(B) + sigma * r] + +// r * (cos(B) - sigma) + +// sin(B) * [cos(r + c) - 1] + +// cos(B) * [sin(r + c) - r] +// +// which is approximately: +// +// [S_hi + sigma * r] + +// C_hl * r + +// S_lo + S_hi * [(cos(r) - 1) - r * c] + +// (C_hl + sigma) * [(sin(r) - r) + c] +// +// and this is what is actually computed. We separate this sum +// into four parts: +// +// hi + med + pols + corr +// +// where +// +// hi = S_hi + sigma * r +// med = C_hl * r +// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) +// corr = S_lo + c * ((C_hl + sigma) - S_hi * r) +// +// 3. POLYNOMIAL +// +// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * +// (sin(r) - r) can be rearranged freely, since it is quite +// small, so we exploit parallelism to the fullest. +// +// psc4 = SC_4 * r_1 +// msc4 = psc4 * r +// r2 = r * r +// msc2 = SC_2 * r2 +// r4 = r2 * r2 +// psc3 = SC_3 + msc4 +// psc1 = SC_1 + msc2 +// msc3 = r4 * psc3 +// sincospols = psc1 + msc3 +// pols = sincospols * +// <S_hi * r^2 | (C_hl + sigma) * r^3> +// +// 4. CORRECTION TERM +// +// This is where the "c" component of the range reduction is +// taken into account; recall that just "r" is used for most of +// the calculation. +// +// -c = m_3 - c_2 +// -d = S_hi * r - (C_hl + sigma) +// corr = -c * -d + S_lo +// +// 5. 
COMPENSATED SUMMATIONS +// +// The two successive compensated summations add up the high +// and medium parts, leaving just the low parts to add up at +// the end. +// +// rs = sigma * r +// res_int = S_hi + rs +// k_0 = S_hi - res_int +// k_2 = k_0 + rs +// med = C_hl * r +// res_hi = res_int + med +// k_1 = res_int - res_hi +// k_3 = k_1 + med +// +// 6. FINAL SUMMATION +// +// We now add up all the small parts: +// +// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 +// +// Now the overall result is just: +// +// res_hi + res_lo +// +// 7. SMALL ARGUMENTS +// +// Inputs with |X| < 2^-252 are treated specially as +// 1 - |x|. +// +// Special cases: +// cos(NaN) = quiet NaN, and raise invalid exception +// cos(INF) = NaN and raise invalid exception +// cos(0) = 1 +// +/******************************************************************************/ + +#ifdef _LP64 + +ALIGNED_(8) juint _ONE[] = +{ + 0x00000000UL, 0x3ff00000UL +}; + +void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) { + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; + Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; + Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start; + + assert_different_registers(r8, r9, r10, r11, eax, ecx, edx); + + address ONEHALF = (address)_ONEHALF; + address P_2 = (address)_P_2; + address SC_4 = (address)_SC_4; + address Ctable = (address)_Ctable; + address SC_2 = (address)_SC_2; + address SC_3 = (address)_SC_3; + address SC_1 = (address)_SC_1; + address PI_INV_TABLE = (address)_PI_INV_TABLE; + address PI_4 = (address)_PI_4; + address PI32INV = 
(address)_PI32INV; + address SIGN_MASK = (address)_SIGN_MASK; + address P_1 = (address)_P_1; + address P_3 = (address)_P_3; + address ONE = (address)_ONE; + address NEG_ZERO = (address)_NEG_ZERO; + + bind(start); + push(rbx); + subq(rsp, 16); + movsd(Address(rsp, 8), xmm0); + + bind(B1_2); + movl(eax, Address(rsp, 12)); + movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL + andl(eax, 2147418112); + subl(eax, 808452096); + cmpl(eax, 281346048); + jcc(Assembler::above, L_2TAG_PACKET_0_0_1); + mulsd(xmm1, xmm0); + movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL + pand(xmm4, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + cvttsd2sil(edx, xmm1); + cvtsi2sdl(xmm1, edx); + movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL + movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL + mulsd(xmm3, xmm1); + unpcklpd(xmm1, xmm1); + addq(rdx, 1865232); + movdqu(xmm4, xmm0); + andq(rdx, 63); + movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL + lea(rax, ExternalAddress(Ctable)); + shlq(rdx, 5); + addq(rax, rdx); + mulpd(xmm2, xmm1); + subsd(xmm0, xmm3); + mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL + subsd(xmm4, xmm3); + movq(xmm7, Address(rax, 8)); + unpcklpd(xmm0, xmm0); + movdqu(xmm3, xmm4); + subsd(xmm4, xmm2); + mulpd(xmm5, xmm0); + subpd(xmm0, xmm2); + movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL + mulsd(xmm7, xmm4); + subsd(xmm3, xmm4); + mulpd(xmm5, xmm0); + mulpd(xmm0, xmm0); + subsd(xmm3, xmm2); + movdqu(xmm2, Address(rax, 0)); + subsd(xmm1, xmm3); + movq(xmm3, Address(rax, 24)); + addsd(xmm2, xmm3); + subsd(xmm7, xmm2); + mulsd(xmm2, xmm4); + mulpd(xmm6, xmm0); + mulsd(xmm3, xmm4); + mulpd(xmm2, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 
0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL + mulsd(xmm4, Address(rax, 0)); + addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL + mulpd(xmm5, xmm0); + movdqu(xmm0, xmm3); + addsd(xmm3, Address(rax, 8)); + mulpd(xmm1, xmm7); + movdqu(xmm7, xmm4); + addsd(xmm4, xmm3); + addpd(xmm6, xmm5); + movq(xmm5, Address(rax, 8)); + subsd(xmm5, xmm3); + subsd(xmm3, xmm4); + addsd(xmm1, Address(rax, 16)); + mulpd(xmm6, xmm2); + addsd(xmm0, xmm5); + addsd(xmm3, xmm7); + addsd(xmm0, xmm1); + addsd(xmm0, xmm3); + addsd(xmm0, xmm6); + unpckhpd(xmm6, xmm6); + addsd(xmm0, xmm6); + addsd(xmm0, xmm4); + jmp(B1_4); + + bind(L_2TAG_PACKET_0_0_1); + jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + pinsrw(xmm0, eax, 3); + movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + subsd(xmm1, xmm0); + movdqu(xmm0, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + andl(eax, 32752); + cmpl(eax, 32752); + jcc(Assembler::equal, L_2TAG_PACKET_2_0_1); + pextrw(ecx, xmm0, 3); + andl(ecx, 32752); + subl(ecx, 16224); + shrl(ecx, 7); + andl(ecx, 65532); + lea(r11, ExternalAddress(PI_INV_TABLE)); + addq(rcx, r11); + movdq(rax, xmm0); + movl(r10, Address(rcx, 20)); + movl(r8, Address(rcx, 24)); + movl(edx, eax); + shrq(rax, 21); + orl(eax, INT_MIN); + shrl(eax, 11); + movl(r9, r10); + imulq(r10, rdx); + imulq(r9, rax); + imulq(r8, rax); + movl(rsi, Address(rcx, 16)); + movl(rdi, Address(rcx, 12)); + movl(r11, r10); + shrq(r10, 32); + addq(r9, r10); + addq(r11, r8); + movl(r8, r11); + shrq(r11, 32); + addq(r9, r11); + movl(r10, rsi); + imulq(rsi, rdx); + imulq(r10, rax); + movl(r11, rdi); + imulq(rdi, rdx); + movl(rbx, rsi); + shrq(rsi, 32); + addq(r9, rbx); + movl(rbx, r9); + shrq(r9, 32); + addq(r10, rsi); + addq(r10, r9); + shlq(rbx, 32); + orq(r8, rbx); + imulq(r11, rax); + movl(r9, Address(rcx, 8)); + movl(rsi, Address(rcx, 4)); + movl(rbx, rdi); + shrq(rdi, 32); + addq(r10, rbx); + 
movl(rbx, r10); + shrq(r10, 32); + addq(r11, rdi); + addq(r11, r10); + movq(rdi, r9); + imulq(r9, rdx); + imulq(rdi, rax); + movl(r10, r9); + shrq(r9, 32); + addq(r11, r10); + movl(r10, r11); + shrq(r11, 32); + addq(rdi, r9); + addq(rdi, r11); + movq(r9, rsi); + imulq(rsi, rdx); + imulq(r9, rax); + shlq(r10, 32); + orq(r10, rbx); + movl(eax, Address(rcx, 0)); + movl(r11, rsi); + shrq(rsi, 32); + addq(rdi, r11); + movl(r11, rdi); + shrq(rdi, 32); + addq(r9, rsi); + addq(r9, rdi); + imulq(rdx, rax); + pextrw(rbx, xmm0, 3); + lea(rdi, ExternalAddress(PI_INV_TABLE)); + subq(rcx, rdi); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, 19); + movl(rsi, 32768); + andl(rsi, rbx); + shrl(rbx, 4); + andl(rbx, 2047); + subl(rbx, 1023); + subl(ecx, rbx); + addq(r9, rdx); + movl(edx, ecx); + addl(edx, 32); + cmpl(ecx, 1); + jcc(Assembler::less, L_2TAG_PACKET_3_0_1); + negl(ecx); + addl(ecx, 29); + shll(r9); + movl(rdi, r9); + andl(r9, 536870911); + testl(r9, 268435456); + jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); + shrl(r9); + movl(rbx, 0); + shlq(r9, 32); + orq(r9, r11); + + bind(L_2TAG_PACKET_5_0_1); + + bind(L_2TAG_PACKET_6_0_1); + cmpq(r9, 0); + jcc(Assembler::equal, L_2TAG_PACKET_7_0_1); + + bind(L_2TAG_PACKET_8_0_1); + bsrq(r11, r9); + movl(ecx, 29); + subl(ecx, r11); + jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1); + shlq(r9); + movq(rax, r10); + shlq(r10); + addl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shrq(rax); + shrq(r8); + orq(r9, rax); + orq(r10, r8); + + bind(L_2TAG_PACKET_10_0_1); + cvtsi2sdq(xmm0, r9); + shrq(r10, 1); + cvtsi2sdq(xmm3, r10); + xorpd(xmm4, xmm4); + shll(edx, 4); + negl(edx); + addl(edx, 16368); + orl(edx, rsi); + xorl(edx, rbx); + pinsrw(xmm4, edx, 3); + movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL + movq(xmm6, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL + xorpd(xmm5, xmm5); + subl(edx, 1008); + pinsrw(xmm5, edx, 3); + mulsd(xmm0, xmm4); + shll(rsi, 
16); + sarl(rsi, 31); + mulsd(xmm3, xmm5); + movdqu(xmm1, xmm0); + mulsd(xmm0, xmm2); + shrl(rdi, 29); + addsd(xmm1, xmm3); + mulsd(xmm3, xmm2); + addl(rdi, rsi); + xorl(rdi, rsi); + mulsd(xmm6, xmm1); + movl(eax, rdi); + addsd(xmm6, xmm3); + movdqu(xmm2, xmm0); + addsd(xmm0, xmm6); + subsd(xmm2, xmm0); + addsd(xmm6, xmm2); + + bind(L_2TAG_PACKET_11_0_1); + movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL + mulsd(xmm1, xmm0); + movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL + pand(xmm4, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + cvttsd2siq(rdx, xmm1); + cvtsi2sdq(xmm1, rdx); + movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL + movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL + mulsd(xmm3, xmm1); + unpcklpd(xmm1, xmm1); + shll(eax, 3); + addl(edx, 1865232); + movdqu(xmm4, xmm0); + addl(edx, eax); + andl(edx, 63); + movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL + lea(rax, ExternalAddress(Ctable)); + shll(edx, 5); + addq(rax, rdx); + mulpd(xmm2, xmm1); + subsd(xmm0, xmm3); + mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL + subsd(xmm4, xmm3); + movq(xmm7, Address(rax, 8)); + unpcklpd(xmm0, xmm0); + movdqu(xmm3, xmm4); + subsd(xmm4, xmm2); + mulpd(xmm5, xmm0); + subpd(xmm0, xmm2); + mulsd(xmm7, xmm4); + subsd(xmm3, xmm4); + mulpd(xmm5, xmm0); + mulpd(xmm0, xmm0); + subsd(xmm3, xmm2); + movdqu(xmm2, Address(rax, 0)); + subsd(xmm1, xmm3); + movq(xmm3, Address(rax, 24)); + addsd(xmm2, xmm3); + subsd(xmm7, xmm2); + subsd(xmm1, xmm6); + movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL + mulsd(xmm2, xmm4); + mulpd(xmm6, xmm0); + mulsd(xmm3, xmm4); + mulpd(xmm2, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 
0xbf56c16cUL + mulsd(xmm4, Address(rax, 0)); + addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL + mulpd(xmm5, xmm0); + movdqu(xmm0, xmm3); + addsd(xmm3, Address(rax, 8)); + mulpd(xmm1, xmm7); + movdqu(xmm7, xmm4); + addsd(xmm4, xmm3); + addpd(xmm6, xmm5); + movq(xmm5, Address(rax, 8)); + subsd(xmm5, xmm3); + subsd(xmm3, xmm4); + addsd(xmm1, Address(rax, 16)); + mulpd(xmm6, xmm2); + addsd(xmm5, xmm0); + addsd(xmm3, xmm7); + addsd(xmm1, xmm5); + addsd(xmm1, xmm3); + addsd(xmm1, xmm6); + unpckhpd(xmm6, xmm6); + movdqu(xmm0, xmm4); + addsd(xmm1, xmm6); + addsd(xmm0, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_7_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + movl(r8, 0); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1); + xorpd(xmm0, xmm0); + xorpd(xmm6, xmm6); + jmp(L_2TAG_PACKET_11_0_1); + + bind(L_2TAG_PACKET_9_0_1); + jcc(Assembler::equal, L_2TAG_PACKET_10_0_1); + negl(ecx); + shrq(r10); + movq(rax, r9); + shrq(r9); + subl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shlq(rax); + orq(r10, rax); + jmp(L_2TAG_PACKET_10_0_1); + bind(L_2TAG_PACKET_3_0_1); + negl(ecx); + shlq(r9, 32); + orq(r9, r11); + shlq(r9); + movq(rdi, r9); + testl(r9, INT_MIN); + jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1); + shrl(r9); + movl(rbx, 0); + shrq(rdi, 3); + jmp(L_2TAG_PACKET_6_0_1); + + bind(L_2TAG_PACKET_4_0_1); + shrl(r9); + movl(rbx, 536870912); + shrl(rbx); + shlq(r9, 32); + orq(r9, r11); + shlq(rbx, 32); + addl(rdi, 536870912); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(rbx, 32768); + jmp(L_2TAG_PACKET_5_0_1); + + bind(L_2TAG_PACKET_12_0_1); + shrl(r9); + mov64(rbx, 0x100000000); + shrq(rbx); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + 
movq(r10, r11); + movq(r9, rbx); + movl(rbx, 32768); + shrq(rdi, 3); + addl(rdi, 536870912); + jmp(L_2TAG_PACKET_6_0_1); + + bind(L_2TAG_PACKET_2_0_1); + movsd(xmm0, Address(rsp, 8)); + mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL + movq(Address(rsp, 0), xmm0); + + bind(L_2TAG_PACKET_13_0_1); + + bind(B1_4); + addq(rsp, 16); + pop(rbx); +} + +#endif + +#ifndef _LP64 + +ALIGNED_(16) juint _static_const_table_cos[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, + 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, + 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, + 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, + 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, + 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, + 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, + 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, + 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, + 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, + 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, + 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, + 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, + 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, + 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, + 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, + 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, + 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, + 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, + 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, + 0x3fa4a031UL, 0x56c62ddaUL, 
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, + 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, + 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, + 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, + 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, + 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, + 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, + 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, + 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, + 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, + 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, + 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, + 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, + 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, + 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, + 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, + 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, + 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, + 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, + 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, + 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, + 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, + 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, + 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, + 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, + 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, + 
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, + 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, + 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, + 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, + 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, + 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, + 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, + 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, + 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, + 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, + 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, + 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, + 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, + 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, + 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, + 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, + 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, + 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, + 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, + 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, + 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, + 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, + 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, + 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, + 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, + 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, + 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 
0x00000000UL, + 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, + 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, + 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, + 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, + 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, + 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, + 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, + 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, + 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, + 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, + 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, + 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, + 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, + 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, + 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, + 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, + 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, + 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, + 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, + 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, + 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, + 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, + 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, + 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, + 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, + 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, + 0x1a01a01aUL, 0xbf2a01a0UL, 
0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, + 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, + 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, + 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, + 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL +}; + +//registers, +// input: (rbp + 8) +// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 +// rax, rdx, rcx, rbx (tmp) + +// Code generated by Intel C compiler for LIBM library + +void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { + Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; + Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; + Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; + Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; + + assert_different_registers(tmp, eax, ecx, edx); + + address static_const_table_cos = (address)_static_const_table_cos; + + bind(start); + subl(rsp, 120); + movl(Address(rsp, 56), tmp); + lea(tmp, ExternalAddress(static_const_table_cos)); + movsd(xmm0, Address(rsp, 128)); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + subl(eax, 12336); + cmpl(eax, 4293); + jcc(Assembler::above, L_2TAG_PACKET_0_0_2); + movsd(xmm1, Address(tmp, 2160)); + mulsd(xmm1, xmm0); + movdqu(xmm5, Address(tmp, 2240)); + movsd(xmm4, Address(tmp, 2224)); + pand(xmm4, xmm0); + por(xmm5, xmm4); + movsd(xmm3, Address(tmp, 2128)); + movdqu(xmm2, 
Address(tmp, 2112)); + addpd(xmm1, xmm5); + cvttsd2sil(edx, xmm1); + cvtsi2sdl(xmm1, edx); + mulsd(xmm3, xmm1); + unpcklpd(xmm1, xmm1); + addl(edx, 1865232); + movdqu(xmm4, xmm0); + andl(edx, 63); + movdqu(xmm5, Address(tmp, 2096)); + lea(eax, Address(tmp, 0)); + shll(edx, 5); + addl(eax, edx); + mulpd(xmm2, xmm1); + subsd(xmm0, xmm3); + mulsd(xmm1, Address(tmp, 2144)); + subsd(xmm4, xmm3); + movsd(xmm7, Address(eax, 8)); + unpcklpd(xmm0, xmm0); + movapd(xmm3, xmm4); + subsd(xmm4, xmm2); + mulpd(xmm5, xmm0); + subpd(xmm0, xmm2); + movdqu(xmm6, Address(tmp, 2064)); + mulsd(xmm7, xmm4); + subsd(xmm3, xmm4); + mulpd(xmm5, xmm0); + mulpd(xmm0, xmm0); + subsd(xmm3, xmm2); + movdqu(xmm2, Address(eax, 0)); + subsd(xmm1, xmm3); + movsd(xmm3, Address(eax, 24)); + addsd(xmm2, xmm3); + subsd(xmm7, xmm2); + mulsd(xmm2, xmm4); + mulpd(xmm6, xmm0); + mulsd(xmm3, xmm4); + mulpd(xmm2, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm5, Address(tmp, 2080)); + mulsd(xmm4, Address(eax, 0)); + addpd(xmm6, Address(tmp, 2048)); + mulpd(xmm5, xmm0); + movapd(xmm0, xmm3); + addsd(xmm3, Address(eax, 8)); + mulpd(xmm1, xmm7); + movapd(xmm7, xmm4); + addsd(xmm4, xmm3); + addpd(xmm6, xmm5); + movsd(xmm5, Address(eax, 8)); + subsd(xmm5, xmm3); + subsd(xmm3, xmm4); + addsd(xmm1, Address(eax, 16)); + mulpd(xmm6, xmm2); + addsd(xmm5, xmm0); + addsd(xmm3, xmm7); + addsd(xmm1, xmm5); + addsd(xmm1, xmm3); + addsd(xmm1, xmm6); + unpckhpd(xmm6, xmm6); + addsd(xmm1, xmm6); + addsd(xmm4, xmm1); + movsd(Address(rsp, 0), xmm4); + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_0_0_2); + jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + pinsrw(xmm0, eax, 3); + movsd(xmm1, Address(tmp, 2192)); + subsd(xmm1, xmm0); + movsd(Address(rsp, 0), xmm1); + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_2_0_2); + movl(eax, Address(rsp, 132)); + andl(eax, 2146435072); + cmpl(eax, 2146435072); + jcc(Assembler::equal, 
L_2TAG_PACKET_3_0_2); + subl(rsp, 32); + movsd(Address(rsp, 0), xmm0); + lea(eax, Address(rsp, 40)); + movl(Address(rsp, 8), eax); + movl(eax, 1); + movl(Address(rsp, 12), eax); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); + addl(rsp, 32); + fld_d(Address(rsp, 8)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_3_0_2); + fld_d(Address(rsp, 128)); + fmul_d(Address(tmp, 2208)); + + bind(L_2TAG_PACKET_1_0_2); + movl(tmp, Address(rsp, 56)); +} + +#endif --- old/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2015-11-19 18:51:26.482389300 -0800 +++ new/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2015-11-19 18:51:26.147355800 -0800 @@ -2103,22 +2103,6 @@ __ ret(0); } { - StubCodeMark mark(this, "StubRoutines", "sin"); - StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); - - __ fld_d(Address(rsp, 4)); - __ trigfunc('s'); - __ ret(0); - } - { - StubCodeMark mark(this, "StubRoutines", "cos"); - StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); - - __ fld_d(Address(rsp, 4)); - __ trigfunc('c'); - __ ret(0); - } - { StubCodeMark mark(this, "StubRoutines", "tan"); StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); @@ -3082,7 +3066,76 @@ } + address generate_libm_reduce_pi04l() { + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + __ libm_reduce_pi04l(rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); + + return start; + + } + + address generate_libm_sin_cos_huge() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + + BLOCK_COMMENT("Entry:"); + __ libm_sincos_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); + + return start; + + } + + address generate_libmSin() { + address start = __ pc(); + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + BLOCK_COMMENT("Entry:"); + __ 
enter(); // required for proper stackwalking of RuntimeStub frame + __ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rdx); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } + + address generate_libmCos() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } // Safefetch stubs. void generate_safefetch(const char* name, int size, address* entry, @@ -3307,6 +3360,16 @@ if (VM_Version::supports_sse2()) { StubRoutines::_dexp = generate_libmExp(); StubRoutines::_dlog = generate_libmLog(); + if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) { + StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l(); + StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge(); + } + if (UseLibmSinIntrinsic) { + StubRoutines::_dsin = generate_libmSin(); + } + if (UseLibmCosIntrinsic) { + StubRoutines::_dcos = generate_libmCos(); + } } } --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-11-19 18:51:28.681609200 -0800 +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2015-11-19 18:51:28.344575500 -0800 @@ -2987,32 +2987,6 @@ __ ret(0); } { - StubCodeMark mark(this, "StubRoutines", "sin"); - StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); - - __ subq(rsp, 8); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ trigfunc('s'); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ addq(rsp, 8); - __ ret(0); - } - { - StubCodeMark mark(this, 
"StubRoutines", "cos"); - StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); - - __ subq(rsp, 8); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ trigfunc('c'); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ addq(rsp, 8); - __ ret(0); - } - { StubCodeMark mark(this, "StubRoutines", "tan"); StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); @@ -4235,6 +4209,103 @@ } + // Emits the libm sin stub (MacroAssembler::fast_sin) and returns its entry address. + address generate_libmSin() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 = r10; + const Register tmp4 = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // rsi and rdi are callee-saved on Win64; preserve them in case fast_sin uses them as scratch, as fast_cos does (TODO confirm) + __ push(rsi); + __ push(rdi); + // save the xmm registers which must be preserved 6-7 + __ subptr(rsp, 4 * wordSize); + __ movdqu(Address(rsp, 0), xmm6); + __ movdqu(Address(rsp, 2 * wordSize), xmm7); +#endif + __ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + +#ifdef _WIN64 + // restore xmm regs and rsi/rdi belonging to calling function + __ movdqu(xmm6, Address(rsp, 0)); + __ movdqu(xmm7, Address(rsp, 2 * wordSize)); + __ addptr(rsp, 4 * wordSize); + __ pop(rdi); + __ pop(rsi); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } + + // Emits the libm cos stub (MacroAssembler::fast_cos) and returns its entry address. + address generate_libmCos() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 = r10; + const Register tmp4 = r11; + + 
BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // rsi and rdi are callee-saved on Win64 but fast_cos uses them as scratch (it only saves rbx itself), so preserve them here + __ push(rsi); + __ push(rdi); + // save the xmm registers which must be preserved 6-7 + __ subptr(rsp, 4 * wordSize); + __ movdqu(Address(rsp, 0), xmm6); + __ movdqu(Address(rsp, 2 * wordSize), xmm7); +#endif + __ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + +#ifdef _WIN64 + // restore xmm regs and rsi/rdi belonging to calling function + __ movdqu(xmm6, Address(rsp, 0)); + __ movdqu(xmm7, Address(rsp, 2 * wordSize)); + __ addptr(rsp, 4 * wordSize); + __ pop(rdi); + __ pop(rsi); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } #undef __ #define __ masm-> @@ -4425,6 +4496,12 @@ if (VM_Version::supports_sse2()) { StubRoutines::_dexp = generate_libmExp(); StubRoutines::_dlog = generate_libmLog(); + if (UseLibmSinIntrinsic) { + StubRoutines::_dsin = generate_libmSin(); + } + if (UseLibmCosIntrinsic) { + StubRoutines::_dcos = generate_libmCos(); + } } } --- old/src/cpu/x86/vm/x86_32.ad 2015-11-19 18:51:30.935834600 -0800 +++ new/src/cpu/x86/vm/x86_32.ad 2015-11-19 18:51:30.589800000 -0800 @@ -9786,48 +9786,6 @@ ins_pipe( pipe_slow ); %} -instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst (SinD src)); - ins_cost(1800); - format %{ "DSIN $dst" %} - opcode(0xD9, 0xFE); - ins_encode( OpcP, OpcS ); - ins_pipe( pipe_slow ); -%} - -instruct sinD_reg(regD dst, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst (SinD dst)); - effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" - ins_cost(1800); - format %{ "DSIN $dst" %} - opcode(0xD9, 0xFE); - ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst (CosD src)); - ins_cost(1800); - format %{ "DCOS $dst" %} - opcode(0xD9, 0xFF); - ins_encode( OpcP, OpcS ); - ins_pipe( pipe_slow ); -%} - -instruct 
cosD_reg(regD dst, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst (CosD dst)); - effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" - ins_cost(1800); - format %{ "DCOS $dst" %} - opcode(0xD9, 0xFF); - ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); match(Set dst(TanD src)); --- old/src/cpu/x86/vm/x86_64.ad 2015-11-19 18:51:33.533094300 -0800 +++ new/src/cpu/x86/vm/x86_64.ad 2015-11-19 18:51:33.195060500 -0800 @@ -9821,23 +9821,6 @@ %} // -----------Trig and Trancendental Instructions------------------------------ -instruct cosD_reg(regD dst) %{ - match(Set dst (CosD dst)); - - format %{ "dcos $dst\n\t" %} - opcode(0xD9, 0xFF); - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct sinD_reg(regD dst) %{ - match(Set dst (SinD dst)); - - format %{ "dsin $dst\n\t" %} - opcode(0xD9, 0xFE); - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} instruct tanD_reg(regD dst) %{ match(Set dst (TanD dst)); --- old/src/share/vm/adlc/formssel.cpp 2015-11-19 18:51:36.047345700 -0800 +++ new/src/share/vm/adlc/formssel.cpp 2015-11-19 18:51:35.712312200 -0800 @@ -4009,7 +4009,6 @@ if( _rChild ) { const char *opType = _rChild->_opType; if( strcmp(opType,"AtanD")==0 || - strcmp(opType,"CosD")==0 || strcmp(opType,"DivD")==0 || strcmp(opType,"DivF")==0 || strcmp(opType,"DivI")==0 || @@ -4018,7 +4017,6 @@ strcmp(opType,"ModF")==0 || strcmp(opType,"ModI")==0 || strcmp(opType,"PowD")==0 || - strcmp(opType,"SinD")==0 || strcmp(opType,"SqrtD")==0 || strcmp(opType,"TanD")==0 || strcmp(opType,"ConvD2F")==0 || --- old/src/share/vm/c1/c1_LIR.cpp 2015-11-19 18:51:38.245565500 -0800 +++ new/src/share/vm/c1/c1_LIR.cpp 2015-11-19 18:51:37.912532200 -0800 @@ -729,8 +729,6 @@ case lir_tan: - case lir_sin: - case lir_cos: case lir_log10: { assert(op->as_Op2() != NULL, "must 
be"); LIR_Op2* op2 = (LIR_Op2*)op; @@ -1765,8 +1763,6 @@ case lir_rem: s = "rem"; break; case lir_abs: s = "abs"; break; case lir_sqrt: s = "sqrt"; break; - case lir_sin: s = "sin"; break; - case lir_cos: s = "cos"; break; case lir_tan: s = "tan"; break; case lir_log10: s = "log10"; break; case lir_pow: s = "pow"; break; --- old/src/share/vm/c1/c1_LIR.hpp 2015-11-19 18:51:40.399780900 -0800 +++ new/src/share/vm/c1/c1_LIR.hpp 2015-11-19 18:51:40.059746900 -0800 @@ -956,8 +956,6 @@ , lir_rem , lir_sqrt , lir_abs - , lir_sin - , lir_cos , lir_tan , lir_log10 , lir_pow @@ -2193,8 +2191,6 @@ void abs (LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_abs , from, tmp, to)); } void sqrt(LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_sqrt, from, tmp, to)); } void log10 (LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_log10, from, LIR_OprFact::illegalOpr, to, tmp)); } - void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); } - void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); } void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); } void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); } --- old/src/share/vm/c1/c1_LIRAssembler.cpp 2015-11-19 18:51:42.550996000 -0800 +++ new/src/share/vm/c1/c1_LIRAssembler.cpp 2015-11-19 18:51:42.212962200 -0800 @@ -735,9 +735,7 @@ case lir_abs: case lir_sqrt: - case lir_sin: case lir_tan: - case lir_cos: case lir_log10: case lir_pow: intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); --- old/src/share/vm/c1/c1_LinearScan.cpp 2015-11-19 18:51:44.642205100 -0800 +++ new/src/share/vm/c1/c1_LinearScan.cpp 2015-11-19 
18:51:44.305171400 -0800 @@ -6599,8 +6599,6 @@ case lir_div_strictfp: case lir_rem: case lir_sqrt: - case lir_sin: - case lir_cos: case lir_abs: case lir_log10: case lir_pow: --- old/src/share/vm/c1/c1_Runtime1.cpp 2015-11-19 18:51:46.958436700 -0800 +++ new/src/share/vm/c1/c1_Runtime1.cpp 2015-11-19 18:51:46.620402900 -0800 @@ -319,6 +319,8 @@ FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); FUNCTION_CASE(entry, StubRoutines::dexp()); FUNCTION_CASE(entry, StubRoutines::dlog()); + FUNCTION_CASE(entry, StubRoutines::dsin()); + FUNCTION_CASE(entry, StubRoutines::dcos()); #undef FUNCTION_CASE --- old/src/share/vm/opto/classes.hpp 2015-11-19 18:51:49.057646600 -0800 +++ new/src/share/vm/opto/classes.hpp 2015-11-19 18:51:48.726613500 -0800 @@ -112,7 +112,6 @@ macro(ConvL2D) macro(ConvL2F) macro(ConvL2I) -macro(CosD) macro(CountedLoop) macro(CountedLoopEnd) macro(CountLeadingZerosI) @@ -230,7 +229,6 @@ macro(SafePoint) macro(SafePointScalarObject) macro(SCMemProj) -macro(SinD) macro(SqrtD) macro(Start) macro(StartOSR) --- old/src/share/vm/opto/library_call.cpp 2015-11-19 18:51:51.091850000 -0800 +++ new/src/share/vm/opto/library_call.cpp 2015-11-19 18:51:50.749815800 -0800 @@ -1545,8 +1545,6 @@ Node* n = NULL; switch (id) { - case vmIntrinsics::_dsin: n = new SinDNode(C, control(), arg); break; - case vmIntrinsics::_dcos: n = new CosDNode(C, control(), arg); break; case vmIntrinsics::_dtan: n = new TanDNode(C, control(), arg); break; default: fatal_unexpected_iid(id); break; } @@ -1609,16 +1607,6 @@ // Slow path - non-blocking leaf call Node* call = NULL; switch (id) { - case vmIntrinsics::_dsin: - call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(), - CAST_FROM_FN_PTR(address, SharedRuntime::dsin), - "Sin", NULL, arg, top()); - break; - case vmIntrinsics::_dcos: - call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(), - CAST_FROM_FN_PTR(address, SharedRuntime::dcos), - "Cos", NULL, arg, top()); - break; case vmIntrinsics::_dtan: call = 
make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), @@ -1906,17 +1894,21 @@ #define FN_PTR(f) CAST_FROM_FN_PTR(address, f) switch (id) { // These intrinsics are not properly supported on all hardware - case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS"); - case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin), "SIN"); + case vmIntrinsics::_dsin: + return StubRoutines::dsin() != NULL ? + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dsin(), "dsin") : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin), "SIN"); + case vmIntrinsics::_dcos: + return StubRoutines::dcos() != NULL ? + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcos(), "dcos") : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS"); case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) : runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN"); case vmIntrinsics::_dlog: return StubRoutines::dlog() != NULL ? - runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") : - runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG"); + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG"); case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? 
inline_math(id) : runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); --- old/src/share/vm/opto/subnode.cpp 2015-11-19 18:51:53.415082300 -0800 +++ new/src/share/vm/opto/subnode.cpp 2015-11-19 18:51:53.076048400 -0800 @@ -1477,28 +1477,6 @@ //============================================================================= //------------------------------Value------------------------------------------ -// Compute cos -const Type *CosDNode::Value( PhaseTransform *phase ) const { - const Type *t1 = phase->type( in(1) ); - if( t1 == Type::TOP ) return Type::TOP; - if( t1->base() != Type::DoubleCon ) return Type::DOUBLE; - double d = t1->getd(); - return TypeD::make( StubRoutines::intrinsic_cos( d ) ); -} - -//============================================================================= -//------------------------------Value------------------------------------------ -// Compute sin -const Type *SinDNode::Value( PhaseTransform *phase ) const { - const Type *t1 = phase->type( in(1) ); - if( t1 == Type::TOP ) return Type::TOP; - if( t1->base() != Type::DoubleCon ) return Type::DOUBLE; - double d = t1->getd(); - return TypeD::make( StubRoutines::intrinsic_sin( d ) ); -} - -//============================================================================= -//------------------------------Value------------------------------------------ // Compute tan const Type *TanDNode::Value( PhaseTransform *phase ) const { const Type *t1 = phase->type( in(1) ); --- old/src/share/vm/opto/subnode.hpp 2015-11-19 18:51:55.531293900 -0800 +++ new/src/share/vm/opto/subnode.hpp 2015-11-19 18:51:55.204261200 -0800 @@ -409,20 +409,6 @@ }; //------------------------------CosDNode--------------------------------------- -// Cosinus of a double -class CosDNode : public Node { -public: - CosDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) { - init_flags(Flag_is_expensive); - C->add_expensive_node(this); - } - virtual int Opcode() const; - const Type *bottom_type() const { 
return Type::DOUBLE; } - virtual uint ideal_reg() const { return Op_RegD; } - virtual const Type *Value( PhaseTransform *phase ) const; -}; - -//------------------------------CosDNode--------------------------------------- // Sinus of a double class SinDNode : public Node { public: --- old/src/share/vm/runtime/stubRoutines.cpp 2015-11-19 18:51:57.581498900 -0800 +++ new/src/share/vm/runtime/stubRoutines.cpp 2015-11-19 18:51:57.230463800 -0800 @@ -150,6 +150,10 @@ address StubRoutines::_dexp = NULL; address StubRoutines::_dlog = NULL; +address StubRoutines::_dsin = NULL; +address StubRoutines::_dcos = NULL; +address StubRoutines::_dlibm_sin_cos_huge = NULL; +address StubRoutines::_dlibm_reduce_pi04l = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL; double (* StubRoutines::_intrinsic_pow )(double, double) = NULL; --- old/src/share/vm/runtime/stubRoutines.hpp 2015-11-19 18:51:59.671707900 -0800 +++ new/src/share/vm/runtime/stubRoutines.hpp 2015-11-19 18:51:59.341674900 -0800 @@ -209,6 +209,10 @@ static address _dexp; static address _dlog; + static address _dsin; + static address _dcos; + static address _dlibm_sin_cos_huge; + static address _dlibm_reduce_pi04l; // These are versions of the java.lang.Math methods which perform // the same operations as the intrinsic version. 
They are used for @@ -378,6 +382,10 @@ static address dexp() { return _dexp; } static address dlog() { return _dlog; } + static address dsin() { return _dsin; } + static address dcos() { return _dcos; } + static address dlibm_reduce_pi04l() { return _dlibm_reduce_pi04l; } + static address dlibm_sin_cos_huge() { return _dlibm_sin_cos_huge; } static address select_fill_function(BasicType t, bool aligned, const char* &name); --- old/src/share/vm/runtime/vmStructs.cpp 2015-11-19 18:52:01.719912700 -0800 +++ new/src/share/vm/runtime/vmStructs.cpp 2015-11-19 18:52:01.384879200 -0800 @@ -860,6 +860,8 @@ static_field(StubRoutines, _mulAdd, address) \ static_field(StubRoutines, _dexp, address) \ static_field(StubRoutines, _dlog, address) \ + static_field(StubRoutines, _dsin, address) \ + static_field(StubRoutines, _dcos, address) \ static_field(StubRoutines, _jbyte_arraycopy, address) \ static_field(StubRoutines, _jshort_arraycopy, address) \ static_field(StubRoutines, _jint_arraycopy, address) \ @@ -2056,8 +2058,6 @@ declare_c2_type(NegNode, Node) \ declare_c2_type(NegFNode, NegNode) \ declare_c2_type(NegDNode, NegNode) \ - declare_c2_type(CosDNode, Node) \ - declare_c2_type(SinDNode, Node) \ declare_c2_type(TanDNode, Node) \ declare_c2_type(AtanDNode, Node) \ declare_c2_type(SqrtDNode, Node) \