--- old/src/cpu/x86/vm/assembler_x86.cpp 2016-03-25 14:14:50.339100100 -0700 +++ new/src/cpu/x86/vm/assembler_x86.cpp 2016-03-25 14:14:49.804069500 -0700 @@ -1827,6 +1827,15 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xE6); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::decl(Address dst) { // Don't use it directly. Use MacroAssembler::decrement() instead. InstructionMark im(this); @@ -5028,7 +5037,7 @@ } void Assembler::phaddw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse3(), "")); + assert(VM_Version::supports_sse3(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x01); @@ -5036,7 +5045,7 @@ } void Assembler::phaddd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse3(), "")); + assert(VM_Version::supports_sse3(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x02); --- old/src/cpu/x86/vm/assembler_x86.hpp 2016-03-25 14:14:53.956307000 -0700 +++ new/src/cpu/x86/vm/assembler_x86.hpp 2016-03-25 14:14:53.446277900 -0700 @@ -1042,6 +1042,8 @@ void cvttss2sil(Register dst, XMMRegister src); void cvttss2siq(Register dst, XMMRegister src); + void cvttpd2dq(XMMRegister dst, XMMRegister src); + // Divide Scalar Double-Precision Floating-Point Values void divsd(XMMRegister dst, Address src); void divsd(XMMRegister dst, XMMRegister src); --- old/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2016-03-25 14:14:57.258495900 -0700 +++ new/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp 2016-03-25 14:14:56.742466400 -0700 @@ -2365,13 +2365,8 @@ } else if (value->is_double_fpu()) { assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS"); switch(code) { - case lir_log10 : __ flog10() ; break; case lir_abs : __ fabs() ; break; case lir_sqrt : __ fsqrt(); break; - case lir_tan : - // Should consider not saving rbx, if not necessary - __ trigfunc('t', op->as_Op2()->fpu_stack_size()); - break; default : ShouldNotReachHere(); } } else { --- old/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp 2016-03-25 14:15:00.653690100 -0700 +++ new/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp 2016-03-25 14:15:00.127660000 -0700 @@ -812,7 +812,8 @@ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || - x->id() == vmIntrinsics::_dsin) { + x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || + x->id() == vmIntrinsics::_dlog10) { do_LibmIntrinsic(x); return; } @@ -820,58 +821,17 @@ LIRItem value(x->argument_at(0), this); bool use_fpu = false; - if (UseSSE >= 2) { - switch(x->id()) { - case vmIntrinsics::_dtan: - case vmIntrinsics::_dlog10: - use_fpu = true; - break; - } - } else { + if (UseSSE < 2) { value.set_destroys_register(); } - value.load_item(); LIR_Opr calc_input = value.result(); - LIR_Opr calc_input2 = NULL; - if (x->id() == vmIntrinsics::_dpow) { - LIRItem extra_arg(x->argument_at(1), this); - if (UseSSE < 2) { - extra_arg.set_destroys_register(); - } - extra_arg.load_item(); - calc_input2 = extra_arg.result(); - } LIR_Opr calc_result = rlock_result(x); - // sin, cos, pow and exp need two free fpu stack slots, so register - // two temporary operands - LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0); - LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1); - - if (use_fpu) { - LIR_Opr tmp = FrameMap::fpu0_double_opr; - int tmp_start = 1; - if (calc_input2 != NULL) { - __ move(calc_input2, tmp); - tmp_start = 2; - calc_input2 = tmp; - } - __ move(calc_input, tmp); - - calc_input = tmp; - calc_result = tmp; - - tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start); - tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1); - } - switch(x->id()) { case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break; case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break; - case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break; - case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break; default: ShouldNotReachHere(); } @@ -912,21 +872,28 @@ result_reg = tmp; switch(x->id()) { case vmIntrinsics::_dexp: - if (VM_Version::supports_sse2()) { + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); } break; case vmIntrinsics::_dlog: - if (VM_Version::supports_sse2()) { + if (StubRoutines::dlog() != NULL) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); } break; + case vmIntrinsics::_dlog10: + if (StubRoutines::dlog10() != NULL) { + __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); + } + break; case vmIntrinsics::_dpow: - if (VM_Version::supports_sse2()) { + if (StubRoutines::dpow() != NULL) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); @@ -946,18 +913,44 @@ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); } break; + case vmIntrinsics::_dtan: + if (StubRoutines::dtan() != NULL) { + __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); + } + break; default: ShouldNotReachHere(); } #else switch (x->id()) { case vmIntrinsics::_dexp: - __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); + if (StubRoutines::dexp() != NULL) { + __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); + } break; case vmIntrinsics::_dlog: + if (StubRoutines::dlog() != NULL) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); + } + break; + case vmIntrinsics::_dlog10: + if (StubRoutines::dlog10() != NULL) { + __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); + } break; case vmIntrinsics::_dpow: + if (StubRoutines::dpow() != NULL) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); + } break; case vmIntrinsics::_dsin: if (StubRoutines::dsin() != NULL) { @@ -973,6 +966,13 @@ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); } break; + case vmIntrinsics::_dtan: + if (StubRoutines::dtan() != NULL) { + __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); + } + break; default: ShouldNotReachHere(); } #endif // _LP64 --- old/src/cpu/x86/vm/c1_LinearScan_x86.cpp 2016-03-25 14:15:03.842872500 -0700 +++ new/src/cpu/x86/vm/c1_LinearScan_x86.cpp 2016-03-25 14:15:03.338843700 -0700 @@ -786,58 +786,6 @@ break; } - case lir_log10: { - // log and log10 need one temporary fpu stack slot, so - // there is one temporary registers stored in temp of the - // operation. the stack allocator must guarantee that the stack - // slots are really free, otherwise there might be a stack - // overflow. - assert(right->is_illegal(), "must be"); - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(op2->tmp1_opr()->is_fpu_register(), "must be"); - - insert_free_if_dead(op2->tmp1_opr()); - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); - break; - } - - - case lir_tan: { - // sin, cos and exp need two temporary fpu stack slots, so there are two temporary - // registers (stored in right and temp of the operation). - // the stack allocator must guarantee that the stack slots are really free, - // otherwise there might be a stack overflow. - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - // assert(left->is_last_use(), "old value gets destroyed"); - assert(right->is_fpu_register(), "right is used as the first temporary register"); - assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register"); - assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); - - insert_free_if_dead(right); - insert_free_if_dead(op2->tmp1_opr()); - - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - assert(sim()->stack_size() <= 6, "at least two stack slots must be free"); - break; - } - default: { assert(false, "missed a fpu-operation"); } --- old/src/cpu/x86/vm/globals_x86.hpp 2016-03-25 14:15:06.946050000 -0700 +++ new/src/cpu/x86/vm/globals_x86.hpp 2016-03-25 14:15:06.443021200 -0700 @@ -192,11 +192,5 @@ "Use BMI1 instructions") \ \ product(bool, UseBMI2Instructions, false, \ - "Use BMI2 instructions") \ - \ - diagnostic(bool, UseLibmSinIntrinsic, true, \ - "Use Libm Sin Intrinsic") \ - \ - diagnostic(bool, UseLibmCosIntrinsic, true, \ - "Use Libm Cos Intrinsic") + "Use BMI2 instructions") #endif // CPU_X86_VM_GLOBALS_X86_HPP --- old/src/cpu/x86/vm/macroAssembler_libm_x86_32.cpp 2016-03-25 14:15:09.968222900 -0700 +++ new/src/cpu/x86/vm/macroAssembler_libm_x86_32.cpp 2016-03-25 14:15:09.471194400 -0700 @@ -645,6 +645,329 @@ } /******************************************************************************/ +// ALGORITHM DESCRIPTION - LOG10() +// --------------------- +// +// Let x=2^k * mx, mx in [1,2) +// +// Get B~1/mx based on the output of rcpss instruction (B0) +// B = int((B0*LH*2^7+0.5))/2^7 +// LH is a short approximation for log10(e) +// +// Reduced argument: r=B*mx-LH (computed accurately in high and low parts) +// +// Result: k*log10(2) - log(B) + p(r) +// p(r) is a degree 7 polynomial +// -log(B) read from data table (high, low parts) +// Result is formed from high and low parts +// +// Special cases: +// log10(0) = -INF with divide-by-zero exception raised +// log10(1) = +0 +// log10(x) = NaN with invalid exception raised if x < -0, including -INF +// log10(+INF) = +INF +// +/******************************************************************************/ + +ALIGNED_(16) juint _static_const_table_log10[] = +{ + 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, + 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, + 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, + 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, + 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, + 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, + 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, + 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, + 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, + 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, + 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, + 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, + 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, + 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, + 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, + 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, + 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, + 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, + 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, + 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, + 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, + 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, + 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, + 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, + 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, + 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, + 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, + 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, + 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, + 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, + 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, + 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, + 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, + 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, + 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, + 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, + 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, + 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, + 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, + 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, + 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, + 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, + 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, + 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, + 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, + 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, + 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, + 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, + 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, + 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, + 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, + 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, + 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, + 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, + 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, + 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, + 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, + 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, + 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, + 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, + 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, + 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, + 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, + 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, + 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, + 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, + 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, + 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, + 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, + 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, + 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, + 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, + 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, + 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, + 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, + 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, + 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, + 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, + 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, + 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, + 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, + 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, + 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, + 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, + 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, + 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, + 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, + 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, + 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, + 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, + 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, + 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, + 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, + 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, + 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, + 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, + 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, + 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, + 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, + 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, + 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, + 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, + 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL, + 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, + 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, + 0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL, + 0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL +}; +//registers, +// input: xmm0 +// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 +// rax, rdx, rcx, rbx (tmp) + +void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { + + Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; + Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; + Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start; + + assert_different_registers(tmp, eax, ecx, edx); + + address static_const_table_log10 = (address)_static_const_table_log10; + + bind(start); + subl(rsp, 104); + movl(Address(rsp, 40), tmp); + lea(tmp, ExternalAddress(static_const_table_log10)); + xorpd(xmm2, xmm2); + movl(eax, 16368); + pinsrw(xmm2, eax, 3); + movl(ecx, 1054736384); + movdl(xmm7, ecx); + xorpd(xmm3, xmm3); + movl(edx, 30704); + pinsrw(xmm3, edx, 3); + movsd(xmm0, Address(rsp, 112)); + movdqu(xmm1, xmm0); + movl(edx, 32768); + movdl(xmm4, edx); + movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL + pextrw(eax, xmm0, 3); + por(xmm0, xmm2); + movl(ecx, 16352); + psllq(xmm0, 5); + movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL + psrlq(xmm0, 34); + rcpss(xmm0, xmm0); + psllq(xmm1, 12); + pshufd(xmm6, xmm5, 78); + psrlq(xmm1, 12); + subl(eax, 16); + cmpl(eax, 32736); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); + + bind(L_2TAG_PACKET_1_0_2); + mulss(xmm0, xmm7); + por(xmm1, xmm3); + andpd(xmm5, xmm1); + paddd(xmm0, xmm4); + subsd(xmm1, xmm5); + movdl(edx, xmm0); + psllq(xmm0, 29); + andpd(xmm0, xmm6); + andl(eax, 32752); + subl(eax, ecx); + cvtsi2sdl(xmm7, eax); + mulpd(xmm5, xmm0); + mulsd(xmm1, xmm0); + movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL + movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL + subsd(xmm5, xmm2); + andl(edx, 16711680); + shrl(edx, 12); + movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504)); + movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL + addsd(xmm1, xmm5); + movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL + mulsd(xmm6, xmm7); + pshufd(xmm5, xmm1, 68); + mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL + mulsd(xmm3, xmm1); + addsd(xmm0, xmm6); + mulpd(xmm4, xmm5); + movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL + mulpd(xmm5, xmm5); + addpd(xmm4, xmm2); + mulpd(xmm3, xmm5); + pshufd(xmm2, xmm0, 228); + addsd(xmm0, xmm1); + mulsd(xmm4, xmm1); + subsd(xmm2, xmm0); + mulsd(xmm6, xmm1); + addsd(xmm1, xmm2); + pshufd(xmm2, xmm0, 238); + mulsd(xmm5, xmm5); + addsd(xmm7, xmm2); + addsd(xmm1, xmm6); + addpd(xmm4, xmm3); + addsd(xmm1, xmm7); + mulpd(xmm4, xmm5); + addsd(xmm1, xmm4); + pshufd(xmm5, xmm4, 238); + addsd(xmm1, xmm5); + addsd(xmm0, xmm1); + jmp(L_2TAG_PACKET_2_0_2); + + bind(L_2TAG_PACKET_0_0_2); + movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL + movdqu(xmm1, xmm0); + addl(eax, 16); + cmpl(eax, 32768); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); + cmpl(eax, 16); + jcc(Assembler::below, L_2TAG_PACKET_4_0_2); + + bind(L_2TAG_PACKET_5_0_2); + addsd(xmm0, xmm0); + jmp(L_2TAG_PACKET_2_0_2); + + bind(L_2TAG_PACKET_6_0_2); + jcc(Assembler::above, L_2TAG_PACKET_5_0_2); + cmpl(edx, 0); + jcc(Assembler::above, L_2TAG_PACKET_5_0_2); + jmp(L_2TAG_PACKET_7_0_2); + + bind(L_2TAG_PACKET_3_0_2); + movdl(edx, xmm1); + psrlq(xmm1, 32); + movdl(ecx, xmm1); + addl(ecx, ecx); + cmpl(ecx, -2097152); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); + orl(edx, ecx); + cmpl(edx, 0); + jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); + + bind(L_2TAG_PACKET_7_0_2); + xorpd(xmm1, xmm1); + xorpd(xmm0, xmm0); + movl(eax, 32752); + pinsrw(xmm1, eax, 3); + movl(edx, 9); + mulsd(xmm0, xmm1); + + bind(L_2TAG_PACKET_9_0_2); + movsd(Address(rsp, 0), xmm0); + movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_10_0_2); + + bind(L_2TAG_PACKET_8_0_2); + xorpd(xmm1, xmm1); + xorpd(xmm0, xmm0); + movl(eax, 49136); + pinsrw(xmm0, eax, 3); + divsd(xmm0, xmm1); + movl(edx, 8); + jmp(L_2TAG_PACKET_9_0_2); + + bind(L_2TAG_PACKET_4_0_2); + movdl(edx, xmm1); + psrlq(xmm1, 32); + movdl(ecx, xmm1); + orl(edx, ecx); + cmpl(edx, 0); + jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); + xorpd(xmm1, xmm1); + movl(eax, 18416); + pinsrw(xmm1, eax, 3); + mulsd(xmm0, xmm1); + xorpd(xmm2, xmm2); + movl(eax, 16368); + pinsrw(xmm2, eax, 3); + movdqu(xmm1, xmm0); + pextrw(eax, xmm0, 3); + por(xmm0, xmm2); + movl(ecx, 18416); + psllq(xmm0, 5); + movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL + psrlq(xmm0, 34); + rcpss(xmm0, xmm0); + psllq(xmm1, 12); + pshufd(xmm6, xmm5, 78); + psrlq(xmm1, 12); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_2_0_2); + movsd(Address(rsp, 24), xmm0); + fld_d(Address(rsp, 24)); + + bind(L_2TAG_PACKET_10_0_2); + movl(tmp, Address(rsp, 40)); + +} + +/******************************************************************************/ // ALGORITHM DESCRIPTION - POW() // --------------------- // @@ -4569,3 +4892,1149 @@ movl(tmp, Address(rsp, 56)); } +/******************************************************************************/ +// ALGORITHM DESCRIPTION - TAN() +// --------------------- +// +// Polynomials coefficients and other constants. +// +// Note that in this algorithm, there is a different polynomial for +// each breakpoint, so there are 32 sets of polynomial coefficients +// as well as 32 instances of the other constants. +// +// The polynomial coefficients and constants are offset from the start +// of the main block as follows: +// +// 0: c8 | c0 +// 16: c9 | c1 +// 32: c10 | c2 +// 48: c11 | c3 +// 64: c12 | c4 +// 80: c13 | c5 +// 96: c14 | c6 +// 112: c15 | c7 +// 128: T_hi +// 136: T_lo +// 144: Sigma +// 152: T_hl +// 160: Tau +// 168: Mask +// 176: (end of block) +// +// The total table size is therefore 5632 bytes. +// +// Note that c0 and c1 are always zero. We could try storing +// other constants here, and just loading the low part of the +// SIMD register in these cases, after ensuring the high part +// is zero. +// +// The higher terms of the polynomial are computed in the *low* +// part of the SIMD register. This is so we can overlap the +// multiplication by r^8 and the unpacking of the other part. +// +// The constants are: +// T_hi + T_lo = accurate constant term in power series +// Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit) +// Tau = multiplier for the reciprocal, always -1 or 0 +// +// The basic reconstruction formula using these constants is: +// +// High = tau * recip_hi + t_hi +// Med = (sgn * r + t_hl * r)_hi +// Low = (sgn * r + t_hl * r)_lo + +// tau * recip_lo + T_lo + (T_hl + sigma) * c + pol +// +// where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 +// +// (c0 = c1 = 0, but using them keeps SIMD regularity) +// +// We then do a compensated sum High + Med, add the low parts together +// and then do the final sum. +// +// Here recip_hi + recip_lo is an accurate reciprocal of the remainder +// modulo pi/2 +// +// Special cases: +// tan(NaN) = quiet NaN, and raise invalid exception +// tan(INF) = NaN and raise invalid exception +// tan(+/-0) = +/-0 +// +/******************************************************************************/ + +ALIGNED_(16) jushort _TP[] = +{ + 0x4cd6, 0xaf6c, 0xc710, 0xc662, 0xbffd, 0x0000, 0x4b06, 0xb0ac, 0xd3b2, 0xcc2c, + 0x3ff9, 0x0000, 0x00e3, 0xc850, 0xaa28, 0x9533, 0xbff3, 0x0000, 0x2ff0, 0x466d, + 0x1a3b, 0xb266, 0x3fe5, 0x0000 +}; + +ALIGNED_(16) jushort _TQ[] = +{ + 0x399c, 0x8391, 0x154c, 0x94ca, 0xbfff, 0x0000, 0xb6a3, 0xc36a, 0x44e2, 0x8a2c, + 0x3ffe, 0x0000, 0xb70f, 0xd068, 0xa6ce, 0xe9dd, 0xbff9, 0x0000, 0x820f, 0x51ce, + 0x7d76, 0x9bff, 0x3ff3, 0x0000 +}; + +ALIGNED_(16) jushort _GP[] = +{ + 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffd, 0x0000, 0xb62f, 0x0b60, 0x60b6, 0xb60b, + 0xbff9, 0x0000, 0xdfa7, 0x08aa, 0x55e0, 0x8ab3, 0xbff6, 0x0000, 0x85a0, 0xa819, + 0xbc99, 0xddeb, 0xbff2, 0x0000, 0x7065, 0x6a37, 0x795f, 0xb354, 0xbfef, 0x0000, + 0xa8f9, 0x83f1, 0x2ec8, 0x9140, 0xbfec, 0x0000, 0xf3ca, 0x8c96, 0x8e0b, 0xeb6d, + 0xbfe8, 0x0000, 0x355b, 0xd910, 0x67c9, 0xbed3, 0xbfe5, 0x0000, 0x286b, 0xb49e, + 0xb854, 0x9a98, 0xbfe2, 0x0000, 0x0871, 0x1a2f, 0x6477, 0xfcc4, 0xbfde, 0x0000, + 0xa559, 0x1da9, 0xaed2, 0xba76, 0xbfdb, 0x0000, 0x00a3, 0x7fea, 0x9bc3, 0xf205, + 0xbfd8, 0x0000 +}; + +void MacroAssembler::libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { + Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; + Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23; + Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; + Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_41, B1_42, B1_43, B1_44, B1_45, B1_46; + + assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); + + address L_2il0floatpacket_0 = (address)_L_2il0floatpacket_0; + address Pi4Inv = (address)_Pi4Inv; + address Pi4x3 = (address)_Pi4x3; + address Pi4x4 = (address)_Pi4x4; + address ones = (address)_ones; + address TP = (address)_TP; + address TQ = (address)_TQ; + address GP = (address)_GP; + + bind(B1_1); + push(ebp); + movl(ebp, esp); + andl(esp, -64); + push(esi); + push(edi); + push(ebx); + subl(esp, 52); + movl(eax, Address(ebp, 16)); + movl(ebx, Address(ebp, 20)); + movl(Address(esp, 40), eax); + + bind(B1_2); + fnstcw(Address(esp, 38)); + + bind(B1_3); + movl(edx, Address(ebp, 12)); + movl(eax, edx); + andl(eax, 2147483647); + shrl(edx, 31); + movl(Address(esp, 44), edx); + cmpl(eax, 1104150528); + jcc(Assembler::aboveEqual, B1_11); + + bind(B1_4); + movsd(xmm1, Address(ebp, 8)); + movzwl(ecx, Address(esp, 38)); + movl(edx, ecx); + andl(edx, 768); + andps(xmm1, ExternalAddress(L_2il0floatpacket_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL + cmpl(edx, 768); + movsd(xmm0, ExternalAddress(Pi4Inv)); ////0x6dc9c883UL, 0x3ff45f30UL + mulsd(xmm0, xmm1); + movsd(Address(ebp, 8), xmm1); + movsd(Address(esp, 0), xmm0); + jcc(Assembler::equal, B1_39); + + bind(B1_5); + orl(ecx, -64768); + movw(Address(esp, 36), ecx); + + bind(B1_6); + fldcw(Address(esp, 36)); + + bind(B1_7); + movsd(xmm1, Address(ebp, 8)); + movl(edi, 1); + + bind(B1_8); + movl(Address(esp, 12), esi); + movl(esi, Address(esp, 4)); + movl(edx, esi); + movl(Address(esp, 24), edi); + movl(edi, esi); + shrl(edi, 20); + andl(edx, 1048575); + movl(ecx, edi); + orl(edx, 1048576); + negl(ecx); + addl(edi, 13); + movl(Address(esp, 8), ebx); + addl(ecx, 19); + movl(ebx, edx); + movl(Address(esp, 28), ecx); + shrl(ebx); + movl(ecx, edi); + shll(edx); + movl(ecx, Address(esp, 28)); + movl(edi, Address(esp, 0)); + shrl(edi); + orl(edx, edi); + cmpl(esi, 1094713344); + movsd(Address(esp, 16), xmm1); + fld_d(Address(esp, 16)); + cmov32(Assembler::below, edx, ebx); + movl(edi, Address(esp, 24)); + movl(esi, Address(esp, 12)); + lea(ebx, Address(edx, 1)); + andl(ebx, -2); + movl(Address(esp, 16), ebx); + cmpl(eax, 1094713344); + fild_s(Address(esp, 16)); + movl(ebx, Address(esp, 8)); + jcc(Assembler::aboveEqual, B1_10); + + bind(B1_9); + fld_d(ExternalAddress(Pi4x3)); //0x54443000UL, 0xbfe921fbUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(8 + Pi4x3)); //0x3b39a000UL, 0x3d373dcbUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(16 + Pi4x3)); //0xe0e68948UL, 0xba845c06UL + fmulp(1); + faddp(1); + jmp(B1_17); + + bind(B1_10); + fld_d(ExternalAddress(Pi4x4)); //0x54400000UL, 0xbfe921fbUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(8 + Pi4x4)); //0x1a600000UL, 0xbdc0b461UL + fmul(1); + faddp(2); + fld_d(ExternalAddress(16 + Pi4x4)); //0x2e000000UL, 0xbb93198aUL + fmul(1); + faddp(2); + fld_d(ExternalAddress(24 + Pi4x4)); //0x252049c1UL, 0xb96b839aUL + fmulp(1); + faddp(1); + jmp(B1_17); + + bind(B1_11); + movzwl(edx, Address(esp, 38)); + movl(eax, edx); + andl(eax, 768); + cmpl(eax, 768); + jcc(Assembler::equal, B1_40); + + bind(B1_12); + orl(edx, -64768); + movw(Address(esp, 36), edx); + + bind(B1_13); + fldcw(Address(esp, 36)); + + bind(B1_14); + movl(edi, 1); + + bind(B1_15); + movsd(xmm0, Address(ebp, 8)); + addl(esp, -32); + andps(xmm0, ExternalAddress(L_2il0floatpacket_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL + lea(eax, Address(esp, 32)); + movsd(Address(eax, 16), xmm0); + fld_d(Address(eax, 16)); + fstp_x(Address(esp, 0)); + movl(Address(esp, 12), 0); + movl(Address(esp, 16), eax); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); + + bind(B1_43); + movl(edx, eax); + addl(esp, 32); + + bind(B1_16); + fld_d(Address(esp, 0)); + fld_d(Address(esp, 8)); + faddp(1); + + bind(B1_17); + movl(eax, ebx); + andl(eax, 3); + cmpl(eax, 3); + jcc(Assembler::notEqual, B1_24); + + bind(B1_18); + fld_d(ExternalAddress(ones)); + incl(edx); + fdiv(1); + testb(edx, 2); + fstp_x(Address(esp, 24)); + fld_s(0); + fmul(1); + fld_s(0); + fmul(1); + fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a + fmul(2); + fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa + faddp(1); + fmul(2); + fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 + faddp(1); + fmul(2); + fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d + fmul(3); + fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 + faddp(1); + fmul(3); + fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 + faddp(1); + fmul(3); + fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 + faddp(1); + fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 + faddp(2); + fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b + fmul(3); + fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae + fmul(4); + fld_x(ExternalAddress(108 + GP)); //0x0871, 0x1a2f, 0x64 + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(72 + GP)); //0x8c96, 0x8e0b, 0xeb + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc + faddp(2); + fxch(1); + fmul(4); + fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 + faddp(2); + fxch(1); + fmulp(4); + fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 + faddp(1); + fmul(4); + fmul(5); + fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa + faddp(4); + fxch(3); + fmul(5); + faddp(3); + jcc(Assembler::equal, B1_20); + + bind(B1_19); + fld_x(Address(esp, 24)); + fxch(1); + fdivrp(2); + fxch(1); + fmulp(3); + movl(eax, Address(esp, 44)); + xorl(eax, 1); + fxch(2); + fmul(3); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(2); + fmula(3); + fxch(3); + faddp(2); + fxch(1); + fstp_d(Address(esp, 16)); + fmul(1); + fxch(1); + fmulp(2); + movsd(xmm0, Address(esp, 16)); + faddp(1); + fstp_d(Address(esp, 16)); + movsd(xmm1, Address(esp, 16)); + jmp(B1_21); + + bind(B1_20); + fdivrp(1); + fmulp(2); + fxch(1); + fmul(2); + movl(eax, Address(esp, 44)); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(1); + fmula(3); + fxch(3); + faddp(1); + fstp_d(Address(esp, 16)); + fmul(1); + fld_x(Address(esp, 24)); + fmulp(2); + movsd(xmm0, Address(esp, 16)); + faddp(1); + fstp_d(Address(esp, 16)); + movsd(xmm1, Address(esp, 16)); + + bind(B1_21); + testl(edi, edi); + jcc(Assembler::equal, B1_23); + + bind(B1_22); + fldcw(Address(esp, 38)); + + bind(B1_23); + movl(eax, Address(esp, 40)); + movsd(Address(eax, 0), xmm0); + movsd(Address(eax, 8), xmm1); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_24); + testb(ebx, 2); + jcc(Assembler::equal, B1_31); + + bind(B1_25); + incl(edx); + fld_s(0); + fmul(1); + testb(edx, 2); + jcc(Assembler::equal, B1_27); + + bind(B1_26); + fld_d(ExternalAddress(ones)); + fdiv(2); + fld_s(1); + fmul(2); + fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b + fmul(1); + fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae + fmul(2); + fld_x(ExternalAddress(108 + GP)); //0x67c9, 0xbed3, 0xbf + movl(eax, Address(esp, 44)); + faddp(2); + fxch(1); + fmul(2); + xorl(eax, 1); + fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(72 + GP)); //0xf3ca, 0x8c96, 0x8e + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 + faddp(2); + fxch(1); + fmulp(2); + fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 + faddp(1); + fmulp(3); + fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa + faddp(1); + fmul(3); + fxch(2); + fmulp(3); + fxch(1); + faddp(2); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(2); + fmulp(1); + faddp(1); + fstp_d(Address(esp, 16)); + movsd(xmm0, Address(esp, 16)); + jmp(B1_28); + + bind(B1_27); + fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a + fmul(1); + fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa + movl(eax, Address(esp, 44)); + faddp(1); + fmul(1); + fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d + fmul(2); + fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 + faddp(1); + fmul(2); + fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 + faddp(1); + fmul(2); + fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 + faddp(1); + fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 + faddp(1); + fdivrp(1); + fmulp(1); + fmul(1); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(1); + fmulp(2); + faddp(1); + fstp_d(Address(esp, 16)); + movsd(xmm0, Address(esp, 16)); + + bind(B1_28); + testl(edi, edi); + jcc(Assembler::equal, B1_30); + + bind(B1_29); + fldcw(Address(esp, 38)); + + bind(B1_30); + movl(eax, Address(esp, 40)); + movsd(Address(eax, 0), xmm0); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + movl(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_31); + testb(ebx, 1); + jcc(Assembler::equal, B1_38); + + bind(B1_32); + incl(edx); + fld_s(0); + fmul(1); + testb(edx, 2); + jcc(Assembler::equal, B1_34); + + bind(B1_33); + fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a + fmul(1); + fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa + movl(eax, Address(esp, 44)); + faddp(1); + fmul(1); + xorl(eax, 1); + fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d + fmul(2); + fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 + faddp(1); + fmul(2); + fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 + faddp(1); + fmul(2); + fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 + faddp(1); + fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 + faddp(1); + fdivrp(1); + fmulp(1); + fmul(1); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(1); + fmulp(2); + faddp(1); + fstp_d(Address(esp, 16)); + movsd(xmm0, Address(esp, 16)); + jmp(B1_35); + + bind(B1_34); + fld_d(ExternalAddress(ones)); + fdiv(2); + fld_s(1); + fmul(2); + fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b + fmul(1); + fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae + fmul(2); + fld_x(ExternalAddress(108 + GP)); //0x67c9, 0xbed3, 0xbf + movl(eax, Address(esp, 44)); + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(72 + GP)); //0xf3ca, 0x8c96, 0x8e + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc + faddp(2); + fxch(1); + fmul(2); + fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 + faddp(2); + fxch(1); + fmulp(2); + fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 + faddp(1); + fmulp(3); + fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa + faddp(1); + fmul(3); + fxch(2); + fmulp(3); + fxch(1); + faddp(2); + fld_d(Address(ones, RelocationHolder::none).plus_disp(eax, Address::times_8)); + fmula(2); + fmulp(1); + faddp(1); + fstp_d(Address(esp, 16)); + movsd(xmm0, Address(esp, 16)); + + bind(B1_35); + testl(edi, edi); + jcc(Assembler::equal, B1_37); + + bind(B1_36); + fldcw(Address(esp, 38)); + + bind(B1_37); + movl(eax, Address(esp, 40)); + movsd(Address(eax, 8), xmm0); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + mov(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_38); + fstp_d(0); + addl(esp, 52); + pop(ebx); + pop(edi); + pop(esi); + mov(esp, ebp); + pop(ebp); + ret(0); + + bind(B1_39); + xorl(edi, edi); + jmp(B1_8); + + bind(B1_40); + xorl(edi, edi); + jmp(B1_15); +} + +ALIGNED_(16) juint _static_const_table_tan[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL, + 0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL, + 0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL, + 0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, + 0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, + 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL, + 0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, + 0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL, + 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL, + 0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, + 0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL, + 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL, + 0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL, + 0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL, + 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL, + 0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL, + 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL, + 0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL, + 0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL, + 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL, + 0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, + 0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL, + 0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL, + 0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, + 0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL, + 0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, + 0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL, + 0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL, + 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL, + 0x3fc50533UL, 0x9bed9b2eUL, 0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL, + 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL, + 0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL, + 0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL, + 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL, + 0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL, + 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL, + 0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL, + 0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, + 0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL, + 0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, + 0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, + 0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL, + 0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, + 0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, + 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL, + 0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, + 0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL, + 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL, + 0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, + 0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL, + 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL, + 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL, + 0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL, + 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL, + 0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL, + 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL, + 0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL, + 0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL, + 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL, + 0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, + 0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL, + 0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL, + 0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, + 0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL, + 0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL, + 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL, + 0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL, + 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL, + 0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL, + 0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL, + 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL, + 0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL, + 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, + 0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 0x0359f1beUL, 0xbf0139e4UL, + 0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, + 0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL, + 0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, + 0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, + 0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL, + 0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, + 0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, + 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL, + 0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, + 0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL, + 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL, + 0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, + 0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL, + 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL, + 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL, + 0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL, + 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL, + 0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL, + 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL, + 0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL, + 0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL, + 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL, + 0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, + 0xb93820c8UL, 0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL, + 0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL, + 0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, + 0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL, + 0xa158e8bcUL, 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, + 0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL, + 0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL, + 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL, + 0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL, + 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL, + 0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL, + 0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL, + 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL, + 0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL, + 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL, + 0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL, + 0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, + 0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL, + 0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, + 0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, + 0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL, + 0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL, + 0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL, + 0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL, + 0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL, + 0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL, + 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL, + 0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL, + 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL, + 0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL, + 0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL, + 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL, + 0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, + 0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL, + 0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL, + 0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, + 0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL, + 0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, + 0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL, + 0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL, + 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL, + 0x3ee264d4UL, 0xbb6cbb18UL, 0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL, + 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL, + 0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL, + 0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL, + 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL, + 0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL, + 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, + 0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL, + 0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, + 0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL, + 0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, + 0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, + 0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL, + 0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, + 0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, + 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL, + 0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, + 0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL, + 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL, + 0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, + 0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL, + 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL, + 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL, + 0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL, + 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL, + 0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL, + 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL, + 0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL, + 0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL, + 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL, + 0x00000000UL, 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, + 0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL, + 0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL, + 0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, + 0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL, + 0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, + 0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL, + 0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL, + 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL, + 0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL, + 0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL, + 0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL, + 0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL, + 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, + 0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL, + 0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, + 0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL, + 0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, + 0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL, + 0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL, + 0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, + 0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, + 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL, + 0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, + 0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL, + 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL, + 0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, + 0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL, + 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL, + 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL, + 0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL, + 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL, + 0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL, + 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL, + 0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL, + 0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL, + 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL, + 0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, + 0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL, + 0x3fc1f250UL, 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL, + 0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, + 0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL, + 0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, + 0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL, + 0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL, + 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL, + 0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL, + 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 0x59f21a6dUL, + 0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL, + 0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL, + 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL, + 0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL, + 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL, + 0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL, + 0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, + 0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL, + 0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, + 0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, + 0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL, + 0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, + 0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, + 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL, + 0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, + 0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL, + 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL, + 0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, + 0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL, + 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x6dc9c883UL, 0x3fe45f30UL, + 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x43780000UL, 0x00000000UL, + 0x43380000UL, 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL, + 0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL, 0x3707344aUL, + 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL, 0x00000000UL, 0x80000000UL, + 0x00000000UL, 0x80000000UL, 0x676733afUL, 0x3d32e7b9UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x7ff00000UL, 0x00000000UL, 0x00000000UL, 0xfffc0000UL, + 0xffffffffUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ca00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL, 0x00000000UL, + 0x40300000UL, 0x00000000UL, 0x3ff00000UL +}; + +void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { + + Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; + Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; + Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; + Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; + + assert_different_registers(tmp, eax, ecx, edx); + + address static_const_table_tan = (address)_static_const_table_tan; + + bind(start); + subl(rsp, 120); + movl(Address(rsp, 56), tmp); + lea(tmp, ExternalAddress(static_const_table_tan)); + movsd(xmm0, Address(rsp, 128)); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + subl(eax, 14368); + cmpl(eax, 2216); + jcc(Assembler::above, L_2TAG_PACKET_0_0_2); + movdqu(xmm5, Address(tmp, 5840)); + movdqu(xmm6, Address(tmp, 5856)); + unpcklpd(xmm0, xmm0); + movdqu(xmm4, Address(tmp, 5712)); + andpd(xmm4, xmm0); + movdqu(xmm1, Address(tmp, 5632)); + mulpd(xmm1, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + movdqu(xmm7, xmm1); + unpckhpd(xmm7, xmm7); + cvttsd2sil(edx, xmm7); + cvttpd2dq(xmm1, xmm1); + cvtdq2pd(xmm1, xmm1); + mulpd(xmm1, xmm6); + movdqu(xmm3, Address(tmp, 5664)); + movsd(xmm5, Address(tmp, 5728)); + addl(edx, 469248); + movdqu(xmm4, Address(tmp, 5680)); + mulpd(xmm3, xmm1); + andl(edx, 31); + mulsd(xmm5, xmm1); + movl(ecx, edx); + mulpd(xmm4, xmm1); + shll(ecx, 1); + subpd(xmm0, xmm3); + mulpd(xmm1, Address(tmp, 5696)); + addl(edx, ecx); + shll(ecx, 2); + addl(edx, ecx); + addsd(xmm5, xmm0); + movdqu(xmm2, xmm0); + subpd(xmm0, xmm4); + movsd(xmm6, Address(tmp, 5744)); + shll(edx, 4); + lea(eax, Address(tmp, 0)); + andpd(xmm5, Address(tmp, 5776)); + movdqu(xmm3, xmm0); + addl(eax, edx); + subpd(xmm2, xmm0); + unpckhpd(xmm0, xmm0); + divsd(xmm6, xmm5); + subpd(xmm2, xmm4); + movdqu(xmm7, Address(eax, 16)); + subsd(xmm3, xmm5); + mulpd(xmm7, xmm0); + subpd(xmm2, xmm1); + movdqu(xmm1, Address(eax, 48)); + mulpd(xmm1, xmm0); + movdqu(xmm4, Address(eax, 96)); + mulpd(xmm4, xmm0); + addsd(xmm2, xmm3); + movdqu(xmm3, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm7, Address(eax, 0)); + addpd(xmm1, Address(eax, 32)); + mulpd(xmm1, xmm0); + addpd(xmm4, Address(eax, 80)); + addpd(xmm7, xmm1); + movdqu(xmm1, Address(eax, 112)); + mulpd(xmm1, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm4, xmm1); + movdqu(xmm1, Address(eax, 64)); + mulpd(xmm1, xmm0); + addpd(xmm7, xmm1); + movdqu(xmm1, xmm3); + mulpd(xmm3, xmm0); + mulsd(xmm0, xmm0); + mulpd(xmm1, Address(eax, 144)); + mulpd(xmm4, xmm3); + movdqu(xmm3, xmm1); + addpd(xmm7, xmm4); + movdqu(xmm4, xmm1); + mulsd(xmm0, xmm7); + unpckhpd(xmm7, xmm7); + addsd(xmm0, xmm7); + unpckhpd(xmm1, xmm1); + addsd(xmm3, xmm1); + subsd(xmm4, xmm3); + addsd(xmm1, xmm4); + movdqu(xmm4, xmm2); + movsd(xmm7, Address(eax, 144)); + unpckhpd(xmm2, xmm2); + addsd(xmm7, Address(eax, 152)); + mulsd(xmm7, xmm2); + addsd(xmm7, Address(eax, 136)); + addsd(xmm7, xmm1); + addsd(xmm0, xmm7); + movsd(xmm7, Address(tmp, 5744)); + mulsd(xmm4, xmm6); + movsd(xmm2, Address(eax, 168)); + andpd(xmm2, xmm6); + mulsd(xmm5, xmm2); + mulsd(xmm6, Address(eax, 160)); + subsd(xmm7, xmm5); + subsd(xmm2, Address(eax, 128)); + subsd(xmm7, xmm4); + mulsd(xmm7, xmm6); + movdqu(xmm4, xmm3); + subsd(xmm3, xmm2); + addsd(xmm2, xmm3); + subsd(xmm4, xmm2); + addsd(xmm0, xmm4); + subsd(xmm0, xmm7); + addsd(xmm0, xmm3); + movsd(Address(rsp, 0), xmm0); + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_0_0_2); + jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); + shrl(eax, 4); + cmpl(eax, 268434558); + jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2); + movdqu(xmm3, xmm0); + mulsd(xmm3, Address(tmp, 5808)); + + bind(L_2TAG_PACKET_3_0_2); + movsd(xmm3, Address(tmp, 5792)); + mulsd(xmm3, xmm0); + addsd(xmm3, xmm0); + mulsd(xmm3, Address(tmp, 5808)); + movsd(Address(rsp, 0), xmm3); + fld_d(Address(rsp, 0)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_2_0_2); + movq(xmm7, Address(tmp, 5712)); + andpd(xmm7, xmm0); + xorpd(xmm7, xmm0); + ucomisd(xmm7, Address(tmp, 5760)); + jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); + subl(rsp, 32); + movsd(Address(rsp, 0), xmm0); + lea(eax, Address(rsp, 40)); + movl(Address(rsp, 8), eax); + movl(eax, 2); + movl(Address(rsp, 12), eax); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_tan_cot_huge()))); + addl(rsp, 32); + fld_d(Address(rsp, 8)); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_4_0_2); + movq(Address(rsp, 0), xmm0); + fld_d(Address(rsp, 0)); + fsub_d(Address(rsp, 0)); + + bind(L_2TAG_PACKET_1_0_2); + movl(tmp, Address(rsp, 56)); +} --- old/src/cpu/x86/vm/macroAssembler_libm_x86_64.cpp 2016-03-25 14:15:13.476423500 -0700 +++ new/src/cpu/x86/vm/macroAssembler_libm_x86_64.cpp 2016-03-25 14:15:12.949393400 -0700 @@ -737,6 +737,354 @@ } /******************************************************************************/ +// ALGORITHM DESCRIPTION - LOG10() +// --------------------- +// +// Let x=2^k * mx, mx in [1,2) +// +// Get B~1/mx based on the output of rcpss instruction (B0) +// B = int((B0*LH*2^7+0.5))/2^7 +// LH is a short approximation for log10(e) +// +// Reduced argument: r=B*mx-LH (computed accurately in high and low parts) +// +// Result: k*log10(2) - log(B) + p(r) +// p(r) is a degree 7 polynomial +// -log(B) read from data table (high, low parts) +// Result is formed from high and low parts +// +// Special cases: +// log10(0) = -INF with divide-by-zero exception raised +// log10(1) = +0 +// log10(x) = NaN with invalid exception raised if x < -0, including -INF +// log10(+INF) = +INF +// +/******************************************************************************/ + +ALIGNED_(16) juint _HIGHSIGMASK_log10[] = +{ + 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL +}; + +ALIGNED_(16) juint _LOG10_E[] = +{ + 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL +}; + +ALIGNED_(16) juint _L_tbl_log10[] = +{ + 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, + 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, + 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, + 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, + 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, + 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, + 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, + 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, + 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, + 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, + 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, + 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, + 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, + 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, + 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, + 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, + 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, + 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, + 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, + 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, + 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, + 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, + 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, + 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, + 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, + 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, + 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, + 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, + 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, + 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, + 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, + 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, + 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, + 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, + 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, + 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, + 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, + 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, + 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, + 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, + 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, + 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, + 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, + 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, + 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, + 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, + 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, + 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, + 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, + 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, + 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, + 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, + 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, + 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, + 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, + 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, + 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, + 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, + 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, + 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, + 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, + 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, + 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, + 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, + 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, + 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, + 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, + 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, + 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, + 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, + 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, + 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, + 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, + 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, + 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, + 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, + 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, + 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, + 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, + 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, + 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, + 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, + 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, + 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, + 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, + 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, + 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, + 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, + 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, + 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, + 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, + 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, + 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, + 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, + 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, + 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, + 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, + 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, + 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, + 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, + 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, + 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, + 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL +}; + +ALIGNED_(16) juint _log2_log10[] = +{ + 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL +}; + +ALIGNED_(16) juint _coeff_log10[] = +{ + 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, + 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, + 0xdc77b115UL, 0xbff27af2UL +}; + +// Registers: +// input: xmm0 +// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 +// rax, rdx, rcx, tmp - r11 + +// Code generated by Intel C compiler for LIBM library + +void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) { + Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; + Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; + Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start; + + assert_different_registers(r11, eax, ecx, edx); + + address HIGHSIGMASK = (address)_HIGHSIGMASK_log10; + address LOG10_E = (address)_LOG10_E; + address L_tbl = (address)_L_tbl_log10; + address log2 = (address)_log2_log10; + address coeff = (address)_coeff_log10; + + bind(start); + subq(rsp, 24); + movsd(Address(rsp, 0), xmm0); + + bind(B1_2); + xorpd(xmm2, xmm2); + movl(eax, 16368); + pinsrw(xmm2, eax, 3); + movl(ecx, 1054736384); + movdl(xmm7, ecx); + xorpd(xmm3, xmm3); + movl(edx, 30704); + pinsrw(xmm3, edx, 3); + movdqu(xmm1, xmm0); + movl(edx, 32768); + movdl(xmm4, edx); + movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL + pextrw(eax, xmm0, 3); + por(xmm0, xmm2); + movl(ecx, 16352); + psrlq(xmm0, 27); + movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL + psrld(xmm0, 2); + rcpps(xmm0, xmm0); + psllq(xmm1, 12); + pshufd(xmm6, xmm5, 78); + psrlq(xmm1, 12); + subl(eax, 16); + cmpl(eax, 32736); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); + + bind(L_2TAG_PACKET_1_0_2); + mulss(xmm0, xmm7); + por(xmm1, xmm3); + lea(r11, ExternalAddress(L_tbl)); + andpd(xmm5, xmm1); + paddd(xmm0, xmm4); + subsd(xmm1, xmm5); + movdl(edx, xmm0); + psllq(xmm0, 29); + andpd(xmm0, xmm6); + andl(eax, 32752); + subl(eax, ecx); + cvtsi2sdl(xmm7, eax); + mulpd(xmm5, xmm0); + mulsd(xmm1, xmm0); + movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL + movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL + subsd(xmm5, xmm2); + andl(edx, 16711680); + shrl(edx, 12); + movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504)); + movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL + addsd(xmm1, xmm5); + movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL + mulsd(xmm6, xmm7); + pshufd(xmm5, xmm1, 68); + mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL + mulsd(xmm3, xmm1); + addsd(xmm0, xmm6); + mulpd(xmm4, xmm5); + movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL + mulpd(xmm5, xmm5); + addpd(xmm4, xmm2); + mulpd(xmm3, xmm5); + pshufd(xmm2, xmm0, 228); + addsd(xmm0, xmm1); + mulsd(xmm4, xmm1); + subsd(xmm2, xmm0); + mulsd(xmm6, xmm1); + addsd(xmm1, xmm2); + pshufd(xmm2, xmm0, 238); + mulsd(xmm5, xmm5); + addsd(xmm7, xmm2); + addsd(xmm1, xmm6); + addpd(xmm4, xmm3); + addsd(xmm1, xmm7); + mulpd(xmm4, xmm5); + addsd(xmm1, xmm4); + pshufd(xmm5, xmm4, 238); + addsd(xmm1, xmm5); + addsd(xmm0, xmm1); + jmp(B1_5); + + bind(L_2TAG_PACKET_0_0_2); + movq(xmm0, Address(rsp, 0)); + movq(xmm1, Address(rsp, 0)); + addl(eax, 16); + cmpl(eax, 32768); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2); + cmpl(eax, 16); + jcc(Assembler::below, L_2TAG_PACKET_3_0_2); + + bind(L_2TAG_PACKET_4_0_2); + addsd(xmm0, xmm0); + jmp(B1_5); + + bind(L_2TAG_PACKET_5_0_2); + jcc(Assembler::above, L_2TAG_PACKET_4_0_2); + cmpl(edx, 0); + jcc(Assembler::above, L_2TAG_PACKET_4_0_2); + jmp(L_2TAG_PACKET_6_0_2); + + bind(L_2TAG_PACKET_3_0_2); + xorpd(xmm1, xmm1); + addsd(xmm1, xmm0); + movdl(edx, xmm1); + psrlq(xmm1, 32); + movdl(ecx, xmm1); + orl(edx, ecx); + cmpl(edx, 0); + jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); + xorpd(xmm1, xmm1); + movl(eax, 18416); + pinsrw(xmm1, eax, 3); + mulsd(xmm0, xmm1); + xorpd(xmm2, xmm2); + movl(eax, 16368); + pinsrw(xmm2, eax, 3); + movdqu(xmm1, xmm0); + pextrw(eax, xmm0, 3); + por(xmm0, xmm2); + movl(ecx, 18416); + psrlq(xmm0, 27); + movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL + psrld(xmm0, 2); + rcpps(xmm0, xmm0); + psllq(xmm1, 12); + pshufd(xmm6, xmm5, 78); + psrlq(xmm1, 12); + jmp(L_2TAG_PACKET_1_0_2); + + bind(L_2TAG_PACKET_2_0_2); + movdl(edx, xmm1); + psrlq(xmm1, 32); + movdl(ecx, xmm1); + addl(ecx, ecx); + cmpl(ecx, -2097152); + jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); + orl(edx, ecx); + cmpl(edx, 0); + jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); + + bind(L_2TAG_PACKET_6_0_2); + xorpd(xmm1, xmm1); + xorpd(xmm0, xmm0); + movl(eax, 32752); + pinsrw(xmm1, eax, 3); + mulsd(xmm0, xmm1); + movl(Address(rsp, 16), 9); + jmp(L_2TAG_PACKET_8_0_2); + + bind(L_2TAG_PACKET_7_0_2); + xorpd(xmm1, xmm1); + xorpd(xmm0, xmm0); + movl(eax, 49136); + pinsrw(xmm0, eax, 3); + divsd(xmm0, xmm1); + movl(Address(rsp, 16), 8); + + bind(L_2TAG_PACKET_8_0_2); + movq(Address(rsp, 8), xmm0); + + bind(B1_3); + movq(xmm0, Address(rsp, 8)); + + bind(L_2TAG_PACKET_9_0_2); + + bind(B1_5); + addq(rsp, 24); + +} + +/******************************************************************************/ // ALGORITHM DESCRIPTION - POW() // --------------------- // @@ -2942,7 +3290,11 @@ movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL pshufd(xmm4, xmm0, 68); mulsd(xmm3, xmm1); - movddup(xmm1, xmm1); + if (VM_Version::supports_sse3()) { + movddup(xmm1, xmm1); + } else { + movlhps(xmm1, xmm1); + } andl(edx, 63); shll(edx, 5); lea(rax, ExternalAddress(Ctable)); @@ -2952,7 +3304,12 @@ subsd(xmm4, xmm3); movq(xmm7, Address(rax, 8)); subsd(xmm0, xmm3); - movddup(xmm3, xmm4); + if (VM_Version::supports_sse3()) { + movddup(xmm3, xmm4); + } else { + movdqu(xmm3, xmm4); + movlhps(xmm3, xmm3); + } subsd(xmm4, xmm6); pshufd(xmm0, xmm0, 68); movdqu(xmm2, Address(rax, 0)); @@ -3943,3 +4300,1027 @@ addq(rsp, 16); pop(rbx); } + +/******************************************************************************/ +// ALGORITHM DESCRIPTION - TAN() +// --------------------- +// +// Polynomials coefficients and other constants. +// +// Note that in this algorithm, there is a different polynomial for +// each breakpoint, so there are 32 sets of polynomial coefficients +// as well as 32 instances of the other constants. +// +// The polynomial coefficients and constants are offset from the start +// of the main block as follows: +// +// 0: c8 | c0 +// 16: c9 | c1 +// 32: c10 | c2 +// 48: c11 | c3 +// 64: c12 | c4 +// 80: c13 | c5 +// 96: c14 | c6 +// 112: c15 | c7 +// 128: T_hi +// 136: T_lo +// 144: Sigma +// 152: T_hl +// 160: Tau +// 168: Mask +// 176: (end of block) +// +// The total table size is therefore 5632 bytes. +// +// Note that c0 and c1 are always zero. We could try storing +// other constants here, and just loading the low part of the +// SIMD register in these cases, after ensuring the high part +// is zero. +// +// The higher terms of the polynomial are computed in the *low* +// part of the SIMD register. This is so we can overlap the +// multiplication by r^8 and the unpacking of the other part. +// +// The constants are: +// T_hi + T_lo = accurate constant term in power series +// Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit) +// Tau = multiplier for the reciprocal, always -1 or 0 +// +// The basic reconstruction formula using these constants is: +// +// High = tau * recip_hi + t_hi +// Med = (sgn * r + t_hl * r)_hi +// Low = (sgn * r + t_hl * r)_lo + +// tau * recip_lo + T_lo + (T_hl + sigma) * c + pol +// +// where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 +// +// (c0 = c1 = 0, but using them keeps SIMD regularity) +// +// We then do a compensated sum High + Med, add the low parts together +// and then do the final sum. +// +// Here recip_hi + recip_lo is an accurate reciprocal of the remainder +// modulo pi/2 +// +// Special cases: +// tan(NaN) = quiet NaN, and raise invalid exception +// tan(INF) = NaN and raise invalid exception +// tan(+/-0) = +/-0 +// +/******************************************************************************/ + + +ALIGNED_(16) juint _ONEHALF_tan[] = +{ + 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL +}; + +ALIGNED_(16) juint _MUL16[] = +{ + 0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL +}; + +ALIGNED_(16) juint _sign_mask_tan[] = +{ + 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL +}; + +ALIGNED_(16) juint _PI32INV_tan[] = +{ + 0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL +}; + +ALIGNED_(16) juint _P_1_tan[] = +{ + 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL +}; + +ALIGNED_(16) juint _P_2_tan[] = +{ + 0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL +}; + +ALIGNED_(16) juint _P_3_tan[] = +{ + 0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL +}; + +ALIGNED_(16) juint _Ctable_tan[] = +{ + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL, + 0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL, + 0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL, + 0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, + 0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, + 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL, + 0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, + 0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL, + 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL, + 0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, + 0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL, + 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL, + 0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL, + 0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL, + 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL, + 0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL, + 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL, + 0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL, + 0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL, + 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL, + 0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, + 0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL, + 0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL, + 0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, + 0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL, + 0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, + 0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL, + 0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL, + 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL, + 0x3fc50533UL, 0x9bed9b2eUL, 0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL, + 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL, + 0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL, + 0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL, + 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL, + 0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL, + 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL, + 0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL, + 0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, + 0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL, + 0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, + 0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, + 0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL, + 0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, + 0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, + 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL, + 0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, + 0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL, + 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL, + 0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, + 0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL, + 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL, + 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL, + 0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL, + 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL, + 0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL, + 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL, + 0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL, + 0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL, + 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL, + 0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, + 0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL, + 0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL, + 0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, + 0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL, + 0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL, + 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL, + 0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL, + 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL, + 0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL, + 0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL, + 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL, + 0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL, + 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, + 0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 0x0359f1beUL, 0xbf0139e4UL, + 0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, + 0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL, + 0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, + 0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, + 0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL, + 0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, + 0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, + 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL, + 0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, + 0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL, + 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL, + 0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, + 0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL, + 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL, + 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL, + 0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL, + 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL, + 0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL, + 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL, + 0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL, + 0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL, + 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL, + 0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, + 0xb93820c8UL, 0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL, + 0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL, + 0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, + 0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL, + 0xa158e8bcUL, 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, + 0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL, + 0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL, + 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL, + 0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL, + 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL, + 0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL, + 0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL, + 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL, + 0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL, + 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL, + 0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL, + 0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, + 0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL, + 0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, + 0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, + 0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL, + 0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL, + 0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL, + 0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL, + 0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL, + 0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL, + 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL, + 0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL, + 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL, + 0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL, + 0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL, + 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL, + 0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, + 0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL, + 0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL, + 0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, + 0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL, + 0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, + 0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL, + 0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL, + 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL, + 0x3ee264d4UL, 0xbb6cbb18UL, 0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL, + 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL, + 0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL, + 0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL, + 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL, + 0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL, + 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, + 0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, + 0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL, + 0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, + 0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL, + 0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, + 0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, + 0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL, + 0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, + 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, + 0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, + 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL, + 0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, + 0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL, + 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL, + 0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, + 0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL, + 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, + 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL, + 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL, + 0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL, + 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL, + 0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL, + 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL, + 0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL, + 0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL, + 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL, + 0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL, + 0x00000000UL, 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, + 0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL, + 0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL, + 0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, + 0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL, + 0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, + 0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL, + 0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, + 0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL, + 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL, + 0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL, + 0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL, + 0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL, + 0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL, + 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, + 0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL, + 0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, + 0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL, + 0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, + 0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL, + 0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL, + 0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, + 0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, + 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL, + 0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, + 0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL, + 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL, + 0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, + 0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL, + 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL, + 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL, + 0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL, + 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL, + 0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL, + 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL, + 0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL, + 0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL, + 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL, + 0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, + 0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL, + 0x3fc1f250UL, 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL, + 0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, + 0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL, + 0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, + 0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL, + 0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL, + 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL, + 0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL, + 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 0x59f21a6dUL, + 0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL, + 0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL, + 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL, + 0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL, + 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, + 0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL, + 0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL, + 0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, + 0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL, + 0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, + 0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, + 0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL, + 0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, + 0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, + 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL, + 0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, + 0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL, + 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL, + 0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, + 0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL, + 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, + 0x00000000UL, 0x00000000UL, 0x00000000UL +}; + +ALIGNED_(16) juint _MASK_35_tan[] = +{ + 0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL +}; + +ALIGNED_(16) juint _Q_11_tan[] = +{ + 0xb8fe4d77UL, 0x3f82609aUL +}; + +ALIGNED_(16) juint _Q_9_tan[] = +{ + 0xbf847a43UL, 0x3f9664a0UL +}; + +ALIGNED_(16) juint _Q_7_tan[] = +{ + 0x52c4c8abUL, 0x3faba1baUL +}; + +ALIGNED_(16) juint _Q_5_tan[] = +{ + 0x11092746UL, 0x3fc11111UL +}; + +ALIGNED_(16) juint _Q_3_tan[] = +{ + 0x55555612UL, 0x3fd55555UL +}; + +ALIGNED_(16) juint _PI_INV_TABLE_tan[] = +{ + 0x00000000UL, 0x00000000UL, 0xa2f9836eUL, 0x4e441529UL, 0xfc2757d1UL, + 0xf534ddc0UL, 0xdb629599UL, 0x3c439041UL, 0xfe5163abUL, 0xdebbc561UL, + 0xb7246e3aUL, 0x424dd2e0UL, 0x06492eeaUL, 0x09d1921cUL, 0xfe1deb1cUL, + 0xb129a73eUL, 0xe88235f5UL, 0x2ebb4484UL, 0xe99c7026UL, 0xb45f7e41UL, + 0x3991d639UL, 0x835339f4UL, 0x9c845f8bUL, 0xbdf9283bUL, 0x1ff897ffUL, + 0xde05980fUL, 0xef2f118bUL, 0x5a0a6d1fUL, 0x6d367ecfUL, 0x27cb09b7UL, + 0x4f463f66UL, 0x9e5fea2dUL, 0x7527bac7UL, 0xebe5f17bUL, 0x3d0739f7UL, + 0x8a5292eaUL, 0x6bfb5fb1UL, 0x1f8d5d08UL, 0x56033046UL, 0xfc7b6babUL, + 0xf0cfbc21UL +}; + +ALIGNED_(8) juint _PI_4_tan[] = +{ + 0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL +}; + +ALIGNED_(8) juint _QQ_2_tan[] = +{ + 0x676733afUL, 0x3d32e7b9UL +}; + +ALIGNED_(8) juint _ONE_tan[] = +{ + 0x00000000UL, 0x3ff00000UL +}; + +ALIGNED_(8) juint _TWO_POW_55_tan[] = +{ + 0x00000000UL, 0x43600000UL +}; + +ALIGNED_(4) juint _TWO_POW_M55_tan[] = +{ + 0x00000000UL, 0x3c800000UL +}; + +ALIGNED_(4) juint _NEG_ZERO_tan[] = +{ + 0x00000000UL, 0x80000000UL +}; + +void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) { + + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1; + Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1; + Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, L_2TAG_PACKET_14_0_1, B1_2, B1_3, B1_4, B1_5, start; + + address ONEHALF = (address)_ONEHALF_tan; + address MUL16 = (address)_MUL16; + address sign_mask = (address)_sign_mask_tan; + address PI32INV = (address)_PI32INV_tan; + address P_1 = (address)_P_1_tan; + address P_2 = (address)_P_2_tan; + address P_3 = (address)_P_3_tan; + address Ctable = (address)_Ctable_tan; + address MASK_35 = (address)_MASK_35_tan; + address Q_11 = (address)_Q_11_tan; + address Q_9 = (address)_Q_9_tan; + address Q_7 = (address)_Q_7_tan; + address Q_5 = (address)_Q_5_tan; + address Q_3 = (address)_Q_3_tan; + address PI_INV_TABLE = (address)_PI_INV_TABLE_tan; + address PI_4 = (address)_PI_4_tan; + address QQ_2 = (address)_QQ_2_tan; + address ONE = (address)_ONE_tan; + address TWO_POW_55 = (address)_TWO_POW_55_tan; + address TWO_POW_M55 = (address)_TWO_POW_M55_tan; + address NEG_ZERO = (address)_NEG_ZERO_tan; + + bind(start); + push(rbx); + subq(rsp, 16); + movsd(Address(rsp, 8), xmm0); + + bind(B1_2); + pextrw(eax, xmm0, 3); + andl(eax, 32767); + subl(eax, 16314); + cmpl(eax, 270); + jcc(Assembler::above, L_2TAG_PACKET_0_0_1); + movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL + unpcklpd(xmm0, xmm0); + movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL + andpd(xmm4, xmm0); + movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL + mulpd(xmm1, xmm0); + por(xmm5, xmm4); + addpd(xmm1, xmm5); + movdqu(xmm7, xmm1); + unpckhpd(xmm7, xmm7); + cvttsd2sil(edx, xmm7); + cvttpd2dq(xmm1, xmm1); + cvtdq2pd(xmm1, xmm1); + mulpd(xmm1, xmm6); + movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL + movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL + addq(rdx, 469248); + movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL + mulpd(xmm3, xmm1); + andq(rdx, 31); + mulsd(xmm5, xmm1); + movq(rcx, rdx); + mulpd(xmm4, xmm1); + shlq(rcx, 1); + subpd(xmm0, xmm3); + mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL + addq(rdx, rcx); + shlq(rcx, 2); + addq(rdx, rcx); + addsd(xmm5, xmm0); + movdqu(xmm2, xmm0); + subpd(xmm0, xmm4); + movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + shlq(rdx, 4); + lea(rax, ExternalAddress(Ctable)); + andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL + movdqu(xmm3, xmm0); + addq(rax, rdx); + subpd(xmm2, xmm0); + unpckhpd(xmm0, xmm0); + divsd(xmm6, xmm5); + subpd(xmm2, xmm4); + movdqu(xmm7, Address(rax, 16)); + subsd(xmm3, xmm5); + mulpd(xmm7, xmm0); + subpd(xmm2, xmm1); + movdqu(xmm1, Address(rax, 48)); + mulpd(xmm1, xmm0); + movdqu(xmm4, Address(rax, 96)); + mulpd(xmm4, xmm0); + addsd(xmm2, xmm3); + movdqu(xmm3, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm7, Address(rax, 0)); + addpd(xmm1, Address(rax, 32)); + mulpd(xmm1, xmm0); + addpd(xmm4, Address(rax, 80)); + addpd(xmm7, xmm1); + movdqu(xmm1, Address(rax, 112)); + mulpd(xmm1, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm4, xmm1); + movdqu(xmm1, Address(rax, 64)); + mulpd(xmm1, xmm0); + addpd(xmm7, xmm1); + movdqu(xmm1, xmm3); + mulpd(xmm3, xmm0); + mulsd(xmm0, xmm0); + mulpd(xmm1, Address(rax, 144)); + mulpd(xmm4, xmm3); + movdqu(xmm3, xmm1); + addpd(xmm7, xmm4); + movdqu(xmm4, xmm1); + mulsd(xmm0, xmm7); + unpckhpd(xmm7, xmm7); + addsd(xmm0, xmm7); + unpckhpd(xmm1, xmm1); + addsd(xmm3, xmm1); + subsd(xmm4, xmm3); + addsd(xmm1, xmm4); + movdqu(xmm4, xmm2); + movq(xmm7, Address(rax, 144)); + unpckhpd(xmm2, xmm2); + addsd(xmm7, Address(rax, 152)); + mulsd(xmm7, xmm2); + addsd(xmm7, Address(rax, 136)); + addsd(xmm7, xmm1); + addsd(xmm0, xmm7); + movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + mulsd(xmm4, xmm6); + movq(xmm2, Address(rax, 168)); + andpd(xmm2, xmm6); + mulsd(xmm5, xmm2); + mulsd(xmm6, Address(rax, 160)); + subsd(xmm7, xmm5); + subsd(xmm2, Address(rax, 128)); + subsd(xmm7, xmm4); + mulsd(xmm7, xmm6); + movdqu(xmm4, xmm3); + subsd(xmm3, xmm2); + addsd(xmm2, xmm3); + subsd(xmm4, xmm2); + addsd(xmm0, xmm4); + subsd(xmm0, xmm7); + addsd(xmm0, xmm3); + jmp(B1_4); + + bind(L_2TAG_PACKET_0_0_1); + jcc(Assembler::greater, L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + movl(edx, eax); + andl(eax, 32752); + jcc(Assembler::equal, L_2TAG_PACKET_2_0_1); + andl(edx, 32767); + cmpl(edx, 15904); + jcc(Assembler::below, L_2TAG_PACKET_3_0_1); + movdqu(xmm2, xmm0); + movdqu(xmm3, xmm0); + movq(xmm1, ExternalAddress(Q_11)); //0xb8fe4d77UL, 0x3f82609aUL + mulsd(xmm2, xmm0); + mulsd(xmm3, xmm2); + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_9)); //0xbf847a43UL, 0x3f9664a0UL + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_7)); //0x52c4c8abUL, 0x3faba1baUL + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_5)); //0x11092746UL, 0x3fc11111UL + mulsd(xmm1, xmm2); + addsd(xmm1, ExternalAddress(Q_3)); //0x55555612UL, 0x3fd55555UL + mulsd(xmm1, xmm3); + addsd(xmm0, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_3_0_1); + movq(xmm3, ExternalAddress(TWO_POW_55)); //0x00000000UL, 0x43600000UL + mulsd(xmm3, xmm0); + addsd(xmm0, xmm3); + mulsd(xmm0, ExternalAddress(TWO_POW_M55)); //0x00000000UL, 0x3c800000UL + jmp(B1_4); + + bind(L_2TAG_PACKET_2_0_1); + movdqu(xmm1, xmm0); + mulsd(xmm1, xmm1); + jmp(B1_4); + + bind(L_2TAG_PACKET_1_0_1); + pextrw(eax, xmm0, 3); + andl(eax, 32752); + cmpl(eax, 32752); + jcc(Assembler::equal, L_2TAG_PACKET_4_0_1); + pextrw(ecx, xmm0, 3); + andl(ecx, 32752); + subl(ecx, 16224); + shrl(ecx, 7); + andl(ecx, 65532); + lea(r11, ExternalAddress(PI_INV_TABLE)); + addq(rcx, r11); + movdq(rax, xmm0); + movl(r10, Address(rcx, 20)); + movl(r8, Address(rcx, 24)); + movl(edx, eax); + shrq(rax, 21); + orl(eax, INT_MIN); + shrl(eax, 11); + movl(r9, r10); + imulq(r10, rdx); + imulq(r9, rax); + imulq(r8, rax); + movl(rsi, Address(rcx, 16)); + movl(rdi, Address(rcx, 12)); + movl(r11, r10); + shrq(r10, 32); + addq(r9, r10); + addq(r11, r8); + movl(r8, r11); + shrq(r11, 32); + addq(r9, r11); + movl(r10, rsi); + imulq(rsi, rdx); + imulq(r10, rax); + movl(r11, rdi); + imulq(rdi, rdx); + movl(rbx, rsi); + shrq(rsi, 32); + addq(r9, rbx); + movl(rbx, r9); + shrq(r9, 32); + addq(r10, rsi); + addq(r10, r9); + shlq(rbx, 32); + orq(r8, rbx); + imulq(r11, rax); + movl(r9, Address(rcx, 8)); + movl(rsi, Address(rcx, 4)); + movl(rbx, rdi); + shrq(rdi, 32); + addq(r10, rbx); + movl(rbx, r10); + shrq(r10, 32); + addq(r11, rdi); + addq(r11, r10); + movq(rdi, r9); + imulq(r9, rdx); + imulq(rdi, rax); + movl(r10, r9); + shrq(r9, 32); + addq(r11, r10); + movl(r10, r11); + shrq(r11, 32); + addq(rdi, r9); + addq(rdi, r11); + movq(r9, rsi); + imulq(rsi, rdx); + imulq(r9, rax); + shlq(r10, 32); + orq(r10, rbx); + movl(eax, Address(rcx, 0)); + movl(r11, rsi); + shrq(rsi, 32); + addq(rdi, r11); + movl(r11, rdi); + shrq(rdi, 32); + addq(r9, rsi); + addq(r9, rdi); + imulq(rdx, rax); + pextrw(rbx, xmm0, 3); + lea(rdi, ExternalAddress(PI_INV_TABLE)); + subq(rcx, rdi); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, ecx); + addl(ecx, 19); + movl(rsi, 32768); + andl(rsi, rbx); + shrl(rbx, 4); + andl(rbx, 2047); + subl(rbx, 1023); + subl(ecx, rbx); + addq(r9, rdx); + movl(edx, ecx); + addl(edx, 32); + cmpl(ecx, 0); + jcc(Assembler::less, L_2TAG_PACKET_5_0_1); + negl(ecx); + addl(ecx, 29); + shll(r9); + movl(rdi, r9); + andl(r9, 1073741823); + testl(r9, 536870912); + jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); + shrl(r9); + movl(rbx, 0); + shlq(r9, 32); + orq(r9, r11); + + bind(L_2TAG_PACKET_7_0_1); + + bind(L_2TAG_PACKET_8_0_1); + cmpq(r9, 0); + jcc(Assembler::equal, L_2TAG_PACKET_9_0_1); + + bind(L_2TAG_PACKET_10_0_1); + bsrq(r11, r9); + movl(ecx, 29); + subl(ecx, r11); + jcc(Assembler::lessEqual, L_2TAG_PACKET_11_0_1); + shlq(r9); + movq(rax, r10); + shlq(r10); + addl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shrq(rax); + shrq(r8); + orq(r9, rax); + orq(r10, r8); + + bind(L_2TAG_PACKET_12_0_1); + cvtsi2sdq(xmm0, r9); + shrq(r10, 1); + cvtsi2sdq(xmm3, r10); + xorpd(xmm4, xmm4); + shll(edx, 4); + negl(edx); + addl(edx, 16368); + orl(edx, rsi); + xorl(edx, rbx); + pinsrw(xmm4, edx, 3); + movq(xmm2, ExternalAddress(PI_4)); //0x00000000UL, 0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL + movq(xmm7, ExternalAddress(8 + PI_4)); //0x3fe921fbUL, 0x4611a626UL, 0x3e85110bUL + xorpd(xmm5, xmm5); + subl(edx, 1008); + pinsrw(xmm5, edx, 3); + mulsd(xmm0, xmm4); + shll(rsi, 16); + sarl(rsi, 31); + mulsd(xmm3, xmm5); + movdqu(xmm1, xmm0); + mulsd(xmm0, xmm2); + shrl(rdi, 30); + addsd(xmm1, xmm3); + mulsd(xmm3, xmm2); + addl(rdi, rsi); + xorl(rdi, rsi); + mulsd(xmm7, xmm1); + movl(eax, rdi); + addsd(xmm7, xmm3); + movdqu(xmm2, xmm0); + addsd(xmm0, xmm7); + subsd(xmm2, xmm0); + addsd(xmm7, xmm2); + movdqu(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x3fe45f30UL, 0x6dc9c883UL, 0x40245f30UL + if (VM_Version::supports_sse3()) { + movddup(xmm0, xmm0); + } else { + movlhps(xmm0, xmm0); + } + movdqu(xmm4, ExternalAddress(sign_mask)); //0x00000000UL, 0x80000000UL, 0x00000000UL, 0x80000000UL + andpd(xmm4, xmm0); + mulpd(xmm1, xmm0); + if (VM_Version::supports_sse3()) { + movddup(xmm7, xmm7); + } else { + movlhps(xmm7, xmm7); + } + movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL + movdqu(xmm6, ExternalAddress(MUL16)); //0x00000000UL, 0x40300000UL, 0x00000000UL, 0x3ff00000UL + por(xmm5, xmm4); + addpd(xmm1, xmm5); + movdqu(xmm5, xmm1); + unpckhpd(xmm5, xmm5); + cvttsd2sil(edx, xmm5); + cvttpd2dq(xmm1, xmm1); + cvtdq2pd(xmm1, xmm1); + mulpd(xmm1, xmm6); + movdqu(xmm3, ExternalAddress(P_1)); //0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL + movq(xmm5, ExternalAddress(QQ_2)); //0x676733afUL, 0x3d32e7b9UL + shll(eax, 4); + addl(edx, 469248); + movdqu(xmm4, ExternalAddress(P_2)); //0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL + mulpd(xmm3, xmm1); + addl(edx, eax); + andl(edx, 31); + mulsd(xmm5, xmm1); + movl(ecx, edx); + mulpd(xmm4, xmm1); + shll(ecx, 1); + subpd(xmm0, xmm3); + mulpd(xmm1, ExternalAddress(P_3)); //0x3707344aUL, 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL + addl(edx, ecx); + shll(ecx, 2); + addl(edx, ecx); + addsd(xmm5, xmm0); + movdqu(xmm2, xmm0); + subpd(xmm0, xmm4); + movq(xmm6, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + shll(edx, 4); + lea(rax, ExternalAddress(Ctable)); + andpd(xmm5, ExternalAddress(MASK_35)); //0xfffc0000UL, 0xffffffffUL, 0x00000000UL, 0x00000000UL + movdqu(xmm3, xmm0); + addq(rax, rdx); + subpd(xmm2, xmm0); + unpckhpd(xmm0, xmm0); + divsd(xmm6, xmm5); + subpd(xmm2, xmm4); + subsd(xmm3, xmm5); + subpd(xmm2, xmm1); + movdqu(xmm1, Address(rax, 48)); + addpd(xmm2, xmm7); + movdqu(xmm7, Address(rax, 16)); + mulpd(xmm7, xmm0); + movdqu(xmm4, Address(rax, 96)); + mulpd(xmm1, xmm0); + mulpd(xmm4, xmm0); + addsd(xmm2, xmm3); + movdqu(xmm3, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm7, Address(rax, 0)); + addpd(xmm1, Address(rax, 32)); + mulpd(xmm1, xmm0); + addpd(xmm4, Address(rax, 80)); + addpd(xmm7, xmm1); + movdqu(xmm1, Address(rax, 112)); + mulpd(xmm1, xmm0); + mulpd(xmm0, xmm0); + addpd(xmm4, xmm1); + movdqu(xmm1, Address(rax, 64)); + mulpd(xmm1, xmm0); + addpd(xmm7, xmm1); + movdqu(xmm1, xmm3); + mulpd(xmm3, xmm0); + mulsd(xmm0, xmm0); + mulpd(xmm1, Address(rax, 144)); + mulpd(xmm4, xmm3); + movdqu(xmm3, xmm1); + addpd(xmm7, xmm4); + movdqu(xmm4, xmm1); + mulsd(xmm0, xmm7); + unpckhpd(xmm7, xmm7); + addsd(xmm0, xmm7); + unpckhpd(xmm1, xmm1); + addsd(xmm3, xmm1); + subsd(xmm4, xmm3); + addsd(xmm1, xmm4); + movdqu(xmm4, xmm2); + movq(xmm7, Address(rax, 144)); + unpckhpd(xmm2, xmm2); + addsd(xmm7, Address(rax, 152)); + mulsd(xmm7, xmm2); + addsd(xmm7, Address(rax, 136)); + addsd(xmm7, xmm1); + addsd(xmm0, xmm7); + movq(xmm7, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL + mulsd(xmm4, xmm6); + movq(xmm2, Address(rax, 168)); + andpd(xmm2, xmm6); + mulsd(xmm5, xmm2); + mulsd(xmm6, Address(rax, 160)); + subsd(xmm7, xmm5); + subsd(xmm2, Address(rax, 128)); + subsd(xmm7, xmm4); + mulsd(xmm7, xmm6); + movdqu(xmm4, xmm3); + subsd(xmm3, xmm2); + addsd(xmm2, xmm3); + subsd(xmm4, xmm2); + addsd(xmm0, xmm4); + subsd(xmm0, xmm7); + addsd(xmm0, xmm3); + jmp(B1_4); + + bind(L_2TAG_PACKET_9_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + movl(r8, 0); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_1); + addl(edx, 64); + movq(r9, r10); + movq(r10, r8); + cmpq(r9, 0); + jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_1); + jmp(L_2TAG_PACKET_12_0_1); + + bind(L_2TAG_PACKET_11_0_1); + jcc(Assembler::equal, L_2TAG_PACKET_12_0_1); + negl(ecx); + shrq(r10); + movq(rax, r9); + shrq(r9); + subl(edx, ecx); + negl(ecx); + addl(ecx, 64); + shlq(rax); + orq(r10, rax); + jmp(L_2TAG_PACKET_12_0_1); + + bind(L_2TAG_PACKET_5_0_1); + notl(ecx); + shlq(r9, 32); + orq(r9, r11); + shlq(r9); + movq(rdi, r9); + testl(r9, INT_MIN); + jcc(Assembler::notEqual, L_2TAG_PACKET_13_0_1); + shrl(r9); + movl(rbx, 0); + shrq(rdi, 2); + jmp(L_2TAG_PACKET_8_0_1); + + bind(L_2TAG_PACKET_6_0_1); + shrl(r9); + movl(rbx, 1073741824); + shrl(rbx); + shlq(r9, 32); + orq(r9, r11); + shlq(rbx, 32); + addl(rdi, 1073741824); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(rbx, 32768); + jmp(L_2TAG_PACKET_7_0_1); + + bind(L_2TAG_PACKET_13_0_1); + shrl(r9); + mov64(rbx, 0x100000000); + shrq(rbx); + movl(rcx, 0); + movl(r11, 0); + subq(rcx, r8); + sbbq(r11, r10); + sbbq(rbx, r9); + movq(r8, rcx); + movq(r10, r11); + movq(r9, rbx); + movl(rbx, 32768); + shrq(rdi, 2); + addl(rdi, 1073741824); + jmp(L_2TAG_PACKET_8_0_1); + + bind(L_2TAG_PACKET_4_0_1); + movq(xmm0, Address(rsp, 8)); + mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL + movq(Address(rsp, 0), xmm0); + + bind(L_2TAG_PACKET_14_0_1); + + bind(B1_4); + addq(rsp, 16); + +} --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2016-03-25 14:15:16.969623300 -0700 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2016-03-25 14:15:16.450593600 -0700 @@ -928,6 +928,10 @@ XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2); + void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx, Register r11); + void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); @@ -941,11 +945,19 @@ XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4); + void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx, Register tmp1, + Register tmp2, Register tmp3, Register tmp4); #else void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp1); + void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx, Register tmp); + void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp); @@ -964,6 +976,14 @@ void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp); + + void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, + Register edx, Register ebx, Register esi, Register edi, + Register ebp, Register esp); + + void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, + XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register rax, Register rcx, Register rdx, Register tmp); #endif void increase_precision(); --- old/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2016-03-25 14:15:20.231809900 -0700 +++ new/src/cpu/x86/vm/stubGenerator_x86_32.cpp 2016-03-25 14:15:19.719780600 -0700 @@ -2093,25 +2093,6 @@ entry_checkcast_arraycopy); } - void generate_math_stubs() { - { - StubCodeMark mark(this, "StubRoutines", "log10"); - StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); - - __ fld_d(Address(rsp, 4)); - __ flog10(); - __ ret(0); - } - { - StubCodeMark mark(this, "StubRoutines", "tan"); - StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); - - __ fld_d(Address(rsp, 4)); - __ trigfunc('t'); - __ ret(0); - } - } - // AES intrinsic stubs enum {AESBlockSize = 16}; @@ -3534,6 +3515,31 @@ } + address generate_libmLog10() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } + address generate_libmPow() { address start = __ pc(); @@ -3630,6 +3636,44 @@ } + address generate_libm_tan_cot_huge() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + + BLOCK_COMMENT("Entry:"); + __ libm_tancot_huge(x0, x1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); + + return start; + + } + + address generate_libmTan() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } + // Safefetch stubs. void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) { @@ -3854,23 +3898,40 @@ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); } if (VM_Version::supports_sse2()) { - StubRoutines::_dexp = generate_libmExp(); - StubRoutines::_dlog = generate_libmLog(); - StubRoutines::_dpow = generate_libmPow(); - if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) { + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { + StubRoutines::_dexp = generate_libmExp(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) { + StubRoutines::_dlog = generate_libmLog(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) { + StubRoutines::_dlog10 = generate_libmLog10(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) { + StubRoutines::_dpow = generate_libmPow(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || + vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) || + vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || + vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge(); } - if (UseLibmSinIntrinsic) { + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { StubRoutines::_dsin = generate_libmSin(); } - if (UseLibmCosIntrinsic) { + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { StubRoutines::_dcos = generate_libmCos(); } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { + StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge(); + StubRoutines::_dtan = generate_libmTan(); + } } } - void generate_all() { // Generates all stubs and initializes the entry points @@ -3889,8 +3950,6 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); - generate_math_stubs(); - // don't bother generating these AES intrinsic stubs unless global flag is set if (UseAESIntrinsics) { StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others --- old/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2016-03-25 14:15:23.536999000 -0700 +++ new/src/cpu/x86/vm/stubGenerator_x86_64.cpp 2016-03-25 14:15:23.037970400 -0700 @@ -2972,35 +2972,6 @@ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; } - void generate_math_stubs() { - { - StubCodeMark mark(this, "StubRoutines", "log10"); - StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); - - __ subq(rsp, 8); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ flog10(); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ addq(rsp, 8); - __ ret(0); - } - { - StubCodeMark mark(this, "StubRoutines", "tan"); - StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); - - __ subq(rsp, 8); - __ movdbl(Address(rsp, 0), xmm0); - __ fld_d(Address(rsp, 0)); - __ trigfunc('t'); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ addq(rsp, 8); - __ ret(0); - } - } - // AES intrinsic stubs enum {AESBlockSize = 16}; @@ -4745,6 +4716,46 @@ } + address generate_libmLog10() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // save the xmm registers which must be preserved 6-7 + __ subptr(rsp, 4 * wordSize); + __ movdqu(Address(rsp, 0), xmm6); + __ movdqu(Address(rsp, 2 * wordSize), xmm7); +#endif + __ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); + +#ifdef _WIN64 + // restore xmm regs belonging to calling function + __ movdqu(xmm6, Address(rsp, 0)); + __ movdqu(xmm7, Address(rsp, 2 * wordSize)); + __ addptr(rsp, 4 * wordSize); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } + address generate_libmPow() { address start = __ pc(); @@ -4810,6 +4821,8 @@ __ enter(); // required for proper stackwalking of RuntimeStub frame #ifdef _WIN64 + __ push(rsi); + __ push(rdi); // save the xmm registers which must be preserved 6-7 __ subptr(rsp, 4 * wordSize); __ movdqu(Address(rsp, 0), xmm6); @@ -4822,6 +4835,8 @@ __ movdqu(xmm6, Address(rsp, 0)); __ movdqu(xmm7, Address(rsp, 2 * wordSize)); __ addptr(rsp, 4 * wordSize); + __ pop(rdi); + __ pop(rsi); #endif __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -4853,6 +4868,8 @@ __ enter(); // required for proper stackwalking of RuntimeStub frame #ifdef _WIN64 + __ push(rsi); + __ push(rdi); // save the xmm registers which must be preserved 6-7 __ subptr(rsp, 4 * wordSize); __ movdqu(Address(rsp, 0), xmm6); @@ -4865,6 +4882,55 @@ __ movdqu(xmm6, Address(rsp, 0)); __ movdqu(xmm7, Address(rsp, 2 * wordSize)); __ addptr(rsp, 4 * wordSize); + __ pop(rdi); + __ pop(rsi); +#endif + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + + } + + address generate_libmTan() { + address start = __ pc(); + + const XMMRegister x0 = xmm0; + const XMMRegister x1 = xmm1; + const XMMRegister x2 = xmm2; + const XMMRegister x3 = xmm3; + + const XMMRegister x4 = xmm4; + const XMMRegister x5 = xmm5; + const XMMRegister x6 = xmm6; + const XMMRegister x7 = xmm7; + + const Register tmp1 = r8; + const Register tmp2 = r9; + const Register tmp3 = r10; + const Register tmp4 = r11; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + __ push(rsi); + __ push(rdi); + // save the xmm registers which must be preserved 6-7 + __ subptr(rsp, 4 * wordSize); + __ movdqu(Address(rsp, 0), xmm6); + __ movdqu(Address(rsp, 2 * wordSize), xmm7); +#endif + __ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); + +#ifdef _WIN64 + // restore xmm regs belonging to calling function + __ movdqu(xmm6, Address(rsp, 0)); + __ movdqu(xmm7, Address(rsp, 2 * wordSize)); + __ addptr(rsp, 4 * wordSize); + __ pop(rdi); + __ pop(rsi); #endif __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -5066,15 +5132,27 @@ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); } if (VM_Version::supports_sse2()) { - StubRoutines::_dexp = generate_libmExp(); - StubRoutines::_dlog = generate_libmLog(); - StubRoutines::_dpow = generate_libmPow(); - if (UseLibmSinIntrinsic) { + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { + StubRoutines::_dexp = generate_libmExp(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) { + StubRoutines::_dlog = generate_libmLog(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) { + StubRoutines::_dlog10 = generate_libmLog10(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) { + StubRoutines::_dpow = generate_libmPow(); + } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { StubRoutines::_dsin = generate_libmSin(); } - if (UseLibmCosIntrinsic) { + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { StubRoutines::_dcos = generate_libmCos(); } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { + StubRoutines::_dtan = generate_libmTan(); + } } } @@ -5119,8 +5197,6 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); - generate_math_stubs(); - // don't bother generating these AES intrinsic stubs unless global flag is set if (UseAESIntrinsics) { StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others --- old/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp 2016-03-25 14:15:26.872189700 -0700 +++ new/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp 2016-03-25 14:15:26.351159900 -0700 @@ -345,13 +345,34 @@ __ fld_d(Address(rsp, 1*wordSize)); switch (kind) { case Interpreter::java_lang_math_sin : - __ trigfunc('s'); + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin))); + } + __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_cos : - __ trigfunc('c'); + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos))); + } + __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_tan : - __ trigfunc('t'); + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (StubRoutines::dtan() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan))); + } + __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_sqrt: __ fsqrt(); @@ -362,26 +383,29 @@ case Interpreter::java_lang_math_log: __ subptr(rsp, 2 * wordSize); __ fstp_d(Address(rsp, 0)); - if (VM_Version::supports_sse2()) { + if (StubRoutines::dlog() != NULL) { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); - } - else { + } else { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog))); } __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_log10: - __ flog10(); - // Store to stack to convert 80bit precision back to 64bits - __ push_fTOS(); - __ pop_fTOS(); + __ subptr(rsp, 2 * wordSize); + __ fstp_d(Address(rsp, 0)); + if (StubRoutines::dlog10() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10))); + } + __ addptr(rsp, 2 * wordSize); break; case Interpreter::java_lang_math_pow: __ fld_d(Address(rsp, 3*wordSize)); // second argument __ subptr(rsp, 4 * wordSize); __ fstp_d(Address(rsp, 0)); __ fstp_d(Address(rsp, 2 * wordSize)); - if (VM_Version::supports_sse2()) { + if (StubRoutines::dpow() != NULL) { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); } else { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow))); @@ -391,7 +415,7 @@ case Interpreter::java_lang_math_exp: __ subptr(rsp, 2*wordSize); __ fstp_d(Address(rsp, 0)); - if (VM_Version::supports_sse2()) { + if (StubRoutines::dexp() != NULL) { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); } else { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp))); --- old/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp 2016-03-25 14:15:30.073372800 -0700 +++ new/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp 2016-03-25 14:15:29.567343900 -0700 @@ -373,32 +373,60 @@ __ sqrtsd(xmm0, Address(rsp, wordSize)); } else if (kind == Interpreter::java_lang_math_exp) { __ movdbl(xmm0, Address(rsp, wordSize)); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); + if (StubRoutines::dexp() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp))); + } } else if (kind == Interpreter::java_lang_math_log) { __ movdbl(xmm0, Address(rsp, wordSize)); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); + if (StubRoutines::dlog() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog))); + } + } else if (kind == Interpreter::java_lang_math_log10) { + __ movdbl(xmm0, Address(rsp, wordSize)); + if (StubRoutines::dlog10() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10))); + } + } else if (kind == Interpreter::java_lang_math_sin) { + __ movdbl(xmm0, Address(rsp, wordSize)); + if (StubRoutines::dsin() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin))); + } + } else if (kind == Interpreter::java_lang_math_cos) { + __ movdbl(xmm0, Address(rsp, wordSize)); + if (StubRoutines::dcos() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos))); + } } else if (kind == Interpreter::java_lang_math_pow) { __ movdbl(xmm1, Address(rsp, wordSize)); __ movdbl(xmm0, Address(rsp, 3 * wordSize)); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); + if (StubRoutines::dpow() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow))); + } + } else if (kind == Interpreter::java_lang_math_tan) { + __ movdbl(xmm0, Address(rsp, wordSize)); + if (StubRoutines::dtan() != NULL) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan))); + } } else { __ fld_d(Address(rsp, wordSize)); switch (kind) { - case Interpreter::java_lang_math_sin : - __ trigfunc('s'); - break; - case Interpreter::java_lang_math_cos : - __ trigfunc('c'); - break; - case Interpreter::java_lang_math_tan : - __ trigfunc('t'); - break; case Interpreter::java_lang_math_abs: __ fabs(); break; - case Interpreter::java_lang_math_log10: - __ flog10(); - break; default : ShouldNotReachHere(); } --- old/src/cpu/x86/vm/x86_32.ad 2016-03-25 14:15:33.345560000 -0700 +++ new/src/cpu/x86/vm/x86_32.ad 2016-03-25 14:15:32.799528700 -0700 @@ -9819,27 +9819,6 @@ ins_pipe( pipe_slow ); %} -instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst(TanD src)); - format %{ "DTAN $dst" %} - ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan - Opcode(0xDD), Opcode(0xD8)); // fstp st - ins_pipe( pipe_slow ); -%} - -instruct tanD_reg(regD dst, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst(TanD dst)); - effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" - format %{ "DTAN $dst" %} - ins_encode( Push_SrcD(dst), - Opcode(0xD9), Opcode(0xF2), // fptan - Opcode(0xDD), Opcode(0xD8), // fstp st - Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - instruct atanDPR_reg(regDPR dst, regDPR src) %{ predicate (UseSSE<=1); match(Set dst(AtanD dst src)); @@ -9871,41 +9850,6 @@ ins_pipe( pipe_slow ); %} -instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate (UseSSE<=1); - // The source Double operand on FPU stack - match(Set dst (Log10D src)); - // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number - // fxch ; swap ST(0) with ST(1) - // fyl2x ; compute log_10(2) * log_2(x) - format %{ "FLDLG2 \t\t\t#Log10\n\t" - "FXCH \n\t" - "FYL2X \t\t\t# Q=Log10*Log_2(x)" - %} - ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 - Opcode(0xD9), Opcode(0xC9), // fxch - Opcode(0xD9), Opcode(0xF1)); // fyl2x - - ins_pipe( pipe_slow ); -%} - -instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ - predicate (UseSSE>=2); - effect(KILL cr); - match(Set dst (Log10D src)); - // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number - // fyl2x ; compute log_10(2) * log_2(x) - format %{ "FLDLG2 \t\t\t#Log10\n\t" - "FYL2X \t\t\t# Q=Log10*Log_2(x)" - %} - ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 - Push_SrcD(src), - Opcode(0xD9), Opcode(0xF1), // fyl2x - Push_ResultD(dst)); - - ins_pipe( pipe_slow ); -%} - //-------------Float Instructions------------------------------- // Float Math --- old/src/cpu/x86/vm/x86_64.ad 2016-03-25 14:15:37.240782800 -0700 +++ new/src/cpu/x86/vm/x86_64.ad 2016-03-25 14:15:36.698751800 -0700 @@ -9897,34 +9897,6 @@ ins_pipe(pipe_slow); %} -// -----------Trig and Trancendental Instructions------------------------------ -instruct tanD_reg(regD dst) %{ - match(Set dst (TanD dst)); - - format %{ "dtan $dst\n\t" %} - ins_encode( Push_SrcXD(dst), - Opcode(0xD9), Opcode(0xF2), //fptan - Opcode(0xDD), Opcode(0xD8), //fstp st - Push_ResultXD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct log10D_reg(regD dst) %{ - // The source and result Double operands in XMM registers - match(Set dst (Log10D dst)); - // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number - // fyl2x ; compute log_10(2) * log_2(x) - format %{ "fldlg2\t\t\t#Log10\n\t" - "fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t" - %} - ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2 - Push_SrcXD(dst), - Opcode(0xD9), Opcode(0xF1), // fyl2x - Push_ResultXD(dst)); - - ins_pipe( pipe_slow ); -%} - //----------Arithmetic Conversion Instructions--------------------------------- instruct roundFloat_nop(regF dst) --- old/src/share/vm/c1/c1_LIR.cpp 2016-03-25 14:15:41.048000500 -0700 +++ new/src/share/vm/c1/c1_LIR.cpp 2016-03-25 14:15:40.539971500 -0700 @@ -727,31 +727,6 @@ break; } - - case lir_tan: - case lir_log10: { - assert(op->as_Op2() != NULL, "must be"); - LIR_Op2* op2 = (LIR_Op2*)op; - - // On x86 tan/sin/cos need two temporary fpu stack slots and - // log/log10 need one so handle opr2 and tmp as temp inputs. - // Register input operand as temp to guarantee that it doesn't - // overlap with the input. - assert(op2->_info == NULL, "not used"); - assert(op2->_tmp5->is_illegal(), "not used"); - assert(op2->_opr1->is_valid(), "used"); - do_input(op2->_opr1); do_temp(op2->_opr1); - - if (op2->_opr2->is_valid()) do_temp(op2->_opr2); - if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); - if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2); - if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3); - if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4); - if (op2->_result->is_valid()) do_output(op2->_result); - - break; - } - // LIR_Op3 case lir_idiv: case lir_irem: { @@ -1738,8 +1713,6 @@ case lir_rem: s = "rem"; break; case lir_abs: s = "abs"; break; case lir_sqrt: s = "sqrt"; break; - case lir_tan: s = "tan"; break; - case lir_log10: s = "log10"; break; case lir_logic_and: s = "logic_and"; break; case lir_logic_or: s = "logic_or"; break; case lir_logic_xor: s = "logic_xor"; break; --- old/src/share/vm/c1/c1_Runtime1.cpp 2016-03-25 14:15:44.309187100 -0700 +++ new/src/share/vm/c1/c1_Runtime1.cpp 2016-03-25 14:15:43.797157800 -0700 @@ -320,9 +320,11 @@ FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); FUNCTION_CASE(entry, StubRoutines::dexp()); FUNCTION_CASE(entry, StubRoutines::dlog()); + FUNCTION_CASE(entry, StubRoutines::dlog10()); FUNCTION_CASE(entry, StubRoutines::dpow()); FUNCTION_CASE(entry, StubRoutines::dsin()); FUNCTION_CASE(entry, StubRoutines::dcos()); + FUNCTION_CASE(entry, StubRoutines::dtan()); #undef FUNCTION_CASE --- old/src/share/vm/classfile/vmSymbols.cpp 2016-03-25 14:15:47.450366700 -0700 +++ new/src/share/vm/classfile/vmSymbols.cpp 2016-03-25 14:15:46.949338100 -0700 @@ -416,9 +416,19 @@ } } +bool vmIntrinsics::is_intrinsic_available(vmIntrinsics::ID id) { + return !DirectiveSet::is_intrinsic_disabled(id) && + !vmIntrinsics::is_disabled_by_flags(id); +} + bool vmIntrinsics::is_disabled_by_flags(const methodHandle& method) { vmIntrinsics::ID id = method->intrinsic_id(); assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + return is_disabled_by_flags(id); +} + +bool vmIntrinsics::is_disabled_by_flags(const vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); // -XX:-InlineNatives disables nearly all intrinsics except the ones listed in // the following switch statement. --- old/src/share/vm/classfile/vmSymbols.hpp 2016-03-25 14:15:50.754555700 -0700 +++ new/src/share/vm/classfile/vmSymbols.hpp 2016-03-25 14:15:50.217525000 -0700 @@ -1621,6 +1621,8 @@ // Returns true if a compiler intrinsic is disabled by command-line flags // and false otherwise. static bool is_disabled_by_flags(const methodHandle& method); + static bool is_disabled_by_flags(const vmIntrinsics::ID id); + static bool is_intrinsic_available(vmIntrinsics::ID id); }; #endif // SHARE_VM_CLASSFILE_VMSYMBOLS_HPP --- old/src/share/vm/compiler/compilerDirectives.cpp 2016-03-25 14:15:54.237754900 -0700 +++ new/src/share/vm/compiler/compilerDirectives.cpp 2016-03-25 14:15:53.673722700 -0700 @@ -409,6 +409,32 @@ return false; } + bool DirectiveSet::is_intrinsic_disabled(vmIntrinsics::ID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + + ResourceMark rm; + ccstr DisableIntrinsicString = canonicalize_disableintrinsic(DisableIntrinsic); + // Create a copy of the string that contains the list of disabled + // intrinsics. The copy is created because the string + // will be modified by strtok(). Then, the list is tokenized with + // ',' as a separator. + size_t length = strlen(DisableIntrinsicString); + char* local_list = NEW_RESOURCE_ARRAY(char, length + 1); + strncpy(local_list, DisableIntrinsicString, length + 1); + + char* token = strtok(local_list, ","); + while (token != NULL) { + if (strcmp(token, vmIntrinsics::name_at(id)) == 0) { + return true; + } + else { + token = strtok(NULL, ","); + } + } + + return false; +} + DirectiveSet* DirectiveSet::clone(DirectiveSet const* src) { DirectiveSet* set = new DirectiveSet(NULL); memcpy(set->_modified, src->_modified, sizeof(src->_modified)); --- old/src/share/vm/compiler/compilerDirectives.hpp 2016-03-25 14:15:57.489941000 -0700 +++ new/src/share/vm/compiler/compilerDirectives.hpp 2016-03-25 14:15:56.974911500 -0700 @@ -117,6 +117,7 @@ bool matches_inline(methodHandle method, int inline_action); static DirectiveSet* clone(DirectiveSet const* src); bool is_intrinsic_disabled(methodHandle method); + static bool is_intrinsic_disabled(vmIntrinsics::ID id); void finalize(outputStream* st); typedef enum { --- old/src/share/vm/opto/classes.hpp 2016-03-25 14:16:00.637121000 -0700 +++ new/src/share/vm/opto/classes.hpp 2016-03-25 14:16:00.117091200 -0700 @@ -171,7 +171,6 @@ macro(LoadRange) macro(LoadS) macro(Lock) -macro(Log10D) macro(Loop) macro(LoopLimit) macro(Mach) @@ -264,7 +263,6 @@ macro(SubL) macro(TailCall) macro(TailJump) -macro(TanD) macro(ThreadLocal) macro(Unlock) macro(URShiftI) --- old/src/share/vm/opto/library_call.cpp 2016-03-25 14:16:03.782300900 -0700 +++ new/src/share/vm/opto/library_call.cpp 2016-03-25 14:16:03.256270800 -0700 @@ -1676,7 +1676,6 @@ switch (id) { case vmIntrinsics::_dabs: n = new AbsDNode( arg); break; case vmIntrinsics::_dsqrt: n = new SqrtDNode(C, control(), arg); break; - case vmIntrinsics::_dlog10: n = new Log10DNode(C, control(), arg); break; default: fatal_unexpected_iid(id); break; } set_result(_gvn.transform(n)); @@ -1690,10 +1689,6 @@ Node* arg = round_double_node(argument(0)); Node* n = NULL; - switch (id) { - case vmIntrinsics::_dtan: n = new TanDNode(C, control(), arg); break; - default: fatal_unexpected_iid(id); break; - } n = _gvn.transform(n); // Rounding required? Check for argument reduction! @@ -1811,15 +1806,18 @@ return StubRoutines::dcos() != NULL ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcos(), "dcos") : runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS"); - case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN"); - + case vmIntrinsics::_dtan: + return StubRoutines::dtan() != NULL ? + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtan(), "dtan") : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN"); case vmIntrinsics::_dlog: return StubRoutines::dlog() != NULL ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") : runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG"); - case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); + case vmIntrinsics::_dlog10: + return StubRoutines::dlog10() != NULL ? + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog10(), "dlog10") : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); // These intrinsics are supported on all hardware case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false; --- old/src/share/vm/opto/subnode.cpp 2016-03-25 14:16:07.125492100 -0700 +++ new/src/share/vm/opto/subnode.cpp 2016-03-25 14:16:06.628463600 -0700 @@ -1533,25 +1533,3 @@ if( d < 0.0 ) return Type::DOUBLE; return TypeD::make( sqrt( d ) ); } - -//============================================================================= -//------------------------------Value------------------------------------------ -// Compute tan -const Type* TanDNode::Value(PhaseGVN* phase) const { - const Type *t1 = phase->type( in(1) ); - if( t1 == Type::TOP ) return Type::TOP; - if( t1->base() != Type::DoubleCon ) return Type::DOUBLE; - double d = t1->getd(); - return TypeD::make( StubRoutines::intrinsic_tan( d ) ); -} - -//============================================================================= -//------------------------------Value------------------------------------------ -// Compute log10 -const Type* Log10DNode::Value(PhaseGVN* phase) const { - const Type *t1 = phase->type( in(1) ); - if( t1 == Type::TOP ) return Type::TOP; - if( t1->base() != Type::DoubleCon ) return Type::DOUBLE; - double d = t1->getd(); - return TypeD::make( StubRoutines::intrinsic_log10( d ) ); -} --- old/src/share/vm/opto/subnode.hpp 2016-03-25 14:16:10.278672400 -0700 +++ new/src/share/vm/opto/subnode.hpp 2016-03-25 14:16:09.758642700 -0700 @@ -408,21 +408,6 @@ virtual uint ideal_reg() const { return Op_RegD; } }; -//------------------------------TanDNode--------------------------------------- -// tangens of a double -class TanDNode : public Node { -public: - TanDNode(Compile* C, Node *c,Node *in1) : Node(c, in1) { - init_flags(Flag_is_expensive); - C->add_expensive_node(this); - } - virtual int Opcode() const; - const Type *bottom_type() const { return Type::DOUBLE; } - virtual uint ideal_reg() const { return Op_RegD; } - virtual const Type* Value(PhaseGVN* phase) const; -}; - - //------------------------------AtanDNode-------------------------------------- // arcus tangens of a double class AtanDNode : public Node { @@ -442,20 +427,6 @@ init_flags(Flag_is_expensive); C->add_expensive_node(this); } - virtual int Opcode() const; - const Type *bottom_type() const { return Type::DOUBLE; } - virtual uint ideal_reg() const { return Op_RegD; } - virtual const Type* Value(PhaseGVN* phase) const; -}; - -//------------------------------Log10DNode--------------------------------------- -// Log_10 of a double -class Log10DNode : public Node { -public: - Log10DNode(Compile* C, Node *c, Node *in1) : Node(c, in1) { - init_flags(Flag_is_expensive); - C->add_expensive_node(this); - } virtual int Opcode() const; const Type *bottom_type() const { return Type::DOUBLE; } virtual uint ideal_reg() const { return Op_RegD; } --- old/src/share/vm/runtime/stubRoutines.cpp 2016-03-25 14:16:13.625863900 -0700 +++ new/src/share/vm/runtime/stubRoutines.cpp 2016-03-25 14:16:13.106834200 -0700 @@ -154,11 +154,14 @@ address StubRoutines::_dexp = NULL; address StubRoutines::_dlog = NULL; +address StubRoutines::_dlog10 = NULL; address StubRoutines::_dpow = NULL; address StubRoutines::_dsin = NULL; address StubRoutines::_dcos = NULL; address StubRoutines::_dlibm_sin_cos_huge = NULL; address StubRoutines::_dlibm_reduce_pi04l = NULL; +address StubRoutines::_dlibm_tan_cot_huge = NULL; +address StubRoutines::_dtan = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL; double (* StubRoutines::_intrinsic_sin )(double) = NULL; --- old/src/share/vm/runtime/stubRoutines.hpp 2016-03-25 14:16:16.832047300 -0700 +++ new/src/share/vm/runtime/stubRoutines.hpp 2016-03-25 14:16:16.324018200 -0700 @@ -213,11 +213,14 @@ static address _dexp; static address _dlog; + static address _dlog10; static address _dpow; static address _dsin; static address _dcos; static address _dlibm_sin_cos_huge; static address _dlibm_reduce_pi04l; + static address _dlibm_tan_cot_huge; + static address _dtan; // These are versions of the java.lang.Math methods which perform // the same operations as the intrinsic version. They are used for @@ -391,11 +394,14 @@ static address dexp() { return _dexp; } static address dlog() { return _dlog; } + static address dlog10() { return _dlog10; } static address dpow() { return _dpow; } static address dsin() { return _dsin; } static address dcos() { return _dcos; } static address dlibm_reduce_pi04l() { return _dlibm_reduce_pi04l; } static address dlibm_sin_cos_huge() { return _dlibm_sin_cos_huge; } + static address dlibm_tan_cot_huge() { return _dlibm_tan_cot_huge; } + static address dtan() { return _dtan; } static address select_fill_function(BasicType t, bool aligned, const char* &name); --- old/src/share/vm/runtime/vmStructs.cpp 2016-03-25 14:16:20.007228900 -0700 +++ new/src/share/vm/runtime/vmStructs.cpp 2016-03-25 14:16:19.479198700 -0700 @@ -859,9 +859,11 @@ static_field(StubRoutines, _mulAdd, address) \ static_field(StubRoutines, _dexp, address) \ static_field(StubRoutines, _dlog, address) \ + static_field(StubRoutines, _dlog10, address) \ static_field(StubRoutines, _dpow, address) \ static_field(StubRoutines, _dsin, address) \ static_field(StubRoutines, _dcos, address) \ + static_field(StubRoutines, _dtan, address) \ static_field(StubRoutines, _vectorizedMismatch, address) \ static_field(StubRoutines, _jbyte_arraycopy, address) \ static_field(StubRoutines, _jshort_arraycopy, address) \ @@ -2065,10 +2067,8 @@ declare_c2_type(NegNode, Node) \ declare_c2_type(NegFNode, NegNode) \ declare_c2_type(NegDNode, NegNode) \ - declare_c2_type(TanDNode, Node) \ declare_c2_type(AtanDNode, Node) \ declare_c2_type(SqrtDNode, Node) \ - declare_c2_type(Log10DNode, Node) \ declare_c2_type(ReverseBytesINode, Node) \ declare_c2_type(ReverseBytesLNode, Node) \ declare_c2_type(ReductionNode, Node) \