--- old/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathIntrinsicUnaryOp.java 2019-03-12 08:09:28.483527123 +0100 +++ new/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64MathIntrinsicUnaryOp.java 2019-03-12 08:09:28.107524683 +0100 @@ -24,3822 +24,48 @@ package org.graalvm.compiler.lir.amd64; -import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; -import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; -import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; -import static jdk.vm.ci.code.ValueUtil.asRegister; +import static jdk.vm.ci.amd64.AMD64.xmm0; +import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.registersToValues; -import org.graalvm.compiler.asm.Label; -import org.graalvm.compiler.asm.amd64.AMD64Address; -import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; -import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; -import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; import org.graalvm.compiler.core.common.LIRKind; -import org.graalvm.compiler.debug.GraalError; import org.graalvm.compiler.lir.LIRInstructionClass; -import org.graalvm.compiler.lir.Opcode; -import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; -import org.graalvm.compiler.lir.asm.CompilationResultBuilder; +import org.graalvm.compiler.lir.Variable; import org.graalvm.compiler.lir.gen.LIRGeneratorTool; -import jdk.vm.ci.amd64.AMD64; -import jdk.vm.ci.amd64.AMD64.CPUFeature; import jdk.vm.ci.amd64.AMD64Kind; import jdk.vm.ci.code.Register; -import jdk.vm.ci.meta.AllocatableValue; +import jdk.vm.ci.code.RegisterValue; import jdk.vm.ci.meta.Value; -public final class AMD64MathIntrinsicUnaryOp extends AMD64LIRInstruction { - public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AMD64MathIntrinsicUnaryOp.class); +/** + * AMD64MathIntrinsicUnaryOp assumes that the input value is stored at the xmm0 register, and will + * emit the output value into the xmm0 register as well. + * {@link #emitLIRWrapper(LIRGeneratorTool, Value)} is provided for emitting necessary mov LIRs + * before and after this LIR instruction. + */ +public abstract class AMD64MathIntrinsicUnaryOp extends AMD64LIRInstruction { - public enum UnaryIntrinsicOpcode { - LOG, - LOG10, - SIN, - COS, - TAN, - EXP - } - - @Opcode private final UnaryIntrinsicOpcode opcode; - @Def protected Value result; + @Def protected Value output; @Use protected Value input; - @Temp({REG, ILLEGAL}) protected Value xmm1Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm2Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm3Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm4Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm5Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm6Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm7Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm8Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm9Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value xmm10Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr1Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr2Temp = Value.ILLEGAL; - @Temp protected AllocatableValue rcxTemp; - @Temp({REG, ILLEGAL}) protected Value gpr4Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr5Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr6Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr7Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr8Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr9Temp = Value.ILLEGAL; - @Temp({REG, ILLEGAL}) protected Value gpr10Temp = Value.ILLEGAL; - @Temp({STACK, ILLEGAL}) protected Value stackTemp = Value.ILLEGAL; - - CompilationResultBuilder internalCrb; - - public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input, Value stackTemp) { - super(TYPE); - this.opcode = opcode; - this.result = result; - this.input = input; - if (opcode == UnaryIntrinsicOpcode.LOG || opcode == UnaryIntrinsicOpcode.LOG10 || - opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS || - opcode == UnaryIntrinsicOpcode.TAN || opcode == UnaryIntrinsicOpcode.EXP) { - this.gpr1Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr2Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.rcxTemp = AMD64.rcx.asValue(LIRKind.value(AMD64Kind.QWORD)); - this.gpr4Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.xmm1Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm2Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm3Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm4Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm5Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm6Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm7Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - - if (opcode == UnaryIntrinsicOpcode.EXP) { - this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm10Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - } - - if (opcode == UnaryIntrinsicOpcode.TAN) { - this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - } - - if (opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS) { - this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); - this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); - } - - this.stackTemp = stackTemp; - } - } - - public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input) { - this(tool, opcode, result, input, Value.ILLEGAL); - } - - private void setCrb(CompilationResultBuilder crb) { - internalCrb = crb; - } - - private AMD64Address externalAddress(ArrayDataPointerConstant curPtr) { - return (AMD64Address) internalCrb.recordDataReferenceInCode(curPtr); - } - - @Override - public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { - switch (opcode) { - case LOG: - logIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); - break; - case LOG10: - log10Intrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); - break; - case SIN: - sinIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); - break; - case COS: - cosIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); - break; - case TAN: - tanIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); - break; - case EXP: - expIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); - break; - default: - throw GraalError.shouldNotReachHere(); - } - } - - private static int[] logTwoTable = { - 0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800, - 0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40, - 0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd, - 0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4, - 0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00, - 0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15, - 0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7, - 0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79, - 0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800, - 0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 0xad9d8c00, 0x3fe35028, - 0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c, - 0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5, - 0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00, - 0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0, - 0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3, - 0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde, - 0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800, - 0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8, - 0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742, - 0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b, - 0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400, - 0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a, - 0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935, - 0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5, - 0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000, - 0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a, - 0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a, - 0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf, - 0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800, - 0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60, - 0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d, - 0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525, - 0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800, - 0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344, - 0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0, - 0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313, - 0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000, - 0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855, - 0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a, - 0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d, - 0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 0xe6af1800, - 0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8, - 0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00, - 0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a, - 0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000, - 0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0, - 0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675, - 0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436, - 0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000, - 0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1, - 0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4, - 0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1, - 0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800, - 0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c, - 0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c, - 0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b, - 0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800, - 0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c, - 0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11, - 0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227, - 0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000, - 0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb, - 0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6, - 0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d, - 0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000, - 0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a, - 0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1, - 0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb, - 0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000, - 0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e, - 0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c, - 0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8, - 0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000, - 0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc, - 0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1, - 0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23, - 0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000, - 0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598, - 0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd, - 0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068, - 0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000, - 0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8, - 0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8, - 0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c, - 0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000, - 0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a, - 0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef, - 0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e, - 0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000, - 0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598, - 0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b, - 0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2, - 0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000, - 0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94, - 0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28, - 0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a, - 0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000, - 0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925, - 0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe, - 0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b, - 0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000, - 0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805, - 0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000, - 0x80000000 - }; - - private static int[] logTwoData = { - 0xfefa3800, 0x3fa62e42, 0x93c76730, 0x3ceef357 - }; - - private static int[] coeffLogTwoData = { - 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 0x3d6fb175, - 0xbfc5555e, 0x55555555, 0x3fd55555, 0x9999999a, 0x3fc99999, - 0x00000000, 0xbfe00000 - }; - - /* - * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) - * Source Code - * - * ALGORITHM DESCRIPTION - LOG() --------------------- - * - * x=2^k * mx, mx in [1,2) - * - * Get B~1/mx based on the output of rcpps instruction (B0) B = int((B0*2^7+0.5))/2^7 - * - * Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) - * - * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and p(r) is a degree 7 - * polynomial -log(B) read from data table (high, low parts) Result is formed from high and low - * parts. - * - * Special cases: log(NaN) = quiet NaN, and raise invalid exception log(+INF) = that INF log(0) - * = -INF with divide-by-zero exception raised log(1) = +0 log(x) = NaN with invalid exception - * raised if x < -0, including -INF - * - */ - - public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { - ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16); - ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16); - ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16); - - Label bb0 = new Label(); - Label bb1 = new Label(); - Label bb2 = new Label(); - Label bb3 = new Label(); - Label bb4 = new Label(); - Label bb5 = new Label(); - Label bb6 = new Label(); - Label bb7 = new Label(); - Label bb8 = new Label(); - - Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); - Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); - Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); - Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); - - Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); - Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); - Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); - Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); - Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); - Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); - Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); - - AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); - - setCrb(crb); - masm.movdq(stackSlot, value); - if (dest.encoding != value.encoding) { - masm.movdqu(dest, value); - } - masm.movq(gpr1, 0x3ff0000000000000L); - masm.movdq(temp2, gpr1); - masm.movq(gpr3, 0x77f0000000000000L); - masm.movdq(temp3, gpr3); - masm.movl(gpr2, 32768); - masm.movdl(temp4, gpr2); - masm.movq(gpr2, 0xffffe00000000000L); - masm.movdq(temp5, gpr2); - masm.movdqu(temp1, value); - masm.pextrw(gpr1, dest, 3); - masm.por(dest, temp2); - masm.movl(gpr2, 16352); - masm.psrlq(dest, 27); - masm.leaq(gpr4, externalAddress(logTwoTablePtr)); - masm.psrld(dest, 2); - masm.rcpps(dest, dest); - masm.psllq(temp1, 12); - masm.pshufd(temp6, temp5, 0xE4); - masm.psrlq(temp1, 12); - masm.subl(gpr1, 16); - masm.cmpl(gpr1, 32736); - masm.jcc(ConditionFlag.AboveEqual, bb0); - - masm.bind(bb1); - masm.paddd(dest, temp4); - masm.por(temp1, temp3); - masm.movdl(gpr3, dest); - masm.psllq(dest, 29); - masm.pand(temp5, temp1); - masm.pand(dest, temp6); - masm.subsd(temp1, temp5); - masm.mulpd(temp5, dest); - masm.andl(gpr1, 32752); - masm.subl(gpr1, gpr2); - masm.cvtsi2sdl(temp7, gpr1); - masm.mulsd(temp1, dest); - masm.movdq(temp6, externalAddress(logTwoDataPtr)); // 0xfefa3800, - // 0x3fa62e42 - masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr)); // 0x92492492, - // 0x3fc24924, - // 0x00000000, - // 0xbfd00000 - masm.subsd(temp5, temp2); - masm.andl(gpr3, 16711680); - masm.shrl(gpr3, 12); - masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0)); - masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr)); - masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x3d6fb175, - // 0xbfc5555e, - // 0x55555555, - // 0x3fd55555 - masm.addsd(temp1, temp5); - masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x9999999a, - // 0x3fc99999, - // 0x00000000, - // 0xbfe00000 - masm.mulsd(temp6, temp7); - if (masm.supports(CPUFeature.SSE3)) { - masm.movddup(temp5, temp1); - } else { - masm.movdqu(temp5, temp1); - masm.movlhps(temp5, temp5); - } - masm.leaq(gpr4, externalAddress(logTwoDataPtr)); - masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x93c76730, - // 0x3ceef357 - masm.mulsd(temp3, temp1); - masm.addsd(dest, temp6); - masm.mulpd(temp4, temp5); - masm.mulpd(temp5, temp5); - if (masm.supports(CPUFeature.SSE3)) { - masm.movddup(temp6, dest); - } else { - masm.movdqu(temp6, dest); - masm.movlhps(temp6, temp6); - } - masm.addsd(dest, temp1); - masm.addpd(temp4, temp2); - masm.mulpd(temp3, temp5); - masm.subsd(temp6, dest); - masm.mulsd(temp4, temp1); - masm.pshufd(temp2, dest, 0xEE); - masm.addsd(temp1, temp6); - masm.mulsd(temp5, temp5); - masm.addsd(temp7, temp2); - masm.addpd(temp4, temp3); - masm.addsd(temp1, temp7); - masm.mulpd(temp4, temp5); - masm.addsd(temp1, temp4); - masm.pshufd(temp5, temp4, 0xEE); - masm.addsd(temp1, temp5); - masm.addsd(dest, temp1); - masm.jmp(bb8); - - masm.bind(bb0); - masm.movdq(dest, stackSlot); - masm.movdq(temp1, stackSlot); - masm.addl(gpr1, 16); - masm.cmpl(gpr1, 32768); - masm.jcc(ConditionFlag.AboveEqual, bb2); - - masm.cmpl(gpr1, 16); - masm.jcc(ConditionFlag.Below, bb3); - - masm.bind(bb4); - masm.addsd(dest, dest); - masm.jmp(bb8); - - masm.bind(bb5); - masm.jcc(ConditionFlag.Above, bb4); - - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Above, bb4); - - masm.jmp(bb6); - - masm.bind(bb3); - masm.xorpd(temp1, temp1); - masm.addsd(temp1, dest); - masm.movdl(gpr3, temp1); - masm.psrlq(temp1, 32); - masm.movdl(gpr2, temp1); - masm.orl(gpr3, gpr2); - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Equal, bb7); - - masm.xorpd(temp1, temp1); - masm.movl(gpr1, 18416); - masm.pinsrw(temp1, gpr1, 3); - masm.mulsd(dest, temp1); - masm.movdqu(temp1, dest); - masm.pextrw(gpr1, dest, 3); - masm.por(dest, temp2); - masm.psrlq(dest, 27); - masm.movl(gpr2, 18416); - masm.psrld(dest, 2); - masm.rcpps(dest, dest); - masm.psllq(temp1, 12); - masm.pshufd(temp6, temp5, 0xE4); - masm.psrlq(temp1, 12); - masm.jmp(bb1); - - masm.bind(bb2); - masm.movdl(gpr3, temp1); - masm.psrlq(temp1, 32); - masm.movdl(gpr2, temp1); - masm.addl(gpr2, gpr2); - masm.cmpl(gpr2, -2097152); - masm.jcc(ConditionFlag.AboveEqual, bb5); - - masm.orl(gpr3, gpr2); - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Equal, bb7); - - masm.bind(bb6); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.movl(gpr1, 32752); - masm.pinsrw(temp1, gpr1, 3); - masm.mulsd(dest, temp1); - masm.jmp(bb8); - - masm.bind(bb7); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.movl(gpr1, 49136); - masm.pinsrw(dest, gpr1, 3); - masm.divsd(dest, temp1); - - masm.bind(bb8); - } - - private static int[] highmaskLogTen = { - 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000 - }; - - private static int[] logTenE = { - 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c - }; - - private static int[] logTenTable = { - 0x509f7800, 0x3fd34413, 0x1f12b358, 0x3d1fef31, 0x80333400, - 0x3fd32418, 0xc671d9d0, 0xbcf542bf, 0x51195000, 0x3fd30442, - 0x78a4b0c3, 0x3d18216a, 0x6fc79400, 0x3fd2e490, 0x80fa389d, - 0xbc902869, 0x89d04000, 0x3fd2c502, 0x75c2f564, 0x3d040754, - 0x4ddd1c00, 0x3fd2a598, 0xd219b2c3, 0xbcfa1d84, 0x6baa7c00, - 0x3fd28651, 0xfd9abec1, 0x3d1be6d3, 0x94028800, 0x3fd2672d, - 0xe289a455, 0xbd1ede5e, 0x78b86400, 0x3fd2482c, 0x6734d179, - 0x3d1fe79b, 0xcca3c800, 0x3fd2294d, 0x981a40b8, 0xbced34ea, - 0x439c5000, 0x3fd20a91, 0xcc392737, 0xbd1a9cc3, 0x92752c00, - 0x3fd1ebf6, 0x03c9afe7, 0x3d1e98f8, 0x6ef8dc00, 0x3fd1cd7d, - 0x71dae7f4, 0x3d08a86c, 0x8fe4dc00, 0x3fd1af25, 0xee9185a1, - 0xbcff3412, 0xace59400, 0x3fd190ee, 0xc2cab353, 0x3cf17ed9, - 0x7e925000, 0x3fd172d8, 0x6952c1b2, 0x3cf1521c, 0xbe694400, - 0x3fd154e2, 0xcacb79ca, 0xbd0bdc78, 0x26cbac00, 0x3fd1370d, - 0xf71f4de1, 0xbd01f8be, 0x72fa0800, 0x3fd11957, 0x55bf910b, - 0x3c946e2b, 0x5f106000, 0x3fd0fbc1, 0x39e639c1, 0x3d14a84b, - 0xa802a800, 0x3fd0de4a, 0xd3f31d5d, 0xbd178385, 0x0b992000, - 0x3fd0c0f3, 0x3843106f, 0xbd1f602f, 0x486ce800, 0x3fd0a3ba, - 0x8819497c, 0x3cef987a, 0x1de49400, 0x3fd086a0, 0x1caa0467, - 0x3d0faec7, 0x4c30cc00, 0x3fd069a4, 0xa4424372, 0xbd1618fc, - 0x94490000, 0x3fd04cc6, 0x946517d2, 0xbd18384b, 0xb7e84000, - 0x3fd03006, 0xe0109c37, 0xbd19a6ac, 0x798a0c00, 0x3fd01364, - 0x5121e864, 0xbd164cf7, 0x38ce8000, 0x3fcfedbf, 0x46214d1a, - 0xbcbbc402, 0xc8e62000, 0x3fcfb4ef, 0xdab93203, 0x3d1e0176, - 0x2cb02800, 0x3fcf7c5a, 0x2a2ea8e4, 0xbcfec86a, 0xeeeaa000, - 0x3fcf43fd, 0xc18e49a4, 0x3cf110a8, 0x9bb6e800, 0x3fcf0bda, - 0x923cc9c0, 0xbd15ce99, 0xc093f000, 0x3fced3ef, 0x4d4b51e9, - 0x3d1a04c7, 0xec58f800, 0x3fce9c3c, 0x163cad59, 0x3cac8260, - 0x9a907000, 0x3fce2d7d, 0x3fa93646, 0x3ce4a1c0, 0x37311000, - 0x3fcdbf99, 0x32abd1fd, 0x3d07ea9d, 0x6744b800, 0x3fcd528c, - 0x4dcbdfd4, 0xbd1b08e2, 0xe36de800, 0x3fcce653, 0x0b7b7f7f, - 0xbd1b8f03, 0x77506800, 0x3fcc7aec, 0xa821c9fb, 0x3d13c163, - 0x00ff8800, 0x3fcc1053, 0x536bca76, 0xbd074ee5, 0x70719800, - 0x3fcba684, 0xd7da9b6b, 0xbd1fbf16, 0xc6f8d800, 0x3fcb3d7d, - 0xe2220bb3, 0x3d1a295d, 0x16c15800, 0x3fcad53c, 0xe724911e, - 0xbcf55822, 0x82533800, 0x3fca6dbc, 0x6d982371, 0x3cac567c, - 0x3c19e800, 0x3fca06fc, 0x84d17d80, 0x3d1da204, 0x85ef8000, - 0x3fc9a0f8, 0x54466a6a, 0xbd002204, 0xb0ac2000, 0x3fc93bae, - 0xd601fd65, 0x3d18840c, 0x1bb9b000, 0x3fc8d71c, 0x7bf58766, - 0xbd14f897, 0x34aae800, 0x3fc8733e, 0x3af6ac24, 0xbd0f5c45, - 0x76d68000, 0x3fc81012, 0x4303e1a1, 0xbd1f9a80, 0x6af57800, - 0x3fc7ad96, 0x43fbcb46, 0x3cf4c33e, 0xa6c51000, 0x3fc74bc7, - 0x70f0eac5, 0xbd192e3b, 0xccab9800, 0x3fc6eaa3, 0xc0093dfe, - 0xbd0faf15, 0x8b60b800, 0x3fc68a28, 0xde78d5fd, 0xbc9ea4ee, - 0x9d987000, 0x3fc62a53, 0x962bea6e, 0xbd194084, 0xc9b0e800, - 0x3fc5cb22, 0x888dd999, 0x3d1fe201, 0xe1634800, 0x3fc56c93, - 0x16ada7ad, 0x3d1b1188, 0xc176c000, 0x3fc50ea4, 0x4159b5b5, - 0xbcf09c08, 0x51766000, 0x3fc4b153, 0x84393d23, 0xbcf6a89c, - 0x83695000, 0x3fc4549d, 0x9f0b8bbb, 0x3d1c4b8c, 0x538d5800, - 0x3fc3f881, 0xf49df747, 0x3cf89b99, 0xc8138000, 0x3fc39cfc, - 0xd503b834, 0xbd13b99f, 0xf0df0800, 0x3fc3420d, 0xf011b386, - 0xbd05d8be, 0xe7466800, 0x3fc2e7b2, 0xf39c7bc2, 0xbd1bb94e, - 0xcdd62800, 0x3fc28de9, 0x05e6d69b, 0xbd10ed05, 0xd015d800, - 0x3fc234b0, 0xe29b6c9d, 0xbd1ff967, 0x224ea800, 0x3fc1dc06, - 0x727711fc, 0xbcffb30d, 0x01540000, 0x3fc183e8, 0x39786c5a, - 0x3cc23f57, 0xb24d9800, 0x3fc12c54, 0xc905a342, 0x3d003a1d, - 0x82835800, 0x3fc0d54a, 0x9b9920c0, 0x3d03b25a, 0xc72ac000, - 0x3fc07ec7, 0x46f26a24, 0x3cf0fa41, 0xdd35d800, 0x3fc028ca, - 0x41d9d6dc, 0x3d034a65, 0x52474000, 0x3fbfa6a4, 0x44f66449, - 0x3d19cad3, 0x2da3d000, 0x3fbefcb8, 0x67832999, 0x3d18400f, - 0x32a10000, 0x3fbe53ce, 0x9c0e3b1a, 0xbcff62fd, 0x556b7000, - 0x3fbdabe3, 0x02976913, 0xbcf8243b, 0x97e88000, 0x3fbd04f4, - 0xec793797, 0x3d1c0578, 0x09647000, 0x3fbc5eff, 0x05fc0565, - 0xbd1d799e, 0xc6426000, 0x3fbbb9ff, 0x4625f5ed, 0x3d1f5723, - 0xf7afd000, 0x3fbb15f3, 0xdd5aae61, 0xbd1a7e1e, 0xd358b000, - 0x3fba72d8, 0x3314e4d3, 0x3d17bc91, 0x9b1f5000, 0x3fb9d0ab, - 0x9a4d514b, 0x3cf18c9b, 0x9cd4e000, 0x3fb92f69, 0x7e4496ab, - 0x3cf1f96d, 0x31f4f000, 0x3fb88f10, 0xf56479e7, 0x3d165818, - 0xbf628000, 0x3fb7ef9c, 0x26bf486d, 0xbd1113a6, 0xb526b000, - 0x3fb7510c, 0x1a1c3384, 0x3ca9898d, 0x8e31e000, 0x3fb6b35d, - 0xb3875361, 0xbd0661ac, 0xd01de000, 0x3fb6168c, 0x2a7cacfa, - 0xbd1bdf10, 0x0af23000, 0x3fb57a98, 0xff868816, 0x3cf046d0, - 0xd8ea0000, 0x3fb4df7c, 0x1515fbe7, 0xbd1fd529, 0xde3b2000, - 0x3fb44538, 0x6e59a132, 0x3d1faeee, 0xc8df9000, 0x3fb3abc9, - 0xf1322361, 0xbd198807, 0x505f1000, 0x3fb3132d, 0x0888e6ab, - 0x3d1e5380, 0x359bd000, 0x3fb27b61, 0xdfbcbb22, 0xbcfe2724, - 0x429ee000, 0x3fb1e463, 0x6eb4c58c, 0xbcfe4dd6, 0x4a673000, - 0x3fb14e31, 0x4ce1ac9b, 0x3d1ba691, 0x28b96000, 0x3fb0b8c9, - 0x8c7813b8, 0xbd0b3872, 0xc1f08000, 0x3fb02428, 0xc2bc8c2c, - 0x3cb5ea6b, 0x05a1a000, 0x3faf209c, 0x72e8f18e, 0xbce8df84, - 0xc0b5e000, 0x3fadfa6d, 0x9fdef436, 0x3d087364, 0xaf416000, - 0x3facd5c2, 0x1068c3a9, 0x3d0827e7, 0xdb356000, 0x3fabb296, - 0x120a34d3, 0x3d101a9f, 0x5dfea000, 0x3faa90e6, 0xdaded264, - 0xbd14c392, 0x6034c000, 0x3fa970ad, 0x1c9d06a9, 0xbd1b705e, - 0x194c6000, 0x3fa851e8, 0x83996ad9, 0xbd0117bc, 0xcf4ac000, - 0x3fa73492, 0xb1a94a62, 0xbca5ea42, 0xd67b4000, 0x3fa618a9, - 0x75aed8ca, 0xbd07119b, 0x9126c000, 0x3fa4fe29, 0x5291d533, - 0x3d12658f, 0x6f4d4000, 0x3fa3e50e, 0xcd2c5cd9, 0x3d1d5c70, - 0xee608000, 0x3fa2cd54, 0xd1008489, 0x3d1a4802, 0x9900e000, - 0x3fa1b6f9, 0x54fb5598, 0xbd16593f, 0x06bb6000, 0x3fa0a1f9, - 0x64ef57b4, 0xbd17636b, 0xb7940000, 0x3f9f1c9f, 0xee6a4737, - 0x3cb5d479, 0x91aa0000, 0x3f9cf7f5, 0x3a16373c, 0x3d087114, - 0x156b8000, 0x3f9ad5ed, 0x836c554a, 0x3c6900b0, 0xd4764000, - 0x3f98b67f, 0xed12f17b, 0xbcffc974, 0x77dec000, 0x3f9699a7, - 0x232ce7ea, 0x3d1e35bb, 0xbfbf4000, 0x3f947f5d, 0xd84ffa6e, - 0x3d0e0a49, 0x82c7c000, 0x3f92679c, 0x8d170e90, 0xbd14d9f2, - 0xadd20000, 0x3f90525d, 0x86d9f88e, 0x3cdeb986, 0x86f10000, - 0x3f8c7f36, 0xb9e0a517, 0x3ce29faa, 0xb75c8000, 0x3f885e9e, - 0x542568cb, 0xbd1f7bdb, 0x46b30000, 0x3f8442e8, 0xb954e7d9, - 0x3d1e5287, 0xb7e60000, 0x3f802c07, 0x22da0b17, 0xbd19fb27, - 0x6c8b0000, 0x3f7833e3, 0x821271ef, 0xbd190f96, 0x29910000, - 0x3f701936, 0xbc3491a5, 0xbd1bcf45, 0x354a0000, 0x3f600fe3, - 0xc0ff520a, 0xbd19d71c, 0x00000000, 0x00000000, 0x00000000, - 0x00000000 - }; - - private static int[] logTwoLogTenData = { - 0x509f7800, 0x3f934413, 0x1f12b358, 0x3cdfef31 - }; - - private static int[] coeffLogTenData = { - 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309, 0x385593b1, - 0xc025c917, 0xdc963467, 0x3ffc6a02, 0x7f9d3aa1, 0x4016ab9f, - 0xdc77b115, 0xbff27af2 - }; - - /* - * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) - * Source Code - * - * ALGORITHM DESCRIPTION - LOG10() --------------------- - * - * Let x=2^k * mx, mx in [1,2) - * - * Get B~1/mx based on the output of rcpss instruction (B0) B = int((B0*LH*2^7+0.5))/2^7 LH is a - * short approximation for log10(e) - * - * Reduced argument: r=B*mx-LH (computed accurately in high and low parts) - * - * Result: k*log10(2) - log(B) + p(r) p(r) is a degree 7 polynomial -log(B) read from data table - * (high, low parts) Result is formed from high and low parts - * - * Special cases: log10(0) = -INF with divide-by-zero exception raised log10(1) = +0 log10(x) = - * NaN with invalid exception raised if x < -0, including -INF log10(+INF) = +INF - * - */ - - public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { - ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16); - ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16); - ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16); - ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16); - ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16); - - Label bb0 = new Label(); - Label bb1 = new Label(); - Label bb2 = new Label(); - Label bb3 = new Label(); - Label bb4 = new Label(); - Label bb5 = new Label(); - Label bb6 = new Label(); - Label bb7 = new Label(); - Label bb8 = new Label(); - - Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); - Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); - Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); - Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); - - Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); - Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); - Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); - Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); - Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); - Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); - Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); - - AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); - - setCrb(crb); - masm.movdq(stackSlot, value); - if (dest.encoding != value.encoding) { - masm.movdqu(dest, value); - } - masm.movdqu(temp5, externalAddress(highmaskLogTenPtr)); // 0xf8000000, - // 0xffffffff, - // 0x00000000, - // 0xffffe000 - masm.xorpd(temp2, temp2); - masm.movl(gpr1, 16368); - masm.pinsrw(temp2, gpr1, 3); - masm.movl(gpr2, 1054736384); - masm.movdl(temp7, gpr2); - masm.xorpd(temp3, temp3); - masm.movl(gpr3, 30704); - masm.pinsrw(temp3, gpr3, 3); - masm.movl(gpr3, 32768); - masm.movdl(temp4, gpr3); - masm.movdqu(temp1, value); - masm.pextrw(gpr1, dest, 3); - masm.por(dest, temp2); - masm.movl(gpr2, 16352); - masm.psrlq(dest, 27); - masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, - // 0x3fdbc000, - // 0xbf2e4108, - // 0x3f5a7a6c - masm.psrld(dest, 2); - masm.rcpps(dest, dest); - masm.psllq(temp1, 12); - masm.pshufd(temp6, temp5, 0x4E); - masm.psrlq(temp1, 12); - masm.subl(gpr1, 16); - masm.cmpl(gpr1, 32736); - masm.jcc(ConditionFlag.AboveEqual, bb0); - - masm.bind(bb1); - masm.mulss(dest, temp7); - masm.por(temp1, temp3); - masm.andpd(temp5, temp1); - masm.paddd(dest, temp4); - masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr)); // 0xc1a5f12e, - // 0x40358874, - // 0x64d4ef0d, - // 0xc0089309 - masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr)); - masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x385593b1, - // 0xc025c917, - // 0xdc963467, - // 0x3ffc6a02 - masm.subsd(temp1, temp5); - masm.movdl(gpr3, dest); - masm.psllq(dest, 29); - masm.andpd(dest, temp6); - masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr)); // 0x509f7800, - // 0x3f934413 - masm.andl(gpr1, 32752); - masm.subl(gpr1, gpr2); - masm.cvtsi2sdl(temp7, gpr1); - masm.mulpd(temp5, dest); - masm.mulsd(temp1, dest); - masm.subsd(temp5, temp2); - masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x7f9d3aa1, - // 0x4016ab9f, - // 0xdc77b115, - // 0xbff27af2 - masm.leaq(gpr4, externalAddress(logTenTablePtr)); - masm.andl(gpr3, 16711680); - masm.shrl(gpr3, 12); - masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504)); - masm.addsd(temp1, temp5); - masm.mulsd(temp6, temp7); - masm.pshufd(temp5, temp1, 0x44); - masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr)); - masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x1f12b358, - // 0x3cdfef31 - masm.mulsd(temp3, temp1); - masm.addsd(dest, temp6); - masm.mulpd(temp4, temp5); - masm.leaq(gpr4, externalAddress(logTenEPtr)); - masm.movdq(temp6, new AMD64Address(gpr4, 8)); // 0xbf2e4108, - // 0x3f5a7a6c - masm.mulpd(temp5, temp5); - masm.addpd(temp4, temp2); - masm.mulpd(temp3, temp5); - masm.pshufd(temp2, dest, 0xE4); - masm.addsd(dest, temp1); - masm.mulsd(temp4, temp1); - masm.subsd(temp2, dest); - masm.mulsd(temp6, temp1); - masm.addsd(temp1, temp2); - masm.pshufd(temp2, dest, 0xEE); - masm.mulsd(temp5, temp5); - masm.addsd(temp7, temp2); - masm.addsd(temp1, temp6); - masm.addpd(temp4, temp3); - masm.addsd(temp1, temp7); - masm.mulpd(temp4, temp5); - masm.addsd(temp1, temp4); - masm.pshufd(temp5, temp4, 0xEE); - masm.addsd(temp1, temp5); - masm.addsd(dest, temp1); - masm.jmp(bb8); - - masm.bind(bb0); - masm.movdq(dest, stackSlot); - masm.movdq(temp1, stackSlot); - masm.addl(gpr1, 16); - masm.cmpl(gpr1, 32768); - masm.jcc(ConditionFlag.AboveEqual, bb2); - - masm.cmpl(gpr1, 16); - masm.jcc(ConditionFlag.Below, bb3); - - masm.bind(bb4); - masm.addsd(dest, dest); - masm.jmp(bb8); - - masm.bind(bb5); - masm.jcc(ConditionFlag.Above, bb4); - - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Above, bb4); - - masm.jmp(bb6); - - masm.bind(bb3); - masm.xorpd(temp1, temp1); - masm.addsd(temp1, dest); - masm.movdl(gpr3, temp1); - masm.psrlq(temp1, 32); - masm.movdl(gpr2, temp1); - masm.orl(gpr3, gpr2); - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Equal, bb7); - - masm.xorpd(temp1, temp1); - masm.xorpd(temp2, temp2); - masm.movl(gpr1, 18416); - masm.pinsrw(temp1, gpr1, 3); - masm.mulsd(dest, temp1); - masm.movl(gpr1, 16368); - masm.pinsrw(temp2, gpr1, 3); - masm.movdqu(temp1, dest); - masm.pextrw(gpr1, dest, 3); - masm.por(dest, temp2); - masm.movl(gpr2, 18416); - masm.psrlq(dest, 27); - masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, - // 0x3fdbc000, - // 0xbf2e4108, - // 0x3f5a7a6c - masm.psrld(dest, 2); - masm.rcpps(dest, dest); - masm.psllq(temp1, 12); - masm.pshufd(temp6, temp5, 0x4E); - masm.psrlq(temp1, 12); - masm.jmp(bb1); - - masm.bind(bb2); - masm.movdl(gpr3, temp1); - masm.psrlq(temp1, 32); - masm.movdl(gpr2, temp1); - masm.addl(gpr2, gpr2); - masm.cmpl(gpr2, -2097152); - masm.jcc(ConditionFlag.AboveEqual, bb5); - - masm.orl(gpr3, gpr2); - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Equal, bb7); - - masm.bind(bb6); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.movl(gpr1, 32752); - masm.pinsrw(temp1, gpr1, 3); - masm.mulsd(dest, temp1); - masm.jmp(bb8); - - masm.bind(bb7); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.movl(gpr1, 49136); - masm.pinsrw(dest, gpr1, 3); - masm.divsd(dest, temp1); - - masm.bind(bb8); - } - - /* - * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) - * Source Code - * - * ALGORITHM DESCRIPTION - SIN() --------------------- - * - * 1. RANGE REDUCTION - * - * We perform an initial range reduction from X to r with - * - * X =~= N * pi/32 + r - * - * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, - * the range reduction is insufficiently accurate. For extremely small inputs, denormalization - * can occur internally, impacting performance. This means that the main path is actually only - * taken for 2^-252 <= |X| < 90112. - * - * To avoid branches, we perform the range reduction to full accuracy each time. - * - * X - N * (P_1 + P_2 + P_3) - * - * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit - * number. Together, these approximate pi well enough for all cases in the restricted range. - * - * The main reduction sequence is: - * - * y = 32/pi * x N = integer(y) (computed by adding and subtracting off SHIFTER) - * - * m_1 = N * P_1 m_2 = N * P_2 r_1 = x - m_1 r = r_1 - m_2 (this r can be used for most of the - * calculation) - * - * c_1 = r_1 - r m_3 = N * P_3 c_2 = c_1 - m_2 c = c_2 - m_3 - * - * 2. MAIN ALGORITHM - * - * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored - * values are: sigma closest power of 2 to cos(B) C_hl 53-bit cos(B) - sigma S_hi + S_lo 2 * - * 53-bit sin(B) - * - * The computation is organized as follows: - * - * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] + - * cos(B) * [sin(r + c) - r] - * - * which is approximately: - * - * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) * - * [(sin(r) - r) + c] - * - * and this is what is actually computed. We separate this sum into four parts: - * - * hi + med + pols + corr - * - * where - * - * hi = S_hi + sigma r med = C_hl * r pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) - * corr = S_lo + c * ((C_hl + sigma) - S_hi * r) - * - * 3. POLYNOMIAL - * - * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely, - * since it is quite small, so we exploit parallelism to the fullest. - * - * psc4 = SC_4 * r_1 msc4 = psc4 * r r2 = r * r msc2 = SC_2 * r2 r4 = r2 * r2 psc3 = SC_3 + msc4 - * psc1 = SC_1 + msc2 msc3 = r4 * psc3 sincospols = psc1 + msc3 pols = sincospols * - * - * 4. CORRECTION TERM - * - * This is where the "c" component of the range reduction is taken into account; recall that - * just "r" is used for most of the calculation. - * - * -c = m_3 - c_2 -d = S_hi * r - (C_hl + sigma) corr = -c * -d + S_lo - * - * 5. COMPENSATED SUMMATIONS - * - * The two successive compensated summations add up the high and medium parts, leaving just the - * low parts to add up at the end. - * - * rs = sigma * r res_int = S_hi + rs k_0 = S_hi - res_int k_2 = k_0 + rs med = C_hl * r res_hi - * = res_int + med k_1 = res_int - res_hi k_3 = k_1 + med - * - * 6. FINAL SUMMATION - * - * We now add up all the small parts: - * - * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 - * - * Now the overall result is just: - * - * res_hi + res_lo - * - * 7. SMALL ARGUMENTS - * - * If |x| < SNN (SNN meaning the smallest normal number), we simply perform 0.1111111 cdots 1111 - * * x. For SNN <= |x|, we do 2^-55 * (2^55 * x - x). - * - * Special cases: sin(NaN) = quiet NaN, and raise invalid exception sin(INF) = NaN and raise - * invalid exception sin(+/-0) = +/-0 - * - */ - - public int[] oneHalf = { - 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 - }; - - public int[] pTwo = { - 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461 - }; - - public int[] scFour = { - 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0 - }; - - public int[] cTable = { - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x3ff00000, 0x176d6d31, 0xbf73b92e, - 0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000, - 0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0x3fc8f8b8, - 0xc0000000, 0xbc626d19, 0x00000000, 0x3ff00000, 0x939d225a, - 0xbfa60bea, 0x2ed59f06, 0x3fd29406, 0xa0000000, 0xbc75d28d, - 0x00000000, 0x3ff00000, 0x866b95cf, 0xbfb37ca1, 0xa6aea963, - 0x3fd87de2, 0xe0000000, 0xbc672ced, 0x00000000, 0x3ff00000, - 0x73fa1279, 0xbfbe3a68, 0x3806f63b, 0x3fde2b5d, 0x20000000, - 0x3c5e0d89, 0x00000000, 0x3ff00000, 0x5bc57974, 0xbfc59267, - 0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000, - 0x3ff00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0x3fe44cf3, - 0x20000000, 0x3c68076a, 0x00000000, 0x3ff00000, 0x99fcef32, - 0x3fca8279, 0x667f3bcd, 0x3fe6a09e, 0x20000000, 0xbc8bdd34, - 0x00000000, 0x3fe00000, 0x94247758, 0x3fc133cc, 0x6b151741, - 0x3fe8bc80, 0x20000000, 0xbc82c5e1, 0x00000000, 0x3fe00000, - 0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 0x3fea9b66, 0xe0000000, - 0x3c39f630, 0x00000000, 0x3fe00000, 0x7f909c4e, 0xbf9d4a2c, - 0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000, - 0x3fe00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0x3fed906b, - 0x20000000, 0x3c7457e6, 0x00000000, 0x3fe00000, 0x76acf82d, - 0x3fa4a031, 0x56c62dda, 0x3fee9f41, 0xe0000000, 0x3c8760b1, - 0x00000000, 0x3fd00000, 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, - 0x3fef6297, 0x20000000, 0x3c756217, 0x00000000, 0x3fd00000, - 0x0f592f50, 0xbf9ba165, 0xa3d12526, 0x3fefd88d, 0x40000000, - 0xbc887df6, 0x00000000, 0x3fc00000, 0x00000000, 0x00000000, - 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0x3fefd88d, - 0x40000000, 0xbc887df6, 0x00000000, 0xbfc00000, 0x0e5967d5, - 0x3fac1d1f, 0xcff75cb0, 0x3fef6297, 0x20000000, 0x3c756217, - 0x00000000, 0xbfd00000, 0x76acf82d, 0xbfa4a031, 0x56c62dda, - 0x3fee9f41, 0xe0000000, 0x3c8760b1, 0x00000000, 0xbfd00000, - 0x65455a75, 0x3fbe0875, 0xcf328d46, 0x3fed906b, 0x20000000, - 0x3c7457e6, 0x00000000, 0xbfe00000, 0x7f909c4e, 0x3f9d4a2c, - 0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000, - 0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0x3fea9b66, - 0xe0000000, 0x3c39f630, 0x00000000, 0xbfe00000, 0x94247758, - 0xbfc133cc, 0x6b151741, 0x3fe8bc80, 0x20000000, 0xbc82c5e1, - 0x00000000, 0xbfe00000, 0x99fcef32, 0xbfca8279, 0x667f3bcd, - 0x3fe6a09e, 0x20000000, 0xbc8bdd34, 0x00000000, 0xbfe00000, - 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 0x3fe44cf3, 0x20000000, - 0x3c68076a, 0x00000000, 0xbff00000, 0x5bc57974, 0x3fc59267, - 0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000, - 0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0x3fde2b5d, - 0x20000000, 0x3c5e0d89, 0x00000000, 0xbff00000, 0x866b95cf, - 0x3fb37ca1, 0xa6aea963, 0x3fd87de2, 0xe0000000, 0xbc672ced, - 0x00000000, 0xbff00000, 0x939d225a, 0x3fa60bea, 0x2ed59f06, - 0x3fd29406, 0xa0000000, 0xbc75d28d, 0x00000000, 0xbff00000, - 0x011469fb, 0x3f93ad06, 0x3c69a60b, 0x3fc8f8b8, 0xc0000000, - 0xbc626d19, 0x00000000, 0xbff00000, 0x176d6d31, 0x3f73b92e, - 0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000, - 0xbff00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0xbff00000, 0x176d6d31, - 0x3f73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718, - 0x00000000, 0xbff00000, 0x011469fb, 0x3f93ad06, 0x3c69a60b, - 0xbfc8f8b8, 0xc0000000, 0x3c626d19, 0x00000000, 0xbff00000, - 0x939d225a, 0x3fa60bea, 0x2ed59f06, 0xbfd29406, 0xa0000000, - 0x3c75d28d, 0x00000000, 0xbff00000, 0x866b95cf, 0x3fb37ca1, - 0xa6aea963, 0xbfd87de2, 0xe0000000, 0x3c672ced, 0x00000000, - 0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0xbfde2b5d, - 0x20000000, 0xbc5e0d89, 0x00000000, 0xbff00000, 0x5bc57974, - 0x3fc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd, - 0x00000000, 0xbff00000, 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, - 0xbfe44cf3, 0x20000000, 0xbc68076a, 0x00000000, 0xbff00000, - 0x99fcef32, 0xbfca8279, 0x667f3bcd, 0xbfe6a09e, 0x20000000, - 0x3c8bdd34, 0x00000000, 0xbfe00000, 0x94247758, 0xbfc133cc, - 0x6b151741, 0xbfe8bc80, 0x20000000, 0x3c82c5e1, 0x00000000, - 0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0xbfea9b66, - 0xe0000000, 0xbc39f630, 0x00000000, 0xbfe00000, 0x7f909c4e, - 0x3f9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1, - 0x00000000, 0xbfe00000, 0x65455a75, 0x3fbe0875, 0xcf328d46, - 0xbfed906b, 0x20000000, 0xbc7457e6, 0x00000000, 0xbfe00000, - 0x76acf82d, 0xbfa4a031, 0x56c62dda, 0xbfee9f41, 0xe0000000, - 0xbc8760b1, 0x00000000, 0xbfd00000, 0x0e5967d5, 0x3fac1d1f, - 0xcff75cb0, 0xbfef6297, 0x20000000, 0xbc756217, 0x00000000, - 0xbfd00000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0xbfefd88d, - 0x40000000, 0x3c887df6, 0x00000000, 0xbfc00000, 0x00000000, - 0x00000000, 0x00000000, 0xbff00000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x0f592f50, 0xbf9ba165, 0xa3d12526, - 0xbfefd88d, 0x40000000, 0x3c887df6, 0x00000000, 0x3fc00000, - 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 0xbfef6297, 0x20000000, - 0xbc756217, 0x00000000, 0x3fd00000, 0x76acf82d, 0x3fa4a031, - 0x56c62dda, 0xbfee9f41, 0xe0000000, 0xbc8760b1, 0x00000000, - 0x3fd00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0xbfed906b, - 0x20000000, 0xbc7457e6, 0x00000000, 0x3fe00000, 0x7f909c4e, - 0xbf9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1, - 0x00000000, 0x3fe00000, 0x9ae68c87, 0x3fac73b3, 0x290ea1a3, - 0xbfea9b66, 0xe0000000, 0xbc39f630, 0x00000000, 0x3fe00000, - 0x94247758, 0x3fc133cc, 0x6b151741, 0xbfe8bc80, 0x20000000, - 0x3c82c5e1, 0x00000000, 0x3fe00000, 0x99fcef32, 0x3fca8279, - 0x667f3bcd, 0xbfe6a09e, 0x20000000, 0x3c8bdd34, 0x00000000, - 0x3fe00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0xbfe44cf3, - 0x20000000, 0xbc68076a, 0x00000000, 0x3ff00000, 0x5bc57974, - 0xbfc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd, - 0x00000000, 0x3ff00000, 0x73fa1279, 0xbfbe3a68, 0x3806f63b, - 0xbfde2b5d, 0x20000000, 0xbc5e0d89, 0x00000000, 0x3ff00000, - 0x866b95cf, 0xbfb37ca1, 0xa6aea963, 0xbfd87de2, 0xe0000000, - 0x3c672ced, 0x00000000, 0x3ff00000, 0x939d225a, 0xbfa60bea, - 0x2ed59f06, 0xbfd29406, 0xa0000000, 0x3c75d28d, 0x00000000, - 0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0xbfc8f8b8, - 0xc0000000, 0x3c626d19, 0x00000000, 0x3ff00000, 0x176d6d31, - 0xbf73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718, - 0x00000000, 0x3ff00000 - }; - - public int[] scTwo = { - 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555 - }; - - public int[] scThree = { - 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c - }; - - public int[] scOne = { - 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000 - }; - - public int[] piInvTable = { - 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, - 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, - 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, - 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, - 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, - 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, - 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, - 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, - 0xf0cfbc21 - }; - - public int[] piFour = { - 0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d - }; - - public int[] piThirtyTwoInv = { - 0x6dc9c883, 0x40245f30 - }; - - public int[] shifter = { - 0x00000000, 0x43380000 - }; - - public int[] signMask = { - 0x00000000, 0x80000000 - }; - - public int[] pThree = { - 0x2e037073, 0x3b63198a - }; - - public int[] allOnes = { - 0xffffffff, 0x3fefffff - }; - - public int[] twoPowFiftyFive = { - 0x00000000, 0x43600000 - }; - - public int[] twoPowFiftyFiveM = { - 0x00000000, 0x3c800000 - }; - - public int[] pOne = { - 0x54400000, 0x3fb921fb - }; - - public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { - ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16); - ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16); - ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16); - ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16); - ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16); - ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16); - ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16); - ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16); - ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16); - ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8); - ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8); - ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8); - ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8); - ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8); - ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8); - ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8); - ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8); - - Label bb0 = new Label(); - Label bb1 = new Label(); - Label bb2 = new Label(); - Label bb4 = new Label(); - Label bb5 = new Label(); - Label bb6 = new Label(); - Label bb8 = new Label(); - Label bb9 = new Label(); - Label bb10 = new Label(); - Label bb11 = new Label(); - Label bb12 = new Label(); - Label bb13 = new Label(); - Label bb14 = new Label(); - Label bb15 = new Label(); - - Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); - Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); - Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); - Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); - Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); - Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); - Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); - Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); - Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); - Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); - - Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); - Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); - Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); - Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); - Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); - Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); - Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); - Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); - Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); - - AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); - - setCrb(crb); - masm.movsd(stackSlot, value); - if (dest.encoding != value.encoding) { - masm.movdqu(dest, value); - } - - masm.leaq(gpr1, stackSlot); - masm.movl(gpr1, new AMD64Address(gpr1, 4)); - masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, - // 0x40245f30 - masm.movdq(temp2, externalAddress(shifterPtr)); // 0x00000000, - // 0x43380000 - - masm.andl(gpr1, 2147418112); - masm.subl(gpr1, 808452096); - masm.cmpl(gpr1, 281346048); - masm.jcc(ConditionFlag.Above, bb0); - - masm.mulsd(temp1, dest); - masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, - // 0x3fe00000, - // 0x00000000, - // 0x3fe00000 - masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, - // 0x80000000 - masm.pand(temp4, dest); - masm.por(temp5, temp4); - masm.addpd(temp1, temp5); - masm.cvttsd2sil(gpr4, temp1); - masm.cvtsi2sdl(temp1, gpr4); - masm.movdqu(temp6, externalAddress(pTwoPtr)); // 0x1a600000, - // 0x3d90b461, - // 0x1a600000, - // 0x3d90b461 - masm.movq(gpr7, 0x3fb921fb54400000L); - masm.movdq(temp3, gpr7); - masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, - // 0x3ec71de3, - // 0x1a01a01a, - // 0x3efa01a0 - masm.pshufd(temp4, dest, 0x44); - masm.mulsd(temp3, temp1); - if (masm.supports(CPUFeature.SSE3)) { - masm.movddup(temp1, temp1); - } else { - masm.movlhps(temp1, temp1); - } - masm.andl(gpr4, 63); - masm.shll(gpr4, 5); - masm.leaq(gpr1, externalAddress(cTablePtr)); - masm.addq(gpr1, gpr4); - masm.movdqu(temp8, new AMD64Address(gpr1, 0)); - masm.mulpd(temp6, temp1); - masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, - // 0x3b63198a - masm.subsd(temp4, temp3); - masm.subsd(dest, temp3); - if (masm.supports(CPUFeature.SSE3)) { - masm.movddup(temp3, temp4); - } else { - masm.movdqu(temp3, temp4); - masm.movlhps(temp3, temp3); - } - masm.subsd(temp4, temp6); - masm.pshufd(dest, dest, 0x44); - masm.pshufd(temp7, temp8, 0xE); - masm.movdqu(temp2, temp8); - masm.movdqu(temp9, temp7); - masm.mulpd(temp5, dest); - masm.subpd(dest, temp6); - masm.mulsd(temp7, temp4); - masm.subsd(temp3, temp4); - masm.mulpd(temp5, dest); - masm.mulpd(dest, dest); - masm.subsd(temp3, temp6); - masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, - // 0x3f811111, - // 0x55555555, - // 0x3fa55555 - masm.subsd(temp1, temp3); - masm.movdq(temp3, new AMD64Address(gpr1, 24)); - masm.addsd(temp2, temp3); - masm.subsd(temp7, temp2); - masm.mulsd(temp2, temp4); - masm.mulpd(temp6, dest); - masm.mulsd(temp3, temp4); - masm.mulpd(temp2, dest); - masm.mulpd(dest, dest); - masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, - // 0xbf2a01a0, - // 0x16c16c17, - // 0xbf56c16c - masm.mulsd(temp4, temp8); - masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, - // 0xbfc55555, - // 0x00000000, - // 0xbfe00000 - masm.mulpd(temp5, dest); - masm.movdqu(dest, temp3); - masm.addsd(temp3, temp9); - masm.mulpd(temp1, temp7); - masm.movdqu(temp7, temp4); - masm.addsd(temp4, temp3); - masm.addpd(temp6, temp5); - masm.subsd(temp9, temp3); - masm.subsd(temp3, temp4); - masm.addsd(temp1, new AMD64Address(gpr1, 16)); - masm.mulpd(temp6, temp2); - masm.addsd(temp9, dest); - masm.addsd(temp3, temp7); - masm.addsd(temp1, temp9); - masm.addsd(temp1, temp3); - masm.addsd(temp1, temp6); - masm.unpckhpd(temp6, temp6); - masm.movdqu(dest, temp4); - masm.addsd(temp1, temp6); - masm.addsd(dest, temp1); - masm.jmp(bb15); - - masm.bind(bb14); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.divsd(dest, temp1); - masm.jmp(bb15); - - masm.bind(bb0); - masm.jcc(ConditionFlag.Greater, bb1); - - masm.shrl(gpr1, 20); - masm.cmpl(gpr1, 3325); - masm.jcc(ConditionFlag.NotEqual, bb2); - - masm.mulsd(dest, externalAddress(allOnesPtr)); // 0xffffffff, - // 0x3fefffff - masm.jmp(bb15); - - masm.bind(bb2); - masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr)); // 0x00000000, - // 0x43600000 - masm.mulsd(temp3, dest); - masm.subsd(temp3, dest); - masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr)); // 0x00000000, - // 0x3c800000 - masm.jmp(bb15); - - masm.bind(bb1); - masm.pextrw(gpr3, dest, 3); - masm.andl(gpr3, 32752); - masm.cmpl(gpr3, 32752); - masm.jcc(ConditionFlag.Equal, bb14); - - masm.subl(gpr3, 16224); - masm.shrl(gpr3, 7); - masm.andl(gpr3, 65532); - masm.leaq(gpr10, externalAddress(piInvTablePtr)); - masm.addq(gpr3, gpr10); - masm.movdq(gpr1, dest); - masm.movl(gpr9, new AMD64Address(gpr3, 20)); - masm.movl(gpr7, new AMD64Address(gpr3, 24)); - masm.movl(gpr4, gpr1); - masm.shrq(gpr1, 21); - masm.orl(gpr1, Integer.MIN_VALUE); - masm.shrl(gpr1, 11); - masm.movl(gpr8, gpr9); - masm.imulq(gpr9, gpr4); - masm.imulq(gpr8, gpr1); - masm.imulq(gpr7, gpr1); - masm.movl(gpr5, new AMD64Address(gpr3, 16)); - masm.movl(gpr6, new AMD64Address(gpr3, 12)); - masm.movl(gpr10, gpr9); - masm.shrq(gpr9, 32); - masm.addq(gpr8, gpr9); - masm.addq(gpr10, gpr7); - masm.movl(gpr7, gpr10); - masm.shrq(gpr10, 32); - masm.addq(gpr8, gpr10); - masm.movl(gpr9, gpr5); - masm.imulq(gpr5, gpr4); - masm.imulq(gpr9, gpr1); - masm.movl(gpr10, gpr6); - masm.imulq(gpr6, gpr4); - masm.movl(gpr2, gpr5); - masm.shrq(gpr5, 32); - masm.addq(gpr8, gpr2); - masm.movl(gpr2, gpr8); - masm.shrq(gpr8, 32); - masm.addq(gpr9, gpr5); - masm.addq(gpr9, gpr8); - masm.shlq(gpr2, 32); - masm.orq(gpr7, gpr2); - masm.imulq(gpr10, gpr1); - masm.movl(gpr8, new AMD64Address(gpr3, 8)); - masm.movl(gpr5, new AMD64Address(gpr3, 4)); - masm.movl(gpr2, gpr6); - masm.shrq(gpr6, 32); - masm.addq(gpr9, gpr2); - masm.movl(gpr2, gpr9); - masm.shrq(gpr9, 32); - masm.addq(gpr10, gpr6); - masm.addq(gpr10, gpr9); - masm.movq(gpr6, gpr8); - masm.imulq(gpr8, gpr4); - masm.imulq(gpr6, gpr1); - masm.movl(gpr9, gpr8); - masm.shrq(gpr8, 32); - masm.addq(gpr10, gpr9); - masm.movl(gpr9, gpr10); - masm.shrq(gpr10, 32); - masm.addq(gpr6, gpr8); - masm.addq(gpr6, gpr10); - masm.movq(gpr8, gpr5); - masm.imulq(gpr5, gpr4); - masm.imulq(gpr8, gpr1); - masm.shlq(gpr9, 32); - masm.orq(gpr9, gpr2); - masm.movl(gpr1, new AMD64Address(gpr3, 0)); - masm.movl(gpr10, gpr5); - masm.shrq(gpr5, 32); - masm.addq(gpr6, gpr10); - masm.movl(gpr10, gpr6); - masm.shrq(gpr6, 32); - masm.addq(gpr8, gpr5); - masm.addq(gpr8, gpr6); - masm.imulq(gpr4, gpr1); - masm.pextrw(gpr2, dest, 3); - masm.leaq(gpr6, externalAddress(piInvTablePtr)); - masm.subq(gpr3, gpr6); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, 19); - masm.movl(gpr5, 32768); - masm.andl(gpr5, gpr2); - masm.shrl(gpr2, 4); - masm.andl(gpr2, 2047); - masm.subl(gpr2, 1023); - masm.subl(gpr3, gpr2); - masm.addq(gpr8, gpr4); - masm.movl(gpr4, gpr3); - masm.addl(gpr4, 32); - masm.cmpl(gpr3, 1); - masm.jcc(ConditionFlag.Less, bb4); - - masm.negl(gpr3); - masm.addl(gpr3, 29); - masm.shll(gpr8); - masm.movl(gpr6, gpr8); - masm.andl(gpr8, 536870911); - masm.testl(gpr8, 268435456); - masm.jcc(ConditionFlag.NotEqual, bb5); - - masm.shrl(gpr8); - masm.movl(gpr2, 0); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - - masm.bind(bb6); - - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.Equal, bb8); - - masm.bind(bb9); - masm.bsrq(gpr10, gpr8); - masm.movl(gpr3, 29); - masm.subl(gpr3, gpr10); - masm.jcc(ConditionFlag.LessEqual, bb10); - - masm.shlq(gpr8); - masm.movq(gpr1, gpr9); - masm.shlq(gpr9); - masm.addl(gpr4, gpr3); - masm.negl(gpr3); - masm.addl(gpr3, 64); - masm.shrq(gpr1); - masm.shrq(gpr7); - masm.orq(gpr8, gpr1); - masm.orq(gpr9, gpr7); - - masm.bind(bb11); - masm.cvtsi2sdq(dest, gpr8); - masm.shrq(gpr9, 1); - masm.cvtsi2sdq(temp3, gpr9); - masm.xorpd(temp4, temp4); - masm.shll(gpr4, 4); - masm.negl(gpr4); - masm.addl(gpr4, 16368); - masm.orl(gpr4, gpr5); - masm.xorl(gpr4, gpr2); - masm.pinsrw(temp4, gpr4, 3); - masm.leaq(gpr1, externalAddress(piFourPtr)); - masm.movdqu(temp2, new AMD64Address(gpr1, 0)); // 0x40000000, - // 0x3fe921fb, - // 0x18469899, - // 0x3e64442d - masm.xorpd(temp5, temp5); - masm.subl(gpr4, 1008); - masm.pinsrw(temp5, gpr4, 3); - masm.mulsd(dest, temp4); - masm.shll(gpr5, 16); - masm.sarl(gpr5, 31); - masm.mulsd(temp3, temp5); - masm.movdqu(temp1, dest); - masm.pshufd(temp6, temp2, 0xE); - masm.mulsd(dest, temp2); - masm.shrl(gpr6, 29); - masm.addsd(temp1, temp3); - masm.mulsd(temp3, temp2); - masm.addl(gpr6, gpr5); - masm.xorl(gpr6, gpr5); - masm.mulsd(temp6, temp1); - masm.movl(gpr1, gpr6); - masm.addsd(temp6, temp3); - masm.movdqu(temp2, dest); - masm.addsd(dest, temp6); - masm.subsd(temp2, dest); - masm.addsd(temp6, temp2); - - masm.bind(bb12); - masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, - // 0x40245f30 - masm.mulsd(temp1, dest); - masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000, - // 0x3fe00000, - // 0x00000000, - // 0x3fe00000 - masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, - // 0x80000000 - masm.pand(temp4, dest); - masm.por(temp5, temp4); - masm.addpd(temp1, temp5); - masm.cvttsd2sil(gpr4, temp1); - masm.cvtsi2sdl(temp1, gpr4); - masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, - // 0x3fb921fb - masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, - // 0x3d90b461, - // 0x1a600000, - // 0x3d90b461 - masm.mulsd(temp3, temp1); - masm.unpcklpd(temp1, temp1); - masm.shll(gpr1, 3); - masm.addl(gpr4, 1865216); - masm.movdqu(temp4, dest); - masm.addl(gpr4, gpr1); - masm.andl(gpr4, 63); - masm.movdqu(temp5, externalAddress(scFourPtr)); // 0x54400000, - // 0x3fb921fb - masm.leaq(gpr1, externalAddress(cTablePtr)); - masm.shll(gpr4, 5); - masm.addq(gpr1, gpr4); - masm.movdqu(temp8, new AMD64Address(gpr1, 0)); - masm.mulpd(temp2, temp1); - masm.subsd(dest, temp3); - masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, - // 0x3b63198a - masm.subsd(temp4, temp3); - masm.unpcklpd(dest, dest); - masm.movdqu(temp3, temp4); - masm.subsd(temp4, temp2); - masm.mulpd(temp5, dest); - masm.subpd(dest, temp2); - masm.pshufd(temp7, temp8, 0xE); - masm.movdqu(temp9, temp7); - masm.mulsd(temp7, temp4); - masm.subsd(temp3, temp4); - masm.mulpd(temp5, dest); - masm.mulpd(dest, dest); - masm.subsd(temp3, temp2); - masm.movdqu(temp2, temp8); - masm.subsd(temp1, temp3); - masm.movdq(temp3, new AMD64Address(gpr1, 24)); - masm.addsd(temp2, temp3); - masm.subsd(temp7, temp2); - masm.subsd(temp1, temp6); - masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, - // 0x3f811111, - // 0x55555555, - // 0x3fa55555 - masm.mulsd(temp2, temp4); - masm.mulpd(temp6, dest); - masm.mulsd(temp3, temp4); - masm.mulpd(temp2, dest); - masm.mulpd(dest, dest); - masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, - // 0xbf2a01a0, - // 0x16c16c17, - // 0xbf56c16c - masm.mulsd(temp4, temp8); - masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, - // 0xbfc55555, - // 0x00000000, - // 0xbfe00000 - masm.mulpd(temp5, dest); - masm.movdqu(dest, temp3); - masm.addsd(temp3, temp9); - masm.mulpd(temp1, temp7); - masm.movdqu(temp7, temp4); - masm.addsd(temp4, temp3); - masm.addpd(temp6, temp5); - masm.subsd(temp9, temp3); - masm.subsd(temp3, temp4); - masm.addsd(temp1, new AMD64Address(gpr1, 16)); - masm.mulpd(temp6, temp2); - masm.addsd(temp9, dest); - masm.addsd(temp3, temp7); - masm.addsd(temp1, temp9); - masm.addsd(temp1, temp3); - masm.addsd(temp1, temp6); - masm.unpckhpd(temp6, temp6); - masm.movdqu(dest, temp4); - masm.addsd(temp1, temp6); - masm.addsd(dest, temp1); - masm.jmp(bb15); - - masm.bind(bb8); - masm.addl(gpr4, 64); - masm.movq(gpr8, gpr9); - masm.movq(gpr9, gpr7); - masm.movl(gpr7, 0); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.NotEqual, bb9); - - masm.addl(gpr4, 64); - masm.movq(gpr8, gpr9); - masm.movq(gpr9, gpr7); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.NotEqual, bb9); - - masm.xorpd(dest, dest); - masm.xorpd(temp6, temp6); - masm.jmp(bb12); - - masm.bind(bb10); - masm.jcc(ConditionFlag.Equal, bb11); - - masm.negl(gpr3); - masm.shrq(gpr9); - masm.movq(gpr1, gpr8); - masm.shrq(gpr8); - masm.subl(gpr4, gpr3); - masm.negl(gpr3); - masm.addl(gpr3, 64); - masm.shlq(gpr1); - masm.orq(gpr9, gpr1); - masm.jmp(bb11); - - masm.bind(bb4); - masm.negl(gpr3); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - masm.shlq(gpr8); - masm.movq(gpr6, gpr8); - masm.testl(gpr8, Integer.MIN_VALUE); - masm.jcc(ConditionFlag.NotEqual, bb13); - - masm.shrl(gpr8); - masm.movl(gpr2, 0); - masm.shrq(gpr6, 3); - masm.jmp(bb6); - - masm.bind(bb5); - masm.shrl(gpr8); - masm.movl(gpr2, 536870912); - masm.shrl(gpr2); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - masm.shlq(gpr2, 32); - masm.addl(gpr6, 536870912); - masm.movl(gpr3, 0); - masm.movl(gpr10, 0); - masm.subq(gpr3, gpr7); - masm.sbbq(gpr10, gpr9); - masm.sbbq(gpr2, gpr8); - masm.movq(gpr7, gpr3); - masm.movq(gpr9, gpr10); - masm.movq(gpr8, gpr2); - masm.movl(gpr2, 32768); - masm.jmp(bb6); - - masm.bind(bb13); - masm.shrl(gpr8); - masm.movq(gpr2, 0x100000000L); - masm.shrq(gpr2); - masm.movl(gpr3, 0); - masm.movl(gpr10, 0); - masm.subq(gpr3, gpr7); - masm.sbbq(gpr10, gpr9); - masm.sbbq(gpr2, gpr8); - masm.movq(gpr7, gpr3); - masm.movq(gpr9, gpr10); - masm.movq(gpr8, gpr2); - masm.movl(gpr2, 32768); - masm.shrq(gpr6, 3); - masm.addl(gpr6, 536870912); - masm.jmp(bb6); - - masm.bind(bb15); - } - - /* - * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) - * Source Code - * - * ALGORITHM DESCRIPTION - COS() --------------------- - * - * 1. RANGE REDUCTION - * - * We perform an initial range reduction from X to r with - * - * X =~= N * pi/32 + r - * - * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, - * the range reduction is insufficiently accurate. For extremely small inputs, denormalization - * can occur internally, impacting performance. This means that the main path is actually only - * taken for 2^-252 <= |X| < 90112. - * - * To avoid branches, we perform the range reduction to full accuracy each time. - * - * X - N * (P_1 + P_2 + P_3) - * - * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit - * number. Together, these approximate pi well enough for all cases in the restricted range. - * - * The main reduction sequence is: - * - * y = 32/pi * x N = integer(y) (computed by adding and subtracting off SHIFTER) - * - * m_1 = N * P_1 m_2 = N * P_2 r_1 = x - m_1 r = r_1 - m_2 (this r can be used for most of the - * calculation) - * - * c_1 = r_1 - r m_3 = N * P_3 c_2 = c_1 - m_2 c = c_2 - m_3 - * - * 2. MAIN ALGORITHM - * - * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored - * values are: sigma closest power of 2 to cos(B) C_hl 53-bit cos(B) - sigma S_hi + S_lo 2 * - * 53-bit sin(B) - * - * The computation is organized as follows: - * - * sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma) + sin(B) * [cos(r + c) - 1] + - * cos(B) * [sin(r + c) - r] - * - * which is approximately: - * - * [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c] + (C_hl + sigma) * - * [(sin(r) - r) + c] - * - * and this is what is actually computed. We separate this sum into four parts: - * - * hi + med + pols + corr - * - * where - * - * hi = S_hi + sigma r med = C_hl * r pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) - * corr = S_lo + c * ((C_hl + sigma) - S_hi * r) - * - * 3. POLYNOMIAL - * - * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely, - * since it is quite small, so we exploit parallelism to the fullest. - * - * psc4 = SC_4 * r_1 msc4 = psc4 * r r2 = r * r msc2 = SC_2 * r2 r4 = r2 * r2 psc3 = SC_3 + msc4 - * psc1 = SC_1 + msc2 msc3 = r4 * psc3 sincospols = psc1 + msc3 pols = sincospols * - * - * 4. CORRECTION TERM - * - * This is where the "c" component of the range reduction is taken into account; recall that - * just "r" is used for most of the calculation. - * - * -c = m_3 - c_2 -d = S_hi * r - (C_hl + sigma) corr = -c * -d + S_lo - * - * 5. COMPENSATED SUMMATIONS - * - * The two successive compensated summations add up the high and medium parts, leaving just the - * low parts to add up at the end. - * - * rs = sigma * r res_int = S_hi + rs k_0 = S_hi - res_int k_2 = k_0 + rs med = C_hl * r res_hi - * = res_int + med k_1 = res_int - res_hi k_3 = k_1 + med - * - * 6. FINAL SUMMATION - * - * We now add up all the small parts: - * - * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 - * - * Now the overall result is just: - * - * res_hi + res_lo - * - * 7. SMALL ARGUMENTS - * - * Inputs with |X| < 2^-252 are treated specially as 1 - |x|. - * - * Special cases: cos(NaN) = quiet NaN, and raise invalid exception cos(INF) = NaN and raise - * invalid exception cos(0) = 1 - * - */ - - public int[] one = { - 0x00000000, 0x3ff00000 - }; + @Temp protected Value[] temps; - public void cosIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { - ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16); - ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16); - ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16); - ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16); - ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16); - ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16); - ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16); - ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16); - ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16); - ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8); - ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8); - ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8); - ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8); - ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8); + public AMD64MathIntrinsicUnaryOp(LIRInstructionClass type, Register... registers) { + super(type); - Label bb0 = new Label(); - Label bb1 = new Label(); - Label bb3 = new Label(); - Label bb4 = new Label(); - Label bb5 = new Label(); - Label bb6 = new Label(); - Label bb7 = new Label(); - Label bb8 = new Label(); - Label bb9 = new Label(); - Label bb10 = new Label(); - Label bb11 = new Label(); - Label bb12 = new Label(); - Label bb13 = new Label(); - Label bb14 = new Label(); + input = xmm0.asValue(LIRKind.value(AMD64Kind.DOUBLE)); + output = xmm0.asValue(LIRKind.value(AMD64Kind.DOUBLE)); - Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); - Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); - Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); - Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); - Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); - Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); - Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); - Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); - Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); - Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); - - Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); - Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); - Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); - Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); - Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); - Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); - Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); - Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); - Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); - - AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); - - setCrb(crb); - masm.movdq(stackSlot, value); - if (dest.encoding != value.encoding) { - masm.movdqu(dest, value); - } - - masm.leaq(gpr1, stackSlot); - masm.movl(gpr1, new AMD64Address(gpr1, 4)); - masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, - // 0x40245f30 - - masm.andl(gpr1, 2147418112); - masm.subl(gpr1, 808452096); - masm.cmpl(gpr1, 281346048); - masm.jcc(ConditionFlag.Above, bb0); - - masm.mulsd(temp1, dest); - masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, - // 0x3fe00000, - // 0x00000000, - // 0x3fe00000 - masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, - // 0x80000000 - masm.pand(temp4, dest); - masm.por(temp5, temp4); - masm.addpd(temp1, temp5); - masm.cvttsd2sil(gpr4, temp1); - masm.cvtsi2sdl(temp1, gpr4); - masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, - // 0x3d90b461, - // 0x1a600000, - // 0x3d90b461 - masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, - // 0x3fb921fb - masm.mulsd(temp3, temp1); - masm.unpcklpd(temp1, temp1); - masm.addq(gpr4, 1865232); - masm.movdqu(temp4, dest); - masm.andq(gpr4, 63); - masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, - // 0x3ec71de3, - // 0x1a01a01a, - // 0x3efa01a0 - masm.leaq(gpr1, externalAddress(cTablePtr)); - masm.shlq(gpr4, 5); - masm.addq(gpr1, gpr4); - masm.movdqu(temp8, new AMD64Address(gpr1, 0)); - masm.mulpd(temp2, temp1); - masm.subsd(dest, temp3); - masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, - // 0x3b63198a - masm.subsd(temp4, temp3); - masm.unpcklpd(dest, dest); - masm.movdqu(temp3, temp4); - masm.subsd(temp4, temp2); - masm.mulpd(temp5, dest); - masm.subpd(dest, temp2); - masm.pshufd(temp7, temp8, 0xE); - masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, - // 0x3f811111, - // 0x55555555, - // 0x3fa55555 - masm.mulsd(temp7, temp4); - masm.subsd(temp3, temp4); - masm.mulpd(temp5, dest); - masm.mulpd(dest, dest); - masm.subsd(temp3, temp2); - masm.movdqu(temp2, temp8); - masm.subsd(temp1, temp3); - masm.movdq(temp3, new AMD64Address(gpr1, 24)); - masm.addsd(temp2, temp3); - masm.subsd(temp7, temp2); - masm.mulsd(temp2, temp4); - masm.mulpd(temp6, dest); - masm.mulsd(temp3, temp4); - masm.mulpd(temp2, dest); - masm.mulpd(dest, dest); - masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, - // 0xbf2a01a0, - // 0x16c16c17, - // 0xbf56c16c - masm.mulsd(temp4, temp8); - masm.pshufd(temp9, temp8, 0xE); - masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, - // 0xbfc55555, - // 0x00000000, - // 0xbfe00000 - masm.mulpd(temp5, dest); - masm.movdqu(dest, temp3); - masm.addsd(temp3, temp9); - masm.mulpd(temp1, temp7); - masm.movdqu(temp7, temp4); - masm.addsd(temp4, temp3); - masm.addpd(temp6, temp5); - masm.subsd(temp9, temp3); - masm.subsd(temp3, temp4); - masm.addsd(temp1, new AMD64Address(gpr1, 16)); - masm.mulpd(temp6, temp2); - masm.addsd(dest, temp9); - masm.addsd(temp3, temp7); - masm.addsd(dest, temp1); - masm.addsd(dest, temp3); - masm.addsd(dest, temp6); - masm.unpckhpd(temp6, temp6); - masm.addsd(dest, temp6); - masm.addsd(dest, temp4); - masm.jmp(bb13); - - masm.bind(bb14); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.divsd(dest, temp1); - masm.jmp(bb13); - - masm.bind(bb0); - masm.jcc(ConditionFlag.Greater, bb1); - - masm.pextrw(gpr1, dest, 3); - masm.andl(gpr1, 32767); - masm.pinsrw(dest, gpr1, 3); - masm.movdq(temp1, externalAddress(onePtr)); // 0x00000000, - // 0x3ff00000 - masm.subsd(temp1, dest); - masm.movdqu(dest, temp1); - masm.jmp(bb13); - - masm.bind(bb1); - masm.pextrw(gpr3, dest, 3); - masm.andl(gpr3, 32752); - masm.cmpl(gpr3, 32752); - masm.jcc(ConditionFlag.Equal, bb14); - - masm.subl(gpr3, 16224); - masm.shrl(gpr3, 7); - masm.andl(gpr3, 65532); - masm.leaq(gpr10, externalAddress(piInvTablePtr)); - masm.addq(gpr3, gpr10); - masm.movdq(gpr1, dest); - masm.movl(gpr9, new AMD64Address(gpr3, 20)); - masm.movl(gpr7, new AMD64Address(gpr3, 24)); - masm.movl(gpr4, gpr1); - masm.shrq(gpr1, 21); - masm.orl(gpr1, Integer.MIN_VALUE); - masm.shrl(gpr1, 11); - masm.movl(gpr8, gpr9); - masm.imulq(gpr9, gpr4); - masm.imulq(gpr8, gpr1); - masm.imulq(gpr7, gpr1); - masm.movl(gpr5, new AMD64Address(gpr3, 16)); - masm.movl(gpr6, new AMD64Address(gpr3, 12)); - masm.movl(gpr10, gpr9); - masm.shrq(gpr9, 32); - masm.addq(gpr8, gpr9); - masm.addq(gpr10, gpr7); - masm.movl(gpr7, gpr10); - masm.shrq(gpr10, 32); - masm.addq(gpr8, gpr10); - masm.movl(gpr9, gpr5); - masm.imulq(gpr5, gpr4); - masm.imulq(gpr9, gpr1); - masm.movl(gpr10, gpr6); - masm.imulq(gpr6, gpr4); - masm.movl(gpr2, gpr5); - masm.shrq(gpr5, 32); - masm.addq(gpr8, gpr2); - masm.movl(gpr2, gpr8); - masm.shrq(gpr8, 32); - masm.addq(gpr9, gpr5); - masm.addq(gpr9, gpr8); - masm.shlq(gpr2, 32); - masm.orq(gpr7, gpr2); - masm.imulq(gpr10, gpr1); - masm.movl(gpr8, new AMD64Address(gpr3, 8)); - masm.movl(gpr5, new AMD64Address(gpr3, 4)); - masm.movl(gpr2, gpr6); - masm.shrq(gpr6, 32); - masm.addq(gpr9, gpr2); - masm.movl(gpr2, gpr9); - masm.shrq(gpr9, 32); - masm.addq(gpr10, gpr6); - masm.addq(gpr10, gpr9); - masm.movq(gpr6, gpr8); - masm.imulq(gpr8, gpr4); - masm.imulq(gpr6, gpr1); - masm.movl(gpr9, gpr8); - masm.shrq(gpr8, 32); - masm.addq(gpr10, gpr9); - masm.movl(gpr9, gpr10); - masm.shrq(gpr10, 32); - masm.addq(gpr6, gpr8); - masm.addq(gpr6, gpr10); - masm.movq(gpr8, gpr5); - masm.imulq(gpr5, gpr4); - masm.imulq(gpr8, gpr1); - masm.shlq(gpr9, 32); - masm.orq(gpr9, gpr2); - masm.movl(gpr1, new AMD64Address(gpr3, 0)); - masm.movl(gpr10, gpr5); - masm.shrq(gpr5, 32); - masm.addq(gpr6, gpr10); - masm.movl(gpr10, gpr6); - masm.shrq(gpr6, 32); - masm.addq(gpr8, gpr5); - masm.addq(gpr8, gpr6); - masm.imulq(gpr4, gpr1); - masm.pextrw(gpr2, dest, 3); - masm.leaq(gpr6, externalAddress(piInvTablePtr)); - masm.subq(gpr3, gpr6); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, 19); - masm.movl(gpr5, 32768); - masm.andl(gpr5, gpr2); - masm.shrl(gpr2, 4); - masm.andl(gpr2, 2047); - masm.subl(gpr2, 1023); - masm.subl(gpr3, gpr2); - masm.addq(gpr8, gpr4); - masm.movl(gpr4, gpr3); - masm.addl(gpr4, 32); - masm.cmpl(gpr3, 1); - masm.jcc(ConditionFlag.Less, bb3); - - masm.negl(gpr3); - masm.addl(gpr3, 29); - masm.shll(gpr8); - masm.movl(gpr6, gpr8); - masm.andl(gpr8, 536870911); - masm.testl(gpr8, 268435456); - masm.jcc(ConditionFlag.NotEqual, bb4); - - masm.shrl(gpr8); - masm.movl(gpr2, 0); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - - masm.bind(bb5); - - masm.bind(bb6); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.Equal, bb7); - - masm.bind(bb8); - masm.bsrq(gpr10, gpr8); - masm.movl(gpr3, 29); - masm.subl(gpr3, gpr10); - masm.jcc(ConditionFlag.LessEqual, bb9); - - masm.shlq(gpr8); - masm.movq(gpr1, gpr9); - masm.shlq(gpr9); - masm.addl(gpr4, gpr3); - masm.negl(gpr3); - masm.addl(gpr3, 64); - masm.shrq(gpr1); - masm.shrq(gpr7); - masm.orq(gpr8, gpr1); - masm.orq(gpr9, gpr7); - - masm.bind(bb10); - masm.cvtsi2sdq(dest, gpr8); - masm.shrq(gpr9, 1); - masm.cvtsi2sdq(temp3, gpr9); - masm.xorpd(temp4, temp4); - masm.shll(gpr4, 4); - masm.negl(gpr4); - masm.addl(gpr4, 16368); - masm.orl(gpr4, gpr5); - masm.xorl(gpr4, gpr2); - masm.pinsrw(temp4, gpr4, 3); - masm.leaq(gpr2, externalAddress(piFourPtr)); - masm.movdqu(temp2, new AMD64Address(gpr2, 0)); // 0x40000000, - // 0x3fe921fb, - // 0x18469899, - // 0x3e64442d - masm.xorpd(temp5, temp5); - masm.subl(gpr4, 1008); - masm.pinsrw(temp5, gpr4, 3); - masm.mulsd(dest, temp4); - masm.shll(gpr5, 16); - masm.sarl(gpr5, 31); - masm.mulsd(temp3, temp5); - masm.movdqu(temp1, dest); - masm.mulsd(dest, temp2); - masm.pshufd(temp6, temp2, 0xE); - masm.shrl(gpr6, 29); - masm.addsd(temp1, temp3); - masm.mulsd(temp3, temp2); - masm.addl(gpr6, gpr5); - masm.xorl(gpr6, gpr5); - masm.mulsd(temp6, temp1); - masm.movl(gpr1, gpr6); - masm.addsd(temp6, temp3); - masm.movdqu(temp2, dest); - masm.addsd(dest, temp6); - masm.subsd(temp2, dest); - masm.addsd(temp6, temp2); - - masm.bind(bb11); - masm.movq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, - // 0x40245f30 - masm.mulsd(temp1, dest); - masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000, - // 0x3fe00000, - // 0x00000000, - // 0x3fe00000 - masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, - // 0x80000000 - masm.pand(temp4, dest); - masm.por(temp5, temp4); - masm.addpd(temp1, temp5); - masm.cvttsd2siq(gpr4, temp1); - masm.cvtsi2sdq(temp1, gpr4); - masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, - // 0x3fb921fb - masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, - // 0x3d90b461, - // 0x1a600000, - // 0x3d90b461 - masm.mulsd(temp3, temp1); - masm.unpcklpd(temp1, temp1); - masm.shll(gpr1, 3); - masm.addl(gpr4, 1865232); - masm.movdqu(temp4, dest); - masm.addl(gpr4, gpr1); - masm.andl(gpr4, 63); - masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, - // 0x3ec71de3, - // 0x1a01a01a, - // 0x3efa01a0 - masm.leaq(gpr1, externalAddress(cTablePtr)); - masm.shll(gpr4, 5); - masm.addq(gpr1, gpr4); - masm.movdqu(temp8, new AMD64Address(gpr1, 0)); - masm.mulpd(temp2, temp1); - masm.subsd(dest, temp3); - masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, - // 0x3b63198a - masm.subsd(temp4, temp3); - masm.unpcklpd(dest, dest); - masm.movdqu(temp3, temp4); - masm.subsd(temp4, temp2); - masm.mulpd(temp5, dest); - masm.pshufd(temp7, temp8, 0xE); - masm.movdqu(temp9, temp7); - masm.subpd(dest, temp2); - masm.mulsd(temp7, temp4); - masm.subsd(temp3, temp4); - masm.mulpd(temp5, dest); - masm.mulpd(dest, dest); - masm.subsd(temp3, temp2); - masm.movdqu(temp2, temp8); - masm.subsd(temp1, temp3); - masm.movdq(temp3, new AMD64Address(gpr1, 24)); - masm.addsd(temp2, temp3); - masm.subsd(temp7, temp2); - masm.subsd(temp1, temp6); - masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, - // 0x3f811111, - // 0x55555555, - // 0x3fa55555 - masm.mulsd(temp2, temp4); - masm.mulpd(temp6, dest); - masm.mulsd(temp3, temp4); - masm.mulpd(temp2, dest); - masm.mulpd(dest, dest); - masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, - // 0xbf2a01a0, - // 0x16c16c17, - // 0xbf56c16c - masm.mulsd(temp4, temp8); - masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, - // 0xbfc55555, - // 0x00000000, - // 0xbfe00000 - masm.mulpd(temp5, dest); - masm.movdqu(dest, temp3); - masm.addsd(temp3, temp9); - masm.mulpd(temp1, temp7); - masm.movdqu(temp7, temp4); - masm.addsd(temp4, temp3); - masm.addpd(temp6, temp5); - masm.subsd(temp9, temp3); - masm.subsd(temp3, temp4); - masm.addsd(temp1, new AMD64Address(gpr1, 16)); - masm.mulpd(temp6, temp2); - masm.addsd(temp9, dest); - masm.addsd(temp3, temp7); - masm.addsd(temp1, temp9); - masm.addsd(temp1, temp3); - masm.addsd(temp1, temp6); - masm.unpckhpd(temp6, temp6); - masm.movdqu(dest, temp4); - masm.addsd(temp1, temp6); - masm.addsd(dest, temp1); - masm.jmp(bb13); - - masm.bind(bb7); - masm.addl(gpr4, 64); - masm.movq(gpr8, gpr9); - masm.movq(gpr9, gpr7); - masm.movl(gpr7, 0); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.NotEqual, bb8); - - masm.addl(gpr4, 64); - masm.movq(gpr8, gpr9); - masm.movq(gpr9, gpr7); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.NotEqual, bb8); - - masm.xorpd(dest, dest); - masm.xorpd(temp6, temp6); - masm.jmp(bb11); - - masm.bind(bb9); - masm.jcc(ConditionFlag.Equal, bb10); - - masm.negl(gpr3); - masm.shrq(gpr9); - masm.movq(gpr1, gpr8); - masm.shrq(gpr8); - masm.subl(gpr4, gpr3); - masm.negl(gpr3); - masm.addl(gpr3, 64); - masm.shlq(gpr1); - masm.orq(gpr9, gpr1); - masm.jmp(bb10); - - masm.bind(bb3); - masm.negl(gpr3); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - masm.shlq(gpr8); - masm.movq(gpr6, gpr8); - masm.testl(gpr8, Integer.MIN_VALUE); - masm.jcc(ConditionFlag.NotEqual, bb12); - - masm.shrl(gpr8); - masm.movl(gpr2, 0); - masm.shrq(gpr6, 3); - masm.jmp(bb6); - - masm.bind(bb4); - masm.shrl(gpr8); - masm.movl(gpr2, 536870912); - masm.shrl(gpr2); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - masm.shlq(gpr2, 32); - masm.addl(gpr6, 536870912); - masm.movl(gpr3, 0); - masm.movl(gpr10, 0); - masm.subq(gpr3, gpr7); - masm.sbbq(gpr10, gpr9); - masm.sbbq(gpr2, gpr8); - masm.movq(gpr7, gpr3); - masm.movq(gpr9, gpr10); - masm.movq(gpr8, gpr2); - masm.movl(gpr2, 32768); - masm.jmp(bb5); - - masm.bind(bb12); - masm.shrl(gpr8); - masm.movq(gpr2, 0x100000000L); - masm.shrq(gpr2); - masm.movl(gpr3, 0); - masm.movl(gpr10, 0); - masm.subq(gpr3, gpr7); - masm.sbbq(gpr10, gpr9); - masm.sbbq(gpr2, gpr8); - masm.movq(gpr7, gpr3); - masm.movq(gpr9, gpr10); - masm.movq(gpr8, gpr2); - masm.movl(gpr2, 32768); - masm.shrq(gpr6, 3); - masm.addl(gpr6, 536870912); - masm.jmp(bb6); - - masm.bind(bb13); + temps = registersToValues(registers); } - /* - * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) - * Source Code - * - * ALGORITHM DESCRIPTION - TAN() --------------------- - * - * Polynomials coefficients and other constants. - * - * Note that in this algorithm, there is a different polynomial for each breakpoint, so there - * are 32 sets of polynomial coefficients as well as 32 instances of the other constants. - * - * The polynomial coefficients and constants are offset from the start of the main block as - * follows: - * - * 0: c8 | c0 16: c9 | c1 32: c10 | c2 48: c11 | c3 64: c12 | c4 80: c13 | c5 96: c14 | c6 112: - * c15 | c7 128: T_hi 136: T_lo 144: Sigma 152: T_hl 160: Tau 168: Mask 176: (end of block) - * - * The total table size is therefore 5632 bytes. - * - * Note that c0 and c1 are always zero. We could try storing other constants here, and just - * loading the low part of the SIMD register in these cases, after ensuring the high part is - * zero. - * - * The higher terms of the polynomial are computed in the *low* part of the SIMD register. This - * is so we can overlap the multiplication by r^8 and the unpacking of the other part. - * - * The constants are: T_hi + T_lo = accurate constant term in power series Sigma + T_hl = - * accurate coefficient of r in power series (Sigma=1 bit) Tau = multiplier for the reciprocal, - * always -1 or 0 - * - * The basic reconstruction formula using these constants is: - * - * High = tau * recip_hi + t_hi Med = (sgn * r + t_hl * r)_hi Low = (sgn * r + t_hl * r)_lo + - * tau * recip_lo + T_lo + (T_hl + sigma) * c + pol - * - * where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 - * - * (c0 = c1 = 0, but using them keeps SIMD regularity) - * - * We then do a compensated sum High + Med, add the low parts together and then do the final - * sum. - * - * Here recip_hi + recip_lo is an accurate reciprocal of the remainder modulo pi/2 - * - * Special cases: tan(NaN) = quiet NaN, and raise invalid exception tan(INF) = NaN and raise - * invalid exception tan(+/-0) = +/-0 - * - */ - - private static int[] oneHalfTan = { - 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 - }; - - private static int[] mulSixteen = { - 0x00000000, 0x40300000, 0x00000000, 0x3ff00000 - }; - - private static int[] signMaskTan = { - 0x00000000, 0x80000000, 0x00000000, 0x80000000 - }; - - private static int[] piThirtyTwoInvTan = { - 0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30 - }; - - private static int[] pOneTan = { - 0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb - }; - - private static int[] pTwoTan = { - 0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23 - }; - - private static int[] pThreeTan = { - 0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e - }; - - private static int[] cTableTan = { - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa, - 0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555, - 0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36, - 0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, - 0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, - 0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b, - 0x3fb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, - 0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c, - 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391, - 0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, - 0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05, - 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407, - 0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000, - 0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9, - 0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909, - 0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57, - 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9, - 0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e, - 0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000, - 0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000, - 0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, - 0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5, - 0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1, - 0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, - 0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462, - 0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, - 0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000, - 0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000, - 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d, - 0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250, - 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20, - 0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd, - 0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794, - 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279, - 0xb68c1467, 0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337, - 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845, - 0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4, - 0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b, - 0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192, - 0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08, - 0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb, - 0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea, - 0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, - 0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, - 0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd, - 0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, - 0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd, - 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f, - 0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, - 0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5, - 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24, - 0x00000000, 0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000, - 0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984, - 0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3, - 0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c, - 0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece, - 0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19, - 0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000, - 0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000, - 0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, - 0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d, - 0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96, - 0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, - 0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b, - 0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, - 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000, - 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911, - 0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15, - 0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e, - 0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924, - 0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56, - 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce, - 0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf, - 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, - 0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f, - 0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4, - 0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e, - 0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee, - 0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e, - 0xbeaa48a2, 0x0a268358, 0xbf55a443, 0xdfd978e4, 0x3e94b61f, - 0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61, - 0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642, - 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, - 0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, - 0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6, - 0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, - 0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea, - 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11, - 0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, - 0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 0xbf159358, 0xbd397b33, - 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, - 0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958, - 0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000, - 0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13, - 0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888, - 0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82, - 0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed, - 0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70, - 0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000, - 0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000, - 0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000, - 0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, - 0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92, - 0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950, - 0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, - 0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092, - 0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, - 0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000, - 0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, - 0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000, - 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 0x9e42962d, - 0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441, - 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f, - 0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646, - 0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365, - 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62, - 0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa, - 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, - 0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545, - 0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e, - 0x956ed53e, 0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5, - 0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8, - 0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, - 0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c, - 0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c, - 0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0, - 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162, - 0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b, - 0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000, - 0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7, - 0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000, - 0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4, - 0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c, - 0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9, - 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc, - 0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 0x3f2cb899, - 0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000, - 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000, - 0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000, - 0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, - 0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9, - 0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3, - 0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, - 0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952, - 0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, - 0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000, - 0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, - 0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000, - 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8, - 0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b, - 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74, - 0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7, - 0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc, - 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f, - 0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123, - 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, - 0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01, - 0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a, - 0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6, - 0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746, - 0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8, - 0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf, - 0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1, - 0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd, - 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, - 0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, - 0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6, - 0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, - 0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea, - 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11, - 0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, - 0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33, - 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, - 0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320, - 0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000, - 0x00000000, 0x0359f1be, 0x3f0139e4, 0xa4317c6d, 0x3fa67e17, - 0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238, - 0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993, - 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358, - 0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806, - 0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000, - 0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000, - 0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000, - 0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, - 0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e, - 0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5, - 0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, - 0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5, - 0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, - 0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000, - 0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, - 0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000, - 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723, - 0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3, - 0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab, - 0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111, - 0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3, - 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000, - 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d, - 0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2, - 0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018, - 0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334, - 0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd, - 0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d, - 0x19d490d6, 0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6, - 0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, - 0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, - 0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd, - 0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, - 0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd, - 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f, - 0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, - 0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5, - 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f, - 0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000, - 0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9, - 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab, - 0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060, - 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835, - 0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e, - 0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000, - 0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000, - 0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, - 0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c, - 0x3fc1f250, 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71, - 0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, - 0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092, - 0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, - 0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000, - 0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000, - 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd, - 0xbfb13950, 0xd87ab0bb, 0xbfd5335e, 0xce0ae8a5, 0x3fabb382, - 0x79143126, 0x3fddba41, 0x5f2b28d4, 0xbfa552f1, 0x59f21a6d, - 0xbfd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 0x3fd0576c, - 0x8f2c2950, 0xbf9a4898, 0xc0b3f22c, 0xbfc59462, 0x1883a4b8, - 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 0xbfd36a08, - 0x1dce993d, 0x3c6d704d, 0x00000000, 0x3ff00000, 0x2b82ab63, - 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x5a279ea3, 0xbfaa3407, 0x00000000, 0x00000000, 0x432d65fa, - 0x3fa70153, 0x00000000, 0x00000000, 0x891a4602, 0xbf9d03ef, - 0xd62ca5f8, 0xbfca77d9, 0xb35f4628, 0x3f97a265, 0x433258fa, - 0x3fd8cf51, 0xb58fd909, 0xbf8f88e3, 0x01771cea, 0xbfc2b154, - 0xf3562f8e, 0x3f888f57, 0xc028a723, 0x3fc7370f, 0x20b7f9f0, - 0xbf80f44c, 0x214368e9, 0xbfb6dfaa, 0x28891863, 0x3f79b4b6, - 0x172dbbf0, 0x3fb6cb8e, 0xe0553158, 0xbfc975f5, 0x593fe814, - 0xbc2ef5d3, 0x00000000, 0x3ff00000, 0x03dec550, 0x3fa44203, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, - 0xbf953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, - 0x00000000, 0x00000000, 0xda5b7511, 0xbf85ad63, 0xdc230b9b, - 0xbfb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, - 0x77bb08ba, 0xbf757c85, 0xb6247521, 0xbfb1381e, 0x5922170c, - 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0xbf64e391, - 0x3e666320, 0xbfa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, - 0x3fafa8ae, 0x8c5b2da2, 0xbfb936bb, 0x4e88f7a5, 0xbc587d05, - 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, - 0x00000000, 0x00000000, 0x00000000 - }; - - private static int[] maskThirtyFiveTan = { - 0xfffc0000, 0xffffffff, 0x00000000, 0x00000000 - }; - - private static int[] qElevenTan = { - 0xb8fe4d77, 0x3f82609a - }; - - private static int[] qNineTan = { - 0xbf847a43, 0x3f9664a0 - }; - - private static int[] qSevenTan = { - 0x52c4c8ab, 0x3faba1ba - }; - - private static int[] qFiveTan = { - 0x11092746, 0x3fc11111 - }; - - private static int[] qThreeTan = { - 0x55555612, 0x3fd55555 - }; - - private static int[] piInvTableTan = { - 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, - 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, - 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, - 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, - 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, - 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, - 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, - 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, - 0xf0cfbc21 - }; - - private static int[] piFourTan = { - 0x00000000, 0x3fe921fb, 0x4611a626, 0x3e85110b - }; - - private static int[] qqTwoTan = { - 0x676733af, 0x3d32e7b9 - }; - - private static int[] twoPowFiftyFiveTan = { - 0x00000000, 0x43600000 - }; - - private static int[] twoPowMFiftyFiveTan = { - 0x00000000, 0x3c800000 - }; - - public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { - ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16); - ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16); - ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16); - ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16); - ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16); - ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16); - ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16); - ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16); - ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16); - ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16); - ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16); - ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8); - ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16); - ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16); - ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16); - ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8); - ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8); - ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8); - ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8); - ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8); - - Label bb0 = new Label(); - Label bb1 = new Label(); - Label bb2 = new Label(); - Label bb3 = new Label(); - Label bb5 = new Label(); - Label bb6 = new Label(); - Label bb8 = new Label(); - Label bb9 = new Label(); - Label bb10 = new Label(); - Label bb11 = new Label(); - Label bb12 = new Label(); - Label bb13 = new Label(); - Label bb14 = new Label(); - Label bb15 = new Label(); - - Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); - Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); - Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); - Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); - Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); - Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD); - Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD); - Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD); - Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD); - Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD); - - Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); - Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); - Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); - Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); - Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); - Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); - Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); - - setCrb(crb); - if (dest.encoding != value.encoding) { - masm.movdqu(dest, value); - } - - masm.pextrw(gpr1, dest, 3); - masm.andl(gpr1, 32767); - masm.subl(gpr1, 16314); - masm.cmpl(gpr1, 270); - masm.jcc(ConditionFlag.Above, bb0); - - masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, - // 0x3fe00000, - // 0x00000000, - // 0x3fe00000 - masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, - // 0x40300000, - // 0x00000000, - // 0x3ff00000 - masm.unpcklpd(dest, dest); - masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, - // 0x80000000, - // 0x00000000, - // 0x80000000 - masm.andpd(temp4, dest); - masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, - // 0x3fe45f30, - // 0x6dc9c883, - // 0x40245f30 - masm.mulpd(temp1, dest); - masm.por(temp5, temp4); - masm.addpd(temp1, temp5); - masm.movdqu(temp7, temp1); - masm.unpckhpd(temp7, temp7); - masm.cvttsd2sil(gpr4, temp7); - masm.cvttpd2dq(temp1, temp1); - masm.cvtdq2pd(temp1, temp1); - masm.mulpd(temp1, temp6); - masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, - // 0x3fb921fb, - // 0x54440000, - // 0x3fb921fb - masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, - // 0x3d32e7b9 - masm.addq(gpr4, 469248); - masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, - // 0xbd32e7b9, - // 0x4c4c0000, - // 0x3d468c23 - masm.mulpd(temp3, temp1); - masm.andq(gpr4, 31); - masm.mulsd(temp5, temp1); - masm.movq(gpr3, gpr4); - masm.mulpd(temp4, temp1); - masm.shlq(gpr3, 1); - masm.subpd(dest, temp3); - masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, - // 0x3aa8a2e0, - // 0x03707345, - // 0x3ae98a2e - masm.addq(gpr4, gpr3); - masm.shlq(gpr3, 2); - masm.addq(gpr4, gpr3); - masm.addsd(temp5, dest); - masm.movdqu(temp2, dest); - masm.subpd(dest, temp4); - masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, - // 0x3ff00000 - masm.shlq(gpr4, 4); - masm.leaq(gpr1, externalAddress(cTableTanPtr)); - masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, - // 0xffffffff, - // 0x00000000, - // 0x00000000 - masm.movdqu(temp3, dest); - masm.addq(gpr1, gpr4); - masm.subpd(temp2, dest); - masm.unpckhpd(dest, dest); - masm.divsd(temp6, temp5); - masm.subpd(temp2, temp4); - masm.movdqu(temp7, new AMD64Address(gpr1, 16)); - masm.subsd(temp3, temp5); - masm.mulpd(temp7, dest); - masm.subpd(temp2, temp1); - masm.movdqu(temp1, new AMD64Address(gpr1, 48)); - masm.mulpd(temp1, dest); - masm.movdqu(temp4, new AMD64Address(gpr1, 96)); - masm.mulpd(temp4, dest); - masm.addsd(temp2, temp3); - masm.movdqu(temp3, dest); - masm.mulpd(dest, dest); - masm.addpd(temp7, new AMD64Address(gpr1, 0)); - masm.addpd(temp1, new AMD64Address(gpr1, 32)); - masm.mulpd(temp1, dest); - masm.addpd(temp4, new AMD64Address(gpr1, 80)); - masm.addpd(temp7, temp1); - masm.movdqu(temp1, new AMD64Address(gpr1, 112)); - masm.mulpd(temp1, dest); - masm.mulpd(dest, dest); - masm.addpd(temp4, temp1); - masm.movdqu(temp1, new AMD64Address(gpr1, 64)); - masm.mulpd(temp1, dest); - masm.addpd(temp7, temp1); - masm.movdqu(temp1, temp3); - masm.mulpd(temp3, dest); - masm.mulsd(dest, dest); - masm.mulpd(temp1, new AMD64Address(gpr1, 144)); - masm.mulpd(temp4, temp3); - masm.movdqu(temp3, temp1); - masm.addpd(temp7, temp4); - masm.movdqu(temp4, temp1); - masm.mulsd(dest, temp7); - masm.unpckhpd(temp7, temp7); - masm.addsd(dest, temp7); - masm.unpckhpd(temp1, temp1); - masm.addsd(temp3, temp1); - masm.subsd(temp4, temp3); - masm.addsd(temp1, temp4); - masm.movdqu(temp4, temp2); - masm.movdq(temp7, new AMD64Address(gpr1, 144)); - masm.unpckhpd(temp2, temp2); - masm.addsd(temp7, new AMD64Address(gpr1, 152)); - masm.mulsd(temp7, temp2); - masm.addsd(temp7, new AMD64Address(gpr1, 136)); - masm.addsd(temp7, temp1); - masm.addsd(dest, temp7); - masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, - // 0x3ff00000 - masm.mulsd(temp4, temp6); - masm.movdq(temp2, new AMD64Address(gpr1, 168)); - masm.andpd(temp2, temp6); - masm.mulsd(temp5, temp2); - masm.mulsd(temp6, new AMD64Address(gpr1, 160)); - masm.subsd(temp7, temp5); - masm.subsd(temp2, new AMD64Address(gpr1, 128)); - masm.subsd(temp7, temp4); - masm.mulsd(temp7, temp6); - masm.movdqu(temp4, temp3); - masm.subsd(temp3, temp2); - masm.addsd(temp2, temp3); - masm.subsd(temp4, temp2); - masm.addsd(dest, temp4); - masm.subsd(dest, temp7); - masm.addsd(dest, temp3); - masm.jmp(bb15); - - masm.bind(bb0); - masm.jcc(ConditionFlag.Greater, bb1); - - masm.pextrw(gpr1, dest, 3); - masm.movl(gpr4, gpr1); - masm.andl(gpr1, 32752); - masm.jcc(ConditionFlag.Equal, bb2); - - masm.andl(gpr4, 32767); - masm.cmpl(gpr4, 15904); - masm.jcc(ConditionFlag.Below, bb3); - - masm.movdqu(temp2, dest); - masm.movdqu(temp3, dest); - masm.movdq(temp1, externalAddress(qElevenTanPtr)); // 0xb8fe4d77, - // 0x3f82609a - masm.mulsd(temp2, dest); - masm.mulsd(temp3, temp2); - masm.mulsd(temp1, temp2); - masm.addsd(temp1, externalAddress(qNineTanPtr)); // 0xbf847a43, - // 0x3f9664a0 - masm.mulsd(temp1, temp2); - masm.addsd(temp1, externalAddress(qSevenTanPtr)); // 0x52c4c8ab, - // 0x3faba1ba - masm.mulsd(temp1, temp2); - masm.addsd(temp1, externalAddress(qFiveTanPtr)); // 0x11092746, - // 0x3fc11111 - masm.mulsd(temp1, temp2); - masm.addsd(temp1, externalAddress(qThreeTanPtr)); // 0x55555612, - // 0x3fd55555 - masm.mulsd(temp1, temp3); - masm.addsd(dest, temp1); - masm.jmp(bb15); - - masm.bind(bb3); - masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr)); // 0x00000000, - // 0x43600000 - masm.mulsd(temp3, dest); - masm.addsd(dest, temp3); - masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr)); // 0x00000000, - // 0x3c800000 - masm.jmp(bb15); - - masm.bind(bb14); - masm.xorpd(temp1, temp1); - masm.xorpd(dest, dest); - masm.divsd(dest, temp1); - masm.jmp(bb15); - - masm.bind(bb2); - masm.movdqu(temp1, dest); - masm.mulsd(temp1, temp1); - masm.jmp(bb15); - - masm.bind(bb1); - masm.pextrw(gpr3, dest, 3); - masm.andl(gpr3, 32752); - masm.cmpl(gpr3, 32752); - masm.jcc(ConditionFlag.Equal, bb14); - - masm.subl(gpr3, 16224); - masm.shrl(gpr3, 7); - masm.andl(gpr3, 65532); - masm.leaq(gpr10, externalAddress(piInvTableTanPtr)); - masm.addq(gpr3, gpr10); - masm.movdq(gpr1, dest); - masm.movl(gpr9, new AMD64Address(gpr3, 20)); - masm.movl(gpr7, new AMD64Address(gpr3, 24)); - masm.movl(gpr4, gpr1); - masm.shrq(gpr1, 21); - masm.orl(gpr1, Integer.MIN_VALUE); - masm.shrl(gpr1, 11); - masm.movl(gpr8, gpr9); - masm.imulq(gpr9, gpr4); - masm.imulq(gpr8, gpr1); - masm.imulq(gpr7, gpr1); - masm.movl(gpr5, new AMD64Address(gpr3, 16)); - masm.movl(gpr6, new AMD64Address(gpr3, 12)); - masm.movl(gpr10, gpr9); - masm.shrq(gpr9, 32); - masm.addq(gpr8, gpr9); - masm.addq(gpr10, gpr7); - masm.movl(gpr7, gpr10); - masm.shrq(gpr10, 32); - masm.addq(gpr8, gpr10); - masm.movl(gpr9, gpr5); - masm.imulq(gpr5, gpr4); - masm.imulq(gpr9, gpr1); - masm.movl(gpr10, gpr6); - masm.imulq(gpr6, gpr4); - masm.movl(gpr2, gpr5); - masm.shrq(gpr5, 32); - masm.addq(gpr8, gpr2); - masm.movl(gpr2, gpr8); - masm.shrq(gpr8, 32); - masm.addq(gpr9, gpr5); - masm.addq(gpr9, gpr8); - masm.shlq(gpr2, 32); - masm.orq(gpr7, gpr2); - masm.imulq(gpr10, gpr1); - masm.movl(gpr8, new AMD64Address(gpr3, 8)); - masm.movl(gpr5, new AMD64Address(gpr3, 4)); - masm.movl(gpr2, gpr6); - masm.shrq(gpr6, 32); - masm.addq(gpr9, gpr2); - masm.movl(gpr2, gpr9); - masm.shrq(gpr9, 32); - masm.addq(gpr10, gpr6); - masm.addq(gpr10, gpr9); - masm.movq(gpr6, gpr8); - masm.imulq(gpr8, gpr4); - masm.imulq(gpr6, gpr1); - masm.movl(gpr9, gpr8); - masm.shrq(gpr8, 32); - masm.addq(gpr10, gpr9); - masm.movl(gpr9, gpr10); - masm.shrq(gpr10, 32); - masm.addq(gpr6, gpr8); - masm.addq(gpr6, gpr10); - masm.movq(gpr8, gpr5); - masm.imulq(gpr5, gpr4); - masm.imulq(gpr8, gpr1); - masm.shlq(gpr9, 32); - masm.orq(gpr9, gpr2); - masm.movl(gpr1, new AMD64Address(gpr3, 0)); - masm.movl(gpr10, gpr5); - masm.shrq(gpr5, 32); - masm.addq(gpr6, gpr10); - masm.movl(gpr10, gpr6); - masm.shrq(gpr6, 32); - masm.addq(gpr8, gpr5); - masm.addq(gpr8, gpr6); - masm.imulq(gpr4, gpr1); - masm.pextrw(gpr2, dest, 3); - masm.leaq(gpr6, externalAddress(piInvTableTanPtr)); - masm.subq(gpr3, gpr6); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, gpr3); - masm.addl(gpr3, 19); - masm.movl(gpr5, 32768); - masm.andl(gpr5, gpr2); - masm.shrl(gpr2, 4); - masm.andl(gpr2, 2047); - masm.subl(gpr2, 1023); - masm.subl(gpr3, gpr2); - masm.addq(gpr8, gpr4); - masm.movl(gpr4, gpr3); - masm.addl(gpr4, 32); - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Less, bb5); - - masm.negl(gpr3); - masm.addl(gpr3, 29); - masm.shll(gpr8); - masm.movl(gpr6, gpr8); - masm.andl(gpr8, 1073741823); - masm.testl(gpr8, 536870912); - masm.jcc(ConditionFlag.NotEqual, bb6); - - masm.shrl(gpr8); - masm.movl(gpr2, 0); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - - masm.bind(bb8); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.Equal, bb9); - - masm.bind(bb10); - masm.bsrq(gpr10, gpr8); - masm.movl(gpr3, 29); - masm.subl(gpr3, gpr10); - masm.jcc(ConditionFlag.LessEqual, bb11); - - masm.shlq(gpr8); - masm.movq(gpr1, gpr9); - masm.shlq(gpr9); - masm.addl(gpr4, gpr3); - masm.negl(gpr3); - masm.addl(gpr3, 64); - masm.shrq(gpr1); - masm.shrq(gpr7); - masm.orq(gpr8, gpr1); - masm.orq(gpr9, gpr7); - - masm.bind(bb12); - masm.cvtsi2sdq(dest, gpr8); - masm.shrq(gpr9, 1); - masm.cvtsi2sdq(temp3, gpr9); - masm.xorpd(temp4, temp4); - masm.shll(gpr4, 4); - masm.negl(gpr4); - masm.addl(gpr4, 16368); - masm.orl(gpr4, gpr5); - masm.xorl(gpr4, gpr2); - masm.pinsrw(temp4, gpr4, 3); - masm.leaq(gpr1, externalAddress(piFourTanPtr)); - masm.movdq(temp2, new AMD64Address(gpr1, 0)); // 0x00000000, - // 0x3fe921fb, - masm.movdq(temp7, new AMD64Address(gpr1, 8)); // 0x4611a626, - // 0x3e85110b - masm.xorpd(temp5, temp5); - masm.subl(gpr4, 1008); - masm.pinsrw(temp5, gpr4, 3); - masm.mulsd(dest, temp4); - masm.shll(gpr5, 16); - masm.sarl(gpr5, 31); - masm.mulsd(temp3, temp5); - masm.movdqu(temp1, dest); - masm.mulsd(dest, temp2); - masm.shrl(gpr6, 30); - masm.addsd(temp1, temp3); - masm.mulsd(temp3, temp2); - masm.addl(gpr6, gpr5); - masm.xorl(gpr6, gpr5); - masm.mulsd(temp7, temp1); - masm.movl(gpr1, gpr6); - masm.addsd(temp7, temp3); - masm.movdqu(temp2, dest); - masm.addsd(dest, temp7); - masm.subsd(temp2, dest); - masm.addsd(temp7, temp2); - masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, - // 0x3fe45f30, - // 0x6dc9c883, - // 0x40245f30 - if (masm.supports(CPUFeature.SSE3)) { - masm.movddup(dest, dest); - } else { - masm.movlhps(dest, dest); - } - masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, - // 0x80000000, - // 0x00000000, - // 0x80000000 - masm.andpd(temp4, dest); - masm.mulpd(temp1, dest); - if (masm.supports(CPUFeature.SSE3)) { - masm.movddup(temp7, temp7); - } else { - masm.movlhps(temp7, temp7); - } - masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, - // 0x3fe00000, - // 0x00000000, - // 0x3fe00000 - masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, - // 0x40300000, - // 0x00000000, - // 0x3ff00000 - masm.por(temp5, temp4); - masm.addpd(temp1, temp5); - masm.movdqu(temp5, temp1); - masm.unpckhpd(temp5, temp5); - masm.cvttsd2sil(gpr4, temp5); - masm.cvttpd2dq(temp1, temp1); - masm.cvtdq2pd(temp1, temp1); - masm.mulpd(temp1, temp6); - masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, - // 0x3fb921fb, - // 0x54440000, - // 0x3fb921fb - masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, - // 0x3d32e7b9 - masm.shll(gpr1, 4); - masm.addl(gpr4, 469248); - masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, - // 0xbd32e7b9, - // 0x4c4c0000, - // 0x3d468c23 - masm.mulpd(temp3, temp1); - masm.addl(gpr4, gpr1); - masm.andl(gpr4, 31); - masm.mulsd(temp5, temp1); - masm.movl(gpr3, gpr4); - masm.mulpd(temp4, temp1); - masm.shll(gpr3, 1); - masm.subpd(dest, temp3); - masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, - // 0x3aa8a2e0, - // 0x03707345, - // 0x3ae98a2e - masm.addl(gpr4, gpr3); - masm.shll(gpr3, 2); - masm.addl(gpr4, gpr3); - masm.addsd(temp5, dest); - masm.movdqu(temp2, dest); - masm.subpd(dest, temp4); - masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, - // 0x3ff00000 - masm.shll(gpr4, 4); - masm.leaq(gpr1, externalAddress(cTableTanPtr)); - masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, - // 0xffffffff, - // 0x00000000, - // 0x00000000 - masm.movdqu(temp3, dest); - masm.addq(gpr1, gpr4); - masm.subpd(temp2, dest); - masm.unpckhpd(dest, dest); - masm.divsd(temp6, temp5); - masm.subpd(temp2, temp4); - masm.subsd(temp3, temp5); - masm.subpd(temp2, temp1); - masm.movdqu(temp1, new AMD64Address(gpr1, 48)); - masm.addpd(temp2, temp7); - masm.movdqu(temp7, new AMD64Address(gpr1, 16)); - masm.mulpd(temp7, dest); - masm.movdqu(temp4, new AMD64Address(gpr1, 96)); - masm.mulpd(temp1, dest); - masm.mulpd(temp4, dest); - masm.addsd(temp2, temp3); - masm.movdqu(temp3, dest); - masm.mulpd(dest, dest); - masm.addpd(temp7, new AMD64Address(gpr1, 0)); - masm.addpd(temp1, new AMD64Address(gpr1, 32)); - masm.mulpd(temp1, dest); - masm.addpd(temp4, new AMD64Address(gpr1, 80)); - masm.addpd(temp7, temp1); - masm.movdqu(temp1, new AMD64Address(gpr1, 112)); - masm.mulpd(temp1, dest); - masm.mulpd(dest, dest); - masm.addpd(temp4, temp1); - masm.movdqu(temp1, new AMD64Address(gpr1, 64)); - masm.mulpd(temp1, dest); - masm.addpd(temp7, temp1); - masm.movdqu(temp1, temp3); - masm.mulpd(temp3, dest); - masm.mulsd(dest, dest); - masm.mulpd(temp1, new AMD64Address(gpr1, 144)); - masm.mulpd(temp4, temp3); - masm.movdqu(temp3, temp1); - masm.addpd(temp7, temp4); - masm.movdqu(temp4, temp1); - masm.mulsd(dest, temp7); - masm.unpckhpd(temp7, temp7); - masm.addsd(dest, temp7); - masm.unpckhpd(temp1, temp1); - masm.addsd(temp3, temp1); - masm.subsd(temp4, temp3); - masm.addsd(temp1, temp4); - masm.movdqu(temp4, temp2); - masm.movdq(temp7, new AMD64Address(gpr1, 144)); - masm.unpckhpd(temp2, temp2); - masm.addsd(temp7, new AMD64Address(gpr1, 152)); - masm.mulsd(temp7, temp2); - masm.addsd(temp7, new AMD64Address(gpr1, 136)); - masm.addsd(temp7, temp1); - masm.addsd(dest, temp7); - masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, - // 0x3ff00000 - masm.mulsd(temp4, temp6); - masm.movdq(temp2, new AMD64Address(gpr1, 168)); - masm.andpd(temp2, temp6); - masm.mulsd(temp5, temp2); - masm.mulsd(temp6, new AMD64Address(gpr1, 160)); - masm.subsd(temp7, temp5); - masm.subsd(temp2, new AMD64Address(gpr1, 128)); - masm.subsd(temp7, temp4); - masm.mulsd(temp7, temp6); - masm.movdqu(temp4, temp3); - masm.subsd(temp3, temp2); - masm.addsd(temp2, temp3); - masm.subsd(temp4, temp2); - masm.addsd(dest, temp4); - masm.subsd(dest, temp7); - masm.addsd(dest, temp3); - masm.jmp(bb15); - - masm.bind(bb9); - masm.addl(gpr4, 64); - masm.movq(gpr8, gpr9); - masm.movq(gpr9, gpr7); - masm.movl(gpr7, 0); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.NotEqual, bb10); - - masm.addl(gpr4, 64); - masm.movq(gpr8, gpr9); - masm.movq(gpr9, gpr7); - masm.cmpq(gpr8, 0); - masm.jcc(ConditionFlag.NotEqual, bb10); - - masm.jmp(bb12); - - masm.bind(bb11); - masm.jcc(ConditionFlag.Equal, bb12); - - masm.negl(gpr3); - masm.shrq(gpr9); - masm.movq(gpr1, gpr8); - masm.shrq(gpr8); - masm.subl(gpr4, gpr3); - masm.negl(gpr3); - masm.addl(gpr3, 64); - masm.shlq(gpr1); - masm.orq(gpr9, gpr1); - masm.jmp(bb12); - - masm.bind(bb5); - masm.notl(gpr3); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - masm.shlq(gpr8); - masm.movq(gpr6, gpr8); - masm.testl(gpr8, Integer.MIN_VALUE); - masm.jcc(ConditionFlag.NotEqual, bb13); - - masm.shrl(gpr8); - masm.movl(gpr2, 0); - masm.shrq(gpr6, 2); - masm.jmp(bb8); - - masm.bind(bb6); - masm.shrl(gpr8); - masm.movl(gpr2, 1073741824); - masm.shrl(gpr2); - masm.shlq(gpr8, 32); - masm.orq(gpr8, gpr10); - masm.shlq(gpr2, 32); - masm.addl(gpr6, 1073741824); - masm.movl(gpr3, 0); - masm.movl(gpr10, 0); - masm.subq(gpr3, gpr7); - masm.sbbq(gpr10, gpr9); - masm.sbbq(gpr2, gpr8); - masm.movq(gpr7, gpr3); - masm.movq(gpr9, gpr10); - masm.movq(gpr8, gpr2); - masm.movl(gpr2, 32768); - masm.jmp(bb8); - - masm.bind(bb13); - masm.shrl(gpr8); - masm.movq(gpr2, 0x100000000L); - masm.shrq(gpr2); - masm.movl(gpr3, 0); - masm.movl(gpr10, 0); - masm.subq(gpr3, gpr7); - masm.sbbq(gpr10, gpr9); - masm.sbbq(gpr2, gpr8); - masm.movq(gpr7, gpr3); - masm.movq(gpr9, gpr10); - masm.movq(gpr8, gpr2); - masm.movl(gpr2, 32768); - masm.shrq(gpr6, 2); - masm.addl(gpr6, 1073741824); - masm.jmp(bb8); - - masm.bind(bb15); + public final Variable emitLIRWrapper(LIRGeneratorTool gen, Value value) { + LIRKind kind = LIRKind.combine(value); + RegisterValue xmm0Value = xmm0.asValue(kind); + gen.emitMove(xmm0Value, value); + gen.append(this); + Variable result = gen.newVariable(kind); + gen.emitMove(result, xmm0Value); + return result; } - /* - * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) - * Source Code - * - * ALGORITHM DESCRIPTION - EXP() --------------------- - * - * Description: Let K = 64 (table size). x x/log(2) n e = 2 = 2 * T[j] * (1 + P(y)) where x = - * m*log(2)/K + y, y in [-log(2)/K..log(2)/K] m = n*K + j, m,n,j - signed integer, j in - * [-K/2..K/2] j/K values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). - * - * P(y) is a minimax polynomial approximation of exp(x)-1 on small interval - * [-log(2)/K..log(2)/K] (were calculated by Maple V). - * - * To avoid problems with arithmetic overflow and underflow, n n1 n2 value of 2 is safely - * computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] where BIAS is a value of exponent bias. - * - * Special cases: exp(NaN) = NaN exp(+INF) = +INF exp(-INF) = 0 exp(x) = 1 for subnormals for - * finite argument, only exp(0)=1 is exact For IEEE double if x > 709.782712893383973096 then - * exp(x) overflow if x < -745.133219101941108420 then exp(x) underflow - * - */ - - private static int[] cvExp = { - 0x652b82fe, 0x40571547, 0x652b82fe, 0x40571547, 0xfefa0000, - 0x3f862e42, 0xfefa0000, 0x3f862e42, 0xbc9e3b3a, 0x3d1cf79a, - 0xbc9e3b3a, 0x3d1cf79a, 0xfffffffe, 0x3fdfffff, 0xfffffffe, - 0x3fdfffff, 0xe3289860, 0x3f56c15c, 0x555b9e25, 0x3fa55555, - 0xc090cf0f, 0x3f811115, 0x55548ba1, 0x3fc55555 - }; - - private static int[] shifterExp = { - 0x00000000, 0x43380000, 0x00000000, 0x43380000 - }; - - private static int[] mMaskExp = { - 0xffffffc0, 0x00000000, 0xffffffc0, 0x00000000 - }; - - private static int[] biasExp = { - 0x0000ffc0, 0x00000000, 0x0000ffc0, 0x00000000 - }; - - private static int[] tblAddrExp = { - 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0e03754d, - 0x3cad7bbf, 0x3e778060, 0x00002c9a, 0x3567f613, 0x3c8cd252, - 0xd3158574, 0x000059b0, 0x61e6c861, 0x3c60f74e, 0x18759bc8, - 0x00008745, 0x5d837b6c, 0x3c979aa6, 0x6cf9890f, 0x0000b558, - 0x702f9cd1, 0x3c3ebe3d, 0x32d3d1a2, 0x0000e3ec, 0x1e63bcd8, - 0x3ca3516e, 0xd0125b50, 0x00011301, 0x26f0387b, 0x3ca4c554, - 0xaea92ddf, 0x0001429a, 0x62523fb6, 0x3ca95153, 0x3c7d517a, - 0x000172b8, 0x3f1353bf, 0x3c8b898c, 0xeb6fcb75, 0x0001a35b, - 0x3e3a2f5f, 0x3c9aecf7, 0x3168b9aa, 0x0001d487, 0x44a6c38d, - 0x3c8a6f41, 0x88628cd6, 0x0002063b, 0xe3a8a894, 0x3c968efd, - 0x6e756238, 0x0002387a, 0x981fe7f2, 0x3c80472b, 0x65e27cdd, - 0x00026b45, 0x6d09ab31, 0x3c82f7e1, 0xf51fdee1, 0x00029e9d, - 0x720c0ab3, 0x3c8b3782, 0xa6e4030b, 0x0002d285, 0x4db0abb6, - 0x3c834d75, 0x0a31b715, 0x000306fe, 0x5dd3f84a, 0x3c8fdd39, - 0xb26416ff, 0x00033c08, 0xcc187d29, 0x3ca12f8c, 0x373aa9ca, - 0x000371a7, 0x738b5e8b, 0x3ca7d229, 0x34e59ff6, 0x0003a7db, - 0xa72a4c6d, 0x3c859f48, 0x4c123422, 0x0003dea6, 0x259d9205, - 0x3ca8b846, 0x21f72e29, 0x0004160a, 0x60c2ac12, 0x3c4363ed, - 0x6061892d, 0x00044e08, 0xdaa10379, 0x3c6ecce1, 0xb5c13cd0, - 0x000486a2, 0xbb7aafb0, 0x3c7690ce, 0xd5362a27, 0x0004bfda, - 0x9b282a09, 0x3ca083cc, 0x769d2ca6, 0x0004f9b2, 0xc1aae707, - 0x3ca509b0, 0x569d4f81, 0x0005342b, 0x18fdd78e, 0x3c933505, - 0x36b527da, 0x00056f47, 0xe21c5409, 0x3c9063e1, 0xdd485429, - 0x0005ab07, 0x2b64c035, 0x3c9432e6, 0x15ad2148, 0x0005e76f, - 0x99f08c0a, 0x3ca01284, 0xb03a5584, 0x0006247e, 0x0073dc06, - 0x3c99f087, 0x82552224, 0x00066238, 0x0da05571, 0x3c998d4d, - 0x667f3bcc, 0x0006a09e, 0x86ce4786, 0x3ca52bb9, 0x3c651a2e, - 0x0006dfb2, 0x206f0dab, 0x3ca32092, 0xe8ec5f73, 0x00071f75, - 0x8e17a7a6, 0x3ca06122, 0x564267c8, 0x00075feb, 0x461e9f86, - 0x3ca244ac, 0x73eb0186, 0x0007a114, 0xabd66c55, 0x3c65ebe1, - 0x36cf4e62, 0x0007e2f3, 0xbbff67d0, 0x3c96fe9f, 0x994cce12, - 0x00082589, 0x14c801df, 0x3c951f14, 0x9b4492ec, 0x000868d9, - 0xc1f0eab4, 0x3c8db72f, 0x422aa0db, 0x0008ace5, 0x59f35f44, - 0x3c7bf683, 0x99157736, 0x0008f1ae, 0x9c06283c, 0x3ca360ba, - 0xb0cdc5e4, 0x00093737, 0x20f962aa, 0x3c95e8d1, 0x9fde4e4f, - 0x00097d82, 0x2b91ce27, 0x3c71affc, 0x82a3f090, 0x0009c491, - 0x589a2ebd, 0x3c9b6d34, 0x7b5de564, 0x000a0c66, 0x9ab89880, - 0x3c95277c, 0xb23e255c, 0x000a5503, 0x6e735ab3, 0x3c846984, - 0x5579fdbf, 0x000a9e6b, 0x92cb3387, 0x3c8c1a77, 0x995ad3ad, - 0x000ae89f, 0xdc2d1d96, 0x3ca22466, 0xb84f15fa, 0x000b33a2, - 0xb19505ae, 0x3ca1112e, 0xf2fb5e46, 0x000b7f76, 0x0a5fddcd, - 0x3c74ffd7, 0x904bc1d2, 0x000bcc1e, 0x30af0cb3, 0x3c736eae, - 0xdd85529c, 0x000c199b, 0xd10959ac, 0x3c84e08f, 0x2e57d14b, - 0x000c67f1, 0x6c921968, 0x3c676b2c, 0xdcef9069, 0x000cb720, - 0x36df99b3, 0x3c937009, 0x4a07897b, 0x000d072d, 0xa63d07a7, - 0x3c74a385, 0xdcfba487, 0x000d5818, 0xd5c192ac, 0x3c8e5a50, - 0x03db3285, 0x000da9e6, 0x1c4a9792, 0x3c98bb73, 0x337b9b5e, - 0x000dfc97, 0x603a88d3, 0x3c74b604, 0xe78b3ff6, 0x000e502e, - 0x92094926, 0x3c916f27, 0xa2a490d9, 0x000ea4af, 0x41aa2008, - 0x3c8ec3bc, 0xee615a27, 0x000efa1b, 0x31d185ee, 0x3c8a64a9, - 0x5b6e4540, 0x000f5076, 0x4d91cd9d, 0x3c77893b, 0x819e90d8, - 0x000fa7c1 - }; - - private static int[] allOnesExp = { - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff - }; - - private static int[] expBias = { - 0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000 - }; - - private static int[] xMaxExp = { - 0xffffffff, 0x7fefffff - }; - - private static int[] xMinExp = { - 0x00000000, 0x00100000 - }; - - private static int[] infExp = { - 0x00000000, 0x7ff00000 - }; - - private static int[] zeroExp = { - 0x00000000, 0x00000000 - }; - - public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { - ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16); - ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16); - ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8); - ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16); - ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16); - ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16); - ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8); - ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8); - ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8); - ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8); - ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8); - ArrayDataPointerConstant allOnesExpPtr = new ArrayDataPointerConstant(allOnesExp, 8); - - Label bb0 = new Label(); - Label bb1 = new Label(); - Label bb2 = new Label(); - Label bb3 = new Label(); - Label bb4 = new Label(); - Label bb5 = new Label(); - Label bb7 = new Label(); - Label bb8 = new Label(); - Label bb9 = new Label(); - Label bb10 = new Label(); - Label bb11 = new Label(); - Label bb12 = new Label(); - Label bb14 = new Label(); - - Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); - Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); - Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); - Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); - Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); - - Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); - Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); - Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); - Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); - Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); - Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); - Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); - Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); - Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); - Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE); - - AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); - - setCrb(crb); - masm.movsd(stackSlot, value); - if (dest.encoding != value.encoding) { - masm.movdqu(dest, value); - } - - masm.movdqu(temp9, externalAddress(mMaskExpPtr)); // 0xffffffc0, - // 0x00000000, - // 0xffffffc0, - // 0x00000000 - masm.movdqu(temp10, externalAddress(biasExpPtr)); // 0x0000ffc0, - // 0x00000000, - // 0x0000ffc0, - // 0x00000000 - masm.unpcklpd(dest, dest); - masm.leaq(gpr5, stackSlot); - masm.leaq(gpr2, externalAddress(cvExpPtr)); - masm.movdqu(temp1, new AMD64Address(gpr2, 0)); // 0x652b82fe, - // 0x40571547, - // 0x652b82fe, - // 0x40571547 - masm.movdqu(temp6, externalAddress(shifterExpPtr)); // 0x00000000, - // 0x43380000, - // 0x00000000, - // 0x43380000 - masm.movdqu(temp2, new AMD64Address(gpr2, 16)); // 0xfefa0000, - // 0x3f862e42, - // 0xfefa0000, - // 0x3f862e42 - masm.movdqu(temp3, new AMD64Address(gpr2, 32)); // 0xbc9e3b3a, - // 0x3d1cf79a, - // 0xbc9e3b3a, - // 0x3d1cf79a - masm.pextrw(gpr1, dest, 3); - masm.andl(gpr1, 32767); - masm.movl(gpr4, 16527); - masm.subl(gpr4, gpr1); - masm.subl(gpr1, 15504); - masm.orl(gpr4, gpr1); - masm.cmpl(gpr4, Integer.MIN_VALUE); - masm.jcc(ConditionFlag.AboveEqual, bb0); - - masm.leaq(gpr4, externalAddress(tblAddrExpPtr)); - masm.movdqu(temp8, new AMD64Address(gpr2, 48)); // 0xfffffffe, - // 0x3fdfffff, - // 0xfffffffe, - // 0x3fdfffff - masm.movdqu(temp4, new AMD64Address(gpr2, 64)); // 0xe3289860, - // 0x3f56c15c, - // 0x555b9e25, - // 0x3fa55555 - masm.movdqu(temp5, new AMD64Address(gpr2, 80)); // 0xc090cf0f, - // 0x3f811115, - // 0x55548ba1, - // 0x3fc55555 - masm.mulpd(temp1, dest); - masm.addpd(temp1, temp6); - masm.movapd(temp7, temp1); - masm.movdl(gpr1, temp1); - masm.pand(temp7, temp9); - masm.subpd(temp1, temp6); - masm.mulpd(temp2, temp1); - masm.mulpd(temp3, temp1); - masm.paddq(temp7, temp10); - masm.subpd(dest, temp2); - masm.movl(gpr3, gpr1); - masm.andl(gpr3, 63); - masm.shll(gpr3, 4); - masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0)); - masm.sarl(gpr1, 6); - masm.psllq(temp7, 46); - masm.subpd(dest, temp3); - masm.mulpd(temp4, dest); - masm.movl(gpr4, gpr1); - masm.movapd(temp6, dest); - masm.movapd(temp1, dest); - masm.mulpd(temp6, temp6); - masm.mulpd(dest, temp6); - masm.addpd(temp5, temp4); - masm.mulsd(dest, temp6); - masm.mulpd(temp6, temp8); - masm.addsd(temp1, temp2); - masm.unpckhpd(temp2, temp2); - masm.mulpd(dest, temp5); - masm.addsd(temp1, dest); - masm.por(temp2, temp7); - masm.unpckhpd(dest, dest); - masm.addsd(dest, temp1); - masm.addsd(dest, temp6); - masm.addl(gpr4, 894); - masm.cmpl(gpr4, 1916); - masm.jcc(ConditionFlag.Above, bb1); - - masm.mulsd(dest, temp2); - masm.addsd(dest, temp2); - masm.jmp(bb14); - - masm.bind(bb1); - masm.movdqu(temp6, externalAddress(expBiasPtr)); // 0x00000000, - // 0x3ff00000, - // 0x00000000, - // 0x3ff00000 - masm.xorpd(temp3, temp3); - masm.movdqu(temp4, externalAddress(allOnesExpPtr)); // 0xffffffff, - // 0xffffffff, - // 0xffffffff, - // 0xffffffff - masm.movl(gpr4, -1022); - masm.subl(gpr4, gpr1); - masm.movdl(temp5, gpr4); - masm.psllq(temp4, temp5); - masm.movl(gpr3, gpr1); - masm.sarl(gpr1, 1); - masm.pinsrw(temp3, gpr1, 3); - masm.psllq(temp3, 4); - masm.psubd(temp2, temp3); - masm.mulsd(dest, temp2); - masm.cmpl(gpr4, 52); - masm.jcc(ConditionFlag.Greater, bb2); - - masm.pand(temp4, temp2); - masm.paddd(temp3, temp6); - masm.subsd(temp2, temp4); - masm.addsd(dest, temp2); - masm.cmpl(gpr3, 1023); - masm.jcc(ConditionFlag.GreaterEqual, bb3); - - masm.pextrw(gpr3, dest, 3); - masm.andl(gpr3, 32768); - masm.orl(gpr4, gpr3); - masm.cmpl(gpr4, 0); - masm.jcc(ConditionFlag.Equal, bb4); - - masm.movapd(temp6, dest); - masm.addsd(dest, temp4); - masm.mulsd(dest, temp3); - masm.pextrw(gpr3, dest, 3); - masm.andl(gpr3, 32752); - masm.cmpl(gpr3, 0); - masm.jcc(ConditionFlag.Equal, bb5); - - masm.jmp(bb14); - - masm.bind(bb5); - masm.mulsd(temp6, temp3); - masm.mulsd(temp4, temp3); - masm.movdqu(dest, temp6); - masm.pxor(temp6, temp4); - masm.psrad(temp6, 31); - masm.pshufd(temp6, temp6, 85); - masm.psllq(dest, 1); - masm.psrlq(dest, 1); - masm.pxor(dest, temp6); - masm.psrlq(temp6, 63); - masm.paddq(dest, temp6); - masm.paddq(dest, temp4); - masm.jmp(bb14); - - masm.bind(bb4); - masm.addsd(dest, temp4); - masm.mulsd(dest, temp3); - masm.jmp(bb14); - - masm.bind(bb3); - masm.addsd(dest, temp4); - masm.mulsd(dest, temp3); - masm.pextrw(gpr3, dest, 3); - masm.andl(gpr3, 32752); - masm.cmpl(gpr3, 32752); - masm.jcc(ConditionFlag.AboveEqual, bb7); - - masm.jmp(bb14); - - masm.bind(bb2); - masm.paddd(temp3, temp6); - masm.addpd(dest, temp2); - masm.mulsd(dest, temp3); - masm.jmp(bb14); - - masm.bind(bb8); - masm.movsd(dest, externalAddress(xMaxExpPtr)); // 0xffffffff, - // 0x7fefffff - masm.movsd(temp8, externalAddress(xMinExpPtr)); // 0x00000000, - // 0x00100000 - masm.cmpl(gpr1, 2146435072); - masm.jcc(ConditionFlag.AboveEqual, bb9); - - masm.movl(gpr1, new AMD64Address(gpr5, 4)); - masm.cmpl(gpr1, Integer.MIN_VALUE); - masm.jcc(ConditionFlag.AboveEqual, bb10); - - masm.mulsd(dest, dest); - - masm.bind(bb7); - masm.jmp(bb14); - - masm.bind(bb10); - masm.mulsd(dest, temp8); - masm.jmp(bb14); - - masm.bind(bb9); - masm.movl(gpr4, stackSlot); - masm.cmpl(gpr1, 2146435072); - masm.jcc(ConditionFlag.Above, bb11); - - masm.cmpl(gpr4, 0); - masm.jcc(ConditionFlag.NotEqual, bb11); - - masm.movl(gpr1, new AMD64Address(gpr5, 4)); - masm.cmpl(gpr1, 2146435072); - masm.jcc(ConditionFlag.NotEqual, bb12); - - masm.movsd(dest, externalAddress(infExpPtr)); // 0x00000000, - // 0x7ff00000 - masm.jmp(bb14); - - masm.bind(bb12); - masm.movsd(dest, externalAddress(zeroExpPtr)); // 0x00000000, - // 0x00000000 - masm.jmp(bb14); - - masm.bind(bb11); - masm.movsd(dest, stackSlot); - masm.addsd(dest, dest); - masm.jmp(bb14); - - masm.bind(bb0); - masm.movl(gpr1, new AMD64Address(gpr5, 4)); - masm.andl(gpr1, 2147483647); - masm.cmpl(gpr1, 1083179008); - masm.jcc(ConditionFlag.AboveEqual, bb8); - - masm.addsd(dest, externalAddress(onePtr)); // 0x00000000, - // 0x3ff00000 - masm.bind(bb14); - } }