1 /* 2 * Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 25 package org.graalvm.compiler.lir.amd64; 26 27 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; 28 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; 29 import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; 30 import static jdk.vm.ci.code.ValueUtil.asRegister; 31 32 import org.graalvm.compiler.asm.Label; 33 import org.graalvm.compiler.asm.amd64.AMD64Address; 34 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale; 35 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag; 36 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 37 import org.graalvm.compiler.core.common.LIRKind; 38 import org.graalvm.compiler.debug.GraalError; 39 import org.graalvm.compiler.lir.LIRInstructionClass; 40 import org.graalvm.compiler.lir.Opcode; 41 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; 42 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 43 import org.graalvm.compiler.lir.gen.LIRGeneratorTool; 44 45 import jdk.vm.ci.amd64.AMD64; 46 import jdk.vm.ci.amd64.AMD64.CPUFeature; 47 import jdk.vm.ci.amd64.AMD64Kind; 48 import jdk.vm.ci.code.Register; 49 import jdk.vm.ci.meta.AllocatableValue; 50 import jdk.vm.ci.meta.Value; 51 52 public final class AMD64MathIntrinsicUnaryOp extends AMD64LIRInstruction { 53 public static final LIRInstructionClass<AMD64MathIntrinsicUnaryOp> TYPE = LIRInstructionClass.create(AMD64MathIntrinsicUnaryOp.class); 54 55 public enum UnaryIntrinsicOpcode { 56 LOG, 57 LOG10, 58 SIN, 59 COS, 60 TAN, 61 EXP 62 } 63 64 @Opcode private final UnaryIntrinsicOpcode opcode; 65 @Def protected Value result; 66 @Use protected Value input; 67 @Temp({REG, ILLEGAL}) protected Value xmm1Temp = Value.ILLEGAL; 68 @Temp({REG, ILLEGAL}) protected Value xmm2Temp = Value.ILLEGAL; 69 @Temp({REG, ILLEGAL}) protected Value xmm3Temp = Value.ILLEGAL; 70 @Temp({REG, ILLEGAL}) protected Value xmm4Temp = Value.ILLEGAL; 71 @Temp({REG, ILLEGAL}) protected Value xmm5Temp 
= Value.ILLEGAL; 72 @Temp({REG, ILLEGAL}) protected Value xmm6Temp = Value.ILLEGAL; 73 @Temp({REG, ILLEGAL}) protected Value xmm7Temp = Value.ILLEGAL; 74 @Temp({REG, ILLEGAL}) protected Value xmm8Temp = Value.ILLEGAL; 75 @Temp({REG, ILLEGAL}) protected Value xmm9Temp = Value.ILLEGAL; 76 @Temp({REG, ILLEGAL}) protected Value xmm10Temp = Value.ILLEGAL; 77 @Temp({REG, ILLEGAL}) protected Value gpr1Temp = Value.ILLEGAL; 78 @Temp({REG, ILLEGAL}) protected Value gpr2Temp = Value.ILLEGAL; 79 @Temp protected AllocatableValue rcxTemp; 80 @Temp({REG, ILLEGAL}) protected Value gpr4Temp = Value.ILLEGAL; 81 @Temp({REG, ILLEGAL}) protected Value gpr5Temp = Value.ILLEGAL; 82 @Temp({REG, ILLEGAL}) protected Value gpr6Temp = Value.ILLEGAL; 83 @Temp({REG, ILLEGAL}) protected Value gpr7Temp = Value.ILLEGAL; 84 @Temp({REG, ILLEGAL}) protected Value gpr8Temp = Value.ILLEGAL; 85 @Temp({REG, ILLEGAL}) protected Value gpr9Temp = Value.ILLEGAL; 86 @Temp({REG, ILLEGAL}) protected Value gpr10Temp = Value.ILLEGAL; 87 @Temp({STACK, ILLEGAL}) protected Value stackTemp = Value.ILLEGAL; 88 89 CompilationResultBuilder internalCrb; 90 91 public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, UnaryIntrinsicOpcode opcode, Value result, Value input, Value stackTemp) { 92 super(TYPE); 93 this.opcode = opcode; 94 this.result = result; 95 this.input = input; 96 if (opcode == UnaryIntrinsicOpcode.LOG || opcode == UnaryIntrinsicOpcode.LOG10 || 97 opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS || 98 opcode == UnaryIntrinsicOpcode.TAN || opcode == UnaryIntrinsicOpcode.EXP) { 99 this.gpr1Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 100 this.gpr2Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 101 this.rcxTemp = AMD64.rcx.asValue(LIRKind.value(AMD64Kind.QWORD)); 102 this.gpr4Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 103 this.xmm1Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 104 this.xmm2Temp = 
tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 105 this.xmm3Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 106 this.xmm4Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 107 this.xmm5Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 108 this.xmm6Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 109 this.xmm7Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 110 111 if (opcode == UnaryIntrinsicOpcode.EXP) { 112 this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 113 this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 114 this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 115 this.xmm10Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 116 } 117 118 if (opcode == UnaryIntrinsicOpcode.TAN) { 119 this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 120 this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 121 this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 122 this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 123 this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 124 this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 125 } 126 127 if (opcode == UnaryIntrinsicOpcode.SIN || opcode == UnaryIntrinsicOpcode.COS) { 128 this.gpr5Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 129 this.gpr6Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 130 this.gpr7Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 131 this.gpr8Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 132 this.gpr9Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 133 this.gpr10Temp = tool.newVariable(LIRKind.value(AMD64Kind.QWORD)); 134 this.xmm8Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 135 this.xmm9Temp = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE)); 136 } 137 138 this.stackTemp = stackTemp; 139 } 140 } 141 142 public AMD64MathIntrinsicUnaryOp(LIRGeneratorTool tool, 
UnaryIntrinsicOpcode opcode, Value result, Value input) { 143 this(tool, opcode, result, input, Value.ILLEGAL); 144 } 145 146 private void setCrb(CompilationResultBuilder crb) { 147 internalCrb = crb; 148 } 149 150 private AMD64Address externalAddress(ArrayDataPointerConstant curPtr) { 151 return (AMD64Address) internalCrb.recordDataReferenceInCode(curPtr); 152 } 153 154 @Override 155 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 156 switch (opcode) { 157 case LOG: 158 logIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 159 break; 160 case LOG10: 161 log10Intrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 162 break; 163 case SIN: 164 sinIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 165 break; 166 case COS: 167 cosIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 168 break; 169 case TAN: 170 tanIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 171 break; 172 case EXP: 173 expIntrinsic(asRegister(result, AMD64Kind.DOUBLE), asRegister(input, AMD64Kind.DOUBLE), crb, masm); 174 break; 175 default: 176 throw GraalError.shouldNotReachHere(); 177 } 178 } 179 180 private static int[] logTwoTable = { 181 0xfefa3800, 0x3fe62e42, 0x93c76730, 0x3d2ef357, 0xaa241800, 182 0x3fe5ee82, 0x0cda46be, 0x3d220238, 0x5c364800, 0x3fe5af40, 183 0xac10c9fb, 0x3d2dfa63, 0x26bb8c00, 0x3fe5707a, 0xff3303dd, 184 0x3d09980b, 0x26867800, 0x3fe5322e, 0x5d257531, 0x3d05ccc4, 185 0x835a5000, 0x3fe4f45a, 0x6d93b8fb, 0xbd2e6c51, 0x6f970c00, 186 0x3fe4b6fd, 0xed4c541c, 0x3cef7115, 0x27e8a400, 0x3fe47a15, 187 0xf94d60aa, 0xbd22cb6a, 0xf2f92400, 0x3fe43d9f, 0x481051f7, 188 0xbcfd984f, 0x2125cc00, 0x3fe4019c, 0x30f0c74c, 0xbd26ce79, 189 0x0c36c000, 0x3fe3c608, 0x7cfe13c2, 0xbd02b736, 0x17197800, 190 0x3fe38ae2, 0xbb5569a4, 0xbd218b7a, 
0xad9d8c00, 0x3fe35028, 191 0x9527e6ac, 0x3d10b83f, 0x44340800, 0x3fe315da, 0xc5a0ed9c, 192 0xbd274e93, 0x57b0e000, 0x3fe2dbf5, 0x07b9dc11, 0xbd17a6e5, 193 0x6d0ec000, 0x3fe2a278, 0xe797882d, 0x3d206d2b, 0x1134dc00, 194 0x3fe26962, 0x05226250, 0xbd0b61f1, 0xd8bebc00, 0x3fe230b0, 195 0x6e48667b, 0x3d12fc06, 0x5fc61800, 0x3fe1f863, 0xc9fe81d3, 196 0xbd2a7242, 0x49ae6000, 0x3fe1c078, 0xed70e667, 0x3cccacde, 197 0x40f23c00, 0x3fe188ee, 0xf8ab4650, 0x3d14cc4e, 0xf6f29800, 198 0x3fe151c3, 0xa293ae49, 0xbd2edd97, 0x23c75c00, 0x3fe11af8, 199 0xbb9ddcb2, 0xbd258647, 0x8611cc00, 0x3fe0e489, 0x07801742, 200 0x3d1c2998, 0xe2d05400, 0x3fe0ae76, 0x887e7e27, 0x3d1f486b, 201 0x0533c400, 0x3fe078bf, 0x41edf5fd, 0x3d268122, 0xbe760400, 202 0x3fe04360, 0xe79539e0, 0xbd04c45f, 0xe5b20800, 0x3fe00e5a, 203 0xb1727b1c, 0xbd053ba3, 0xaf7a4800, 0x3fdfb358, 0x3c164935, 204 0x3d0085fa, 0xee031800, 0x3fdf4aa7, 0x6f014a8b, 0x3d12cde5, 205 0x56b41000, 0x3fdee2a1, 0x5a470251, 0x3d2f27f4, 0xc3ddb000, 206 0x3fde7b42, 0x5372bd08, 0xbd246550, 0x1a272800, 0x3fde148a, 207 0x07322938, 0xbd1326b2, 0x484c9800, 0x3fddae75, 0x60dc616a, 208 0xbd1ea42d, 0x46def800, 0x3fdd4902, 0xe9a767a8, 0x3d235baf, 209 0x18064800, 0x3fdce42f, 0x3ec7a6b0, 0xbd0797c3, 0xc7455800, 210 0x3fdc7ff9, 0xc15249ae, 0xbd29b6dd, 0x693fa000, 0x3fdc1c60, 211 0x7fe8e180, 0x3d2cec80, 0x1b80e000, 0x3fdbb961, 0xf40a666d, 212 0x3d27d85b, 0x04462800, 0x3fdb56fa, 0x2d841995, 0x3d109525, 213 0x5248d000, 0x3fdaf529, 0x52774458, 0xbd217cc5, 0x3c8ad800, 214 0x3fda93ed, 0xbea77a5d, 0x3d1e36f2, 0x0224f800, 0x3fda3344, 215 0x7f9d79f5, 0x3d23c645, 0xea15f000, 0x3fd9d32b, 0x10d0c0b0, 216 0xbd26279e, 0x43135800, 0x3fd973a3, 0xa502d9f0, 0xbd152313, 217 0x635bf800, 0x3fd914a8, 0x2ee6307d, 0xbd1766b5, 0xa88b3000, 218 0x3fd8b639, 0xe5e70470, 0xbd205ae1, 0x776dc800, 0x3fd85855, 219 0x3333778a, 0x3d2fd56f, 0x3bd81800, 0x3fd7fafa, 0xc812566a, 220 0xbd272090, 0x687cf800, 0x3fd79e26, 0x2efd1778, 0x3d29ec7d, 221 0x76c67800, 0x3fd741d8, 0x49dc60b3, 0x3d2d8b09, 
0xe6af1800, 222 0x3fd6e60e, 0x7c222d87, 0x3d172165, 0x3e9c6800, 0x3fd68ac8, 223 0x2756eba0, 0x3d20a0d3, 0x0b3ab000, 0x3fd63003, 0xe731ae00, 224 0xbd2db623, 0xdf596000, 0x3fd5d5bd, 0x08a465dc, 0xbd0a0b2a, 225 0x53c8d000, 0x3fd57bf7, 0xee5d40ef, 0x3d1faded, 0x0738a000, 226 0x3fd522ae, 0x8164c759, 0x3d2ebe70, 0x9e173000, 0x3fd4c9e0, 227 0x1b0ad8a4, 0xbd2e2089, 0xc271c800, 0x3fd4718d, 0x0967d675, 228 0xbd2f27ce, 0x23d5e800, 0x3fd419b4, 0xec90e09d, 0x3d08e436, 229 0x77333000, 0x3fd3c252, 0xb606bd5c, 0x3d183b54, 0x76be1000, 230 0x3fd36b67, 0xb0f177c8, 0x3d116ecd, 0xe1d36000, 0x3fd314f1, 231 0xd3213cb8, 0xbd28e27a, 0x7cdc9000, 0x3fd2bef0, 0x4a5004f4, 232 0x3d2a9cfa, 0x1134d800, 0x3fd26962, 0xdf5bb3b6, 0x3d2c93c1, 233 0x6d0eb800, 0x3fd21445, 0xba46baea, 0x3d0a87de, 0x635a6800, 234 0x3fd1bf99, 0x5147bdb7, 0x3d2ca6ed, 0xcbacf800, 0x3fd16b5c, 235 0xf7a51681, 0x3d2b9acd, 0x8227e800, 0x3fd1178e, 0x63a5f01c, 236 0xbd2c210e, 0x67616000, 0x3fd0c42d, 0x163ceae9, 0x3d27188b, 237 0x604d5800, 0x3fd07138, 0x16ed4e91, 0x3cf89cdb, 0x5626c800, 238 0x3fd01eae, 0x1485e94a, 0xbd16f08c, 0x6cb3b000, 0x3fcf991c, 239 0xca0cdf30, 0x3d1bcbec, 0xe4dd0000, 0x3fcef5ad, 0x65bb8e11, 240 0xbcca2115, 0xffe71000, 0x3fce530e, 0x6041f430, 0x3cc21227, 241 0xb0d49000, 0x3fcdb13d, 0xf715b035, 0xbd2aff2a, 0xf2656000, 242 0x3fcd1037, 0x75b6f6e4, 0xbd084a7e, 0xc6f01000, 0x3fcc6ffb, 243 0xc5962bd2, 0xbcf1ec72, 0x383be000, 0x3fcbd087, 0x595412b6, 244 0xbd2d4bc4, 0x575bd000, 0x3fcb31d8, 0x4eace1aa, 0xbd0c358d, 245 0x3c8ae000, 0x3fca93ed, 0x50562169, 0xbd287243, 0x07089000, 246 0x3fc9f6c4, 0x6865817a, 0x3d29904d, 0xdcf70000, 0x3fc95a5a, 247 0x58a0ff6f, 0x3d07f228, 0xeb390000, 0x3fc8beaf, 0xaae92cd1, 248 0xbd073d54, 0x6551a000, 0x3fc823c1, 0x9a631e83, 0x3d1e0ddb, 249 0x85445000, 0x3fc7898d, 0x70914305, 0xbd1c6610, 0x8b757000, 250 0x3fc6f012, 0xe59c21e1, 0xbd25118d, 0xbe8c1000, 0x3fc6574e, 251 0x2c3c2e78, 0x3d19cf8b, 0x6b544000, 0x3fc5bf40, 0xeb68981c, 252 0xbd127023, 0xe4a1b000, 0x3fc527e5, 0xe5697dc7, 0x3d2633e8, 253 
0x8333b000, 0x3fc4913d, 0x54fdb678, 0x3d258379, 0xa5993000, 254 0x3fc3fb45, 0x7e6a354d, 0xbd2cd1d8, 0xb0159000, 0x3fc365fc, 255 0x234b7289, 0x3cc62fa8, 0x0c868000, 0x3fc2d161, 0xcb81b4a1, 256 0x3d039d6c, 0x2a49c000, 0x3fc23d71, 0x8fd3df5c, 0x3d100d23, 257 0x7e23f000, 0x3fc1aa2b, 0x44389934, 0x3d2ca78e, 0x8227e000, 258 0x3fc1178e, 0xce2d07f2, 0x3d21ef78, 0xb59e4000, 0x3fc08598, 259 0x7009902c, 0xbd27e5dd, 0x39dbe000, 0x3fbfe891, 0x4fa10afd, 260 0xbd2534d6, 0x830a2000, 0x3fbec739, 0xafe645e0, 0xbd2dc068, 261 0x63844000, 0x3fbda727, 0x1fa71733, 0x3d1a8940, 0x01bc4000, 262 0x3fbc8858, 0xc65aacd3, 0x3d2646d1, 0x8dad6000, 0x3fbb6ac8, 263 0x2bf768e5, 0xbd139080, 0x40b1c000, 0x3fba4e76, 0xb94407c8, 264 0xbd0e42b6, 0x5d594000, 0x3fb9335e, 0x3abd47da, 0x3d23115c, 265 0x2f40e000, 0x3fb8197e, 0xf96ffdf7, 0x3d0f80dc, 0x0aeac000, 266 0x3fb700d3, 0xa99ded32, 0x3cec1e8d, 0x4d97a000, 0x3fb5e95a, 267 0x3c5d1d1e, 0xbd2c6906, 0x5d208000, 0x3fb4d311, 0x82f4e1ef, 268 0xbcf53a25, 0xa7d1e000, 0x3fb3bdf5, 0xa5db4ed7, 0x3d2cc85e, 269 0xa4472000, 0x3fb2aa04, 0xae9c697d, 0xbd20b6e8, 0xd1466000, 270 0x3fb1973b, 0x560d9e9b, 0xbd25325d, 0xb59e4000, 0x3fb08598, 271 0x7009902c, 0xbd17e5dd, 0xc006c000, 0x3faeea31, 0x4fc93b7b, 272 0xbd0e113e, 0xcdddc000, 0x3faccb73, 0x47d82807, 0xbd1a68f2, 273 0xd0fb0000, 0x3faaaef2, 0x353bb42e, 0x3d20fc1a, 0x149fc000, 274 0x3fa894aa, 0xd05a267d, 0xbd197995, 0xf2d4c000, 0x3fa67c94, 275 0xec19afa2, 0xbd029efb, 0xd42e0000, 0x3fa466ae, 0x75bdfd28, 276 0xbd2c1673, 0x2f8d0000, 0x3fa252f3, 0xe021b67b, 0x3d283e9a, 277 0x89e74000, 0x3fa0415d, 0x5cf1d753, 0x3d0111c0, 0xec148000, 278 0x3f9c63d2, 0x3f9eb2f3, 0x3d2578c6, 0x28c90000, 0x3f984925, 279 0x325a0c34, 0xbd2aa0ba, 0x25980000, 0x3f9432a9, 0x928637fe, 280 0x3d098139, 0x58938000, 0x3f902056, 0x06e2f7d2, 0xbd23dc5b, 281 0xa3890000, 0x3f882448, 0xda74f640, 0xbd275577, 0x75890000, 282 0x3f801015, 0x999d2be8, 0xbd10c76b, 0x59580000, 0x3f700805, 283 0xcb31c67b, 0x3d2166af, 0x00000000, 0x00000000, 0x00000000, 284 0x80000000 285 
}; 286 287 private static int[] logTwoData = { 288 0xfefa3800, 0x3fa62e42, 0x93c76730, 0x3ceef357 289 }; 290 291 private static int[] coeffLogTwoData = { 292 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000, 0x3d6fb175, 293 0xbfc5555e, 0x55555555, 0x3fd55555, 0x9999999a, 0x3fc99999, 294 0x00000000, 0xbfe00000 295 }; 296 297 /* 298 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 299 * Source Code 300 * 301 * ALGORITHM DESCRIPTION - LOG() --------------------- 302 * 303 * x=2^k * mx, mx in [1,2) 304 * 305 * Get B~1/mx based on the output of rcpps instruction (B0) B = int((B0*2^7+0.5))/2^7 306 * 307 * Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) 308 * 309 * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and p(r) is a degree 7 310 * polynomial -log(B) read from data table (high, low parts) Result is formed from high and low 311 * parts. 312 * 313 * Special cases: log(NaN) = quiet NaN, and raise invalid exception log(+INF) = that INF log(0) 314 * = -INF with divide-by-zero exception raised log(1) = +0 log(x) = NaN with invalid exception 315 * raised if x < -0, including -INF 316 * 317 */ 318 319 public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 320 ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16); 321 ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16); 322 ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16); 323 324 Label bb0 = new Label(); 325 Label bb1 = new Label(); 326 Label bb2 = new Label(); 327 Label bb3 = new Label(); 328 Label bb4 = new Label(); 329 Label bb5 = new Label(); 330 Label bb6 = new Label(); 331 Label bb7 = new Label(); 332 Label bb8 = new Label(); 333 334 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 335 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 336 Register 
gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 337 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 338 339 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 340 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 341 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 342 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 343 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 344 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 345 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 346 347 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 348 349 setCrb(crb); 350 masm.movdq(stackSlot, value); 351 if (dest.encoding != value.encoding) { 352 masm.movdqu(dest, value); 353 } 354 masm.movq(gpr1, 0x3ff0000000000000L); 355 masm.movdq(temp2, gpr1); 356 masm.movq(gpr3, 0x77f0000000000000L); 357 masm.movdq(temp3, gpr3); 358 masm.movl(gpr2, 32768); 359 masm.movdl(temp4, gpr2); 360 masm.movq(gpr2, 0xffffe00000000000L); 361 masm.movdq(temp5, gpr2); 362 masm.movdqu(temp1, value); 363 masm.pextrw(gpr1, dest, 3); 364 masm.por(dest, temp2); 365 masm.movl(gpr2, 16352); 366 masm.psrlq(dest, 27); 367 masm.leaq(gpr4, externalAddress(logTwoTablePtr)); 368 masm.psrld(dest, 2); 369 masm.rcpps(dest, dest); 370 masm.psllq(temp1, 12); 371 masm.pshufd(temp6, temp5, 0xE4); 372 masm.psrlq(temp1, 12); 373 masm.subl(gpr1, 16); 374 masm.cmpl(gpr1, 32736); 375 masm.jcc(ConditionFlag.AboveEqual, bb0); 376 377 masm.bind(bb1); 378 masm.paddd(dest, temp4); 379 masm.por(temp1, temp3); 380 masm.movdl(gpr3, dest); 381 masm.psllq(dest, 29); 382 masm.pand(temp5, temp1); 383 masm.pand(dest, temp6); 384 masm.subsd(temp1, temp5); 385 masm.mulpd(temp5, dest); 386 masm.andl(gpr1, 32752); 387 masm.subl(gpr1, gpr2); 388 masm.cvtsi2sdl(temp7, gpr1); 389 masm.mulsd(temp1, dest); 390 masm.movdq(temp6, externalAddress(logTwoDataPtr)); // 0xfefa3800, 391 // 0x3fa62e42 392 masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr)); // 0x92492492, 393 // 
0x3fc24924, 394 // 0x00000000, 395 // 0xbfd00000 396 masm.subsd(temp5, temp2); 397 masm.andl(gpr3, 16711680); 398 masm.shrl(gpr3, 12); 399 masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0)); 400 masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr)); 401 masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x3d6fb175, 402 // 0xbfc5555e, 403 // 0x55555555, 404 // 0x3fd55555 405 masm.addsd(temp1, temp5); 406 masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x9999999a, 407 // 0x3fc99999, 408 // 0x00000000, 409 // 0xbfe00000 410 masm.mulsd(temp6, temp7); 411 if (masm.supports(CPUFeature.SSE3)) { 412 masm.movddup(temp5, temp1); 413 } else { 414 masm.movdqu(temp5, temp1); 415 masm.movlhps(temp5, temp5); 416 } 417 masm.leaq(gpr4, externalAddress(logTwoDataPtr)); 418 masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x93c76730, 419 // 0x3ceef357 420 masm.mulsd(temp3, temp1); 421 masm.addsd(dest, temp6); 422 masm.mulpd(temp4, temp5); 423 masm.mulpd(temp5, temp5); 424 if (masm.supports(CPUFeature.SSE3)) { 425 masm.movddup(temp6, dest); 426 } else { 427 masm.movdqu(temp6, dest); 428 masm.movlhps(temp6, temp6); 429 } 430 masm.addsd(dest, temp1); 431 masm.addpd(temp4, temp2); 432 masm.mulpd(temp3, temp5); 433 masm.subsd(temp6, dest); 434 masm.mulsd(temp4, temp1); 435 masm.pshufd(temp2, dest, 0xEE); 436 masm.addsd(temp1, temp6); 437 masm.mulsd(temp5, temp5); 438 masm.addsd(temp7, temp2); 439 masm.addpd(temp4, temp3); 440 masm.addsd(temp1, temp7); 441 masm.mulpd(temp4, temp5); 442 masm.addsd(temp1, temp4); 443 masm.pshufd(temp5, temp4, 0xEE); 444 masm.addsd(temp1, temp5); 445 masm.addsd(dest, temp1); 446 masm.jmp(bb8); 447 448 masm.bind(bb0); 449 masm.movdq(dest, stackSlot); 450 masm.movdq(temp1, stackSlot); 451 masm.addl(gpr1, 16); 452 masm.cmpl(gpr1, 32768); 453 masm.jcc(ConditionFlag.AboveEqual, bb2); 454 455 masm.cmpl(gpr1, 16); 456 masm.jcc(ConditionFlag.Below, bb3); 457 458 masm.bind(bb4); 459 masm.addsd(dest, dest); 460 masm.jmp(bb8); 461 462 masm.bind(bb5); 463 
masm.jcc(ConditionFlag.Above, bb4); 464 465 masm.cmpl(gpr3, 0); 466 masm.jcc(ConditionFlag.Above, bb4); 467 468 masm.jmp(bb6); 469 470 masm.bind(bb3); 471 masm.xorpd(temp1, temp1); 472 masm.addsd(temp1, dest); 473 masm.movdl(gpr3, temp1); 474 masm.psrlq(temp1, 32); 475 masm.movdl(gpr2, temp1); 476 masm.orl(gpr3, gpr2); 477 masm.cmpl(gpr3, 0); 478 masm.jcc(ConditionFlag.Equal, bb7); 479 480 masm.xorpd(temp1, temp1); 481 masm.movl(gpr1, 18416); 482 masm.pinsrw(temp1, gpr1, 3); 483 masm.mulsd(dest, temp1); 484 masm.movdqu(temp1, dest); 485 masm.pextrw(gpr1, dest, 3); 486 masm.por(dest, temp2); 487 masm.psrlq(dest, 27); 488 masm.movl(gpr2, 18416); 489 masm.psrld(dest, 2); 490 masm.rcpps(dest, dest); 491 masm.psllq(temp1, 12); 492 masm.pshufd(temp6, temp5, 0xE4); 493 masm.psrlq(temp1, 12); 494 masm.jmp(bb1); 495 496 masm.bind(bb2); 497 masm.movdl(gpr3, temp1); 498 masm.psrlq(temp1, 32); 499 masm.movdl(gpr2, temp1); 500 masm.addl(gpr2, gpr2); 501 masm.cmpl(gpr2, -2097152); 502 masm.jcc(ConditionFlag.AboveEqual, bb5); 503 504 masm.orl(gpr3, gpr2); 505 masm.cmpl(gpr3, 0); 506 masm.jcc(ConditionFlag.Equal, bb7); 507 508 masm.bind(bb6); 509 masm.xorpd(temp1, temp1); 510 masm.xorpd(dest, dest); 511 masm.movl(gpr1, 32752); 512 masm.pinsrw(temp1, gpr1, 3); 513 masm.mulsd(dest, temp1); 514 masm.jmp(bb8); 515 516 masm.bind(bb7); 517 masm.xorpd(temp1, temp1); 518 masm.xorpd(dest, dest); 519 masm.movl(gpr1, 49136); 520 masm.pinsrw(dest, gpr1, 3); 521 masm.divsd(dest, temp1); 522 523 masm.bind(bb8); 524 } 525 526 private static int[] highmaskLogTen = { 527 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000 528 }; 529 530 private static int[] logTenE = { 531 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c 532 }; 533 534 private static int[] logTenTable = { 535 0x509f7800, 0x3fd34413, 0x1f12b358, 0x3d1fef31, 0x80333400, 536 0x3fd32418, 0xc671d9d0, 0xbcf542bf, 0x51195000, 0x3fd30442, 537 0x78a4b0c3, 0x3d18216a, 0x6fc79400, 0x3fd2e490, 0x80fa389d, 538 0xbc902869, 0x89d04000, 0x3fd2c502, 
0x75c2f564, 0x3d040754, 539 0x4ddd1c00, 0x3fd2a598, 0xd219b2c3, 0xbcfa1d84, 0x6baa7c00, 540 0x3fd28651, 0xfd9abec1, 0x3d1be6d3, 0x94028800, 0x3fd2672d, 541 0xe289a455, 0xbd1ede5e, 0x78b86400, 0x3fd2482c, 0x6734d179, 542 0x3d1fe79b, 0xcca3c800, 0x3fd2294d, 0x981a40b8, 0xbced34ea, 543 0x439c5000, 0x3fd20a91, 0xcc392737, 0xbd1a9cc3, 0x92752c00, 544 0x3fd1ebf6, 0x03c9afe7, 0x3d1e98f8, 0x6ef8dc00, 0x3fd1cd7d, 545 0x71dae7f4, 0x3d08a86c, 0x8fe4dc00, 0x3fd1af25, 0xee9185a1, 546 0xbcff3412, 0xace59400, 0x3fd190ee, 0xc2cab353, 0x3cf17ed9, 547 0x7e925000, 0x3fd172d8, 0x6952c1b2, 0x3cf1521c, 0xbe694400, 548 0x3fd154e2, 0xcacb79ca, 0xbd0bdc78, 0x26cbac00, 0x3fd1370d, 549 0xf71f4de1, 0xbd01f8be, 0x72fa0800, 0x3fd11957, 0x55bf910b, 550 0x3c946e2b, 0x5f106000, 0x3fd0fbc1, 0x39e639c1, 0x3d14a84b, 551 0xa802a800, 0x3fd0de4a, 0xd3f31d5d, 0xbd178385, 0x0b992000, 552 0x3fd0c0f3, 0x3843106f, 0xbd1f602f, 0x486ce800, 0x3fd0a3ba, 553 0x8819497c, 0x3cef987a, 0x1de49400, 0x3fd086a0, 0x1caa0467, 554 0x3d0faec7, 0x4c30cc00, 0x3fd069a4, 0xa4424372, 0xbd1618fc, 555 0x94490000, 0x3fd04cc6, 0x946517d2, 0xbd18384b, 0xb7e84000, 556 0x3fd03006, 0xe0109c37, 0xbd19a6ac, 0x798a0c00, 0x3fd01364, 557 0x5121e864, 0xbd164cf7, 0x38ce8000, 0x3fcfedbf, 0x46214d1a, 558 0xbcbbc402, 0xc8e62000, 0x3fcfb4ef, 0xdab93203, 0x3d1e0176, 559 0x2cb02800, 0x3fcf7c5a, 0x2a2ea8e4, 0xbcfec86a, 0xeeeaa000, 560 0x3fcf43fd, 0xc18e49a4, 0x3cf110a8, 0x9bb6e800, 0x3fcf0bda, 561 0x923cc9c0, 0xbd15ce99, 0xc093f000, 0x3fced3ef, 0x4d4b51e9, 562 0x3d1a04c7, 0xec58f800, 0x3fce9c3c, 0x163cad59, 0x3cac8260, 563 0x9a907000, 0x3fce2d7d, 0x3fa93646, 0x3ce4a1c0, 0x37311000, 564 0x3fcdbf99, 0x32abd1fd, 0x3d07ea9d, 0x6744b800, 0x3fcd528c, 565 0x4dcbdfd4, 0xbd1b08e2, 0xe36de800, 0x3fcce653, 0x0b7b7f7f, 566 0xbd1b8f03, 0x77506800, 0x3fcc7aec, 0xa821c9fb, 0x3d13c163, 567 0x00ff8800, 0x3fcc1053, 0x536bca76, 0xbd074ee5, 0x70719800, 568 0x3fcba684, 0xd7da9b6b, 0xbd1fbf16, 0xc6f8d800, 0x3fcb3d7d, 569 0xe2220bb3, 0x3d1a295d, 0x16c15800, 0x3fcad53c, 
0xe724911e, 570 0xbcf55822, 0x82533800, 0x3fca6dbc, 0x6d982371, 0x3cac567c, 571 0x3c19e800, 0x3fca06fc, 0x84d17d80, 0x3d1da204, 0x85ef8000, 572 0x3fc9a0f8, 0x54466a6a, 0xbd002204, 0xb0ac2000, 0x3fc93bae, 573 0xd601fd65, 0x3d18840c, 0x1bb9b000, 0x3fc8d71c, 0x7bf58766, 574 0xbd14f897, 0x34aae800, 0x3fc8733e, 0x3af6ac24, 0xbd0f5c45, 575 0x76d68000, 0x3fc81012, 0x4303e1a1, 0xbd1f9a80, 0x6af57800, 576 0x3fc7ad96, 0x43fbcb46, 0x3cf4c33e, 0xa6c51000, 0x3fc74bc7, 577 0x70f0eac5, 0xbd192e3b, 0xccab9800, 0x3fc6eaa3, 0xc0093dfe, 578 0xbd0faf15, 0x8b60b800, 0x3fc68a28, 0xde78d5fd, 0xbc9ea4ee, 579 0x9d987000, 0x3fc62a53, 0x962bea6e, 0xbd194084, 0xc9b0e800, 580 0x3fc5cb22, 0x888dd999, 0x3d1fe201, 0xe1634800, 0x3fc56c93, 581 0x16ada7ad, 0x3d1b1188, 0xc176c000, 0x3fc50ea4, 0x4159b5b5, 582 0xbcf09c08, 0x51766000, 0x3fc4b153, 0x84393d23, 0xbcf6a89c, 583 0x83695000, 0x3fc4549d, 0x9f0b8bbb, 0x3d1c4b8c, 0x538d5800, 584 0x3fc3f881, 0xf49df747, 0x3cf89b99, 0xc8138000, 0x3fc39cfc, 585 0xd503b834, 0xbd13b99f, 0xf0df0800, 0x3fc3420d, 0xf011b386, 586 0xbd05d8be, 0xe7466800, 0x3fc2e7b2, 0xf39c7bc2, 0xbd1bb94e, 587 0xcdd62800, 0x3fc28de9, 0x05e6d69b, 0xbd10ed05, 0xd015d800, 588 0x3fc234b0, 0xe29b6c9d, 0xbd1ff967, 0x224ea800, 0x3fc1dc06, 589 0x727711fc, 0xbcffb30d, 0x01540000, 0x3fc183e8, 0x39786c5a, 590 0x3cc23f57, 0xb24d9800, 0x3fc12c54, 0xc905a342, 0x3d003a1d, 591 0x82835800, 0x3fc0d54a, 0x9b9920c0, 0x3d03b25a, 0xc72ac000, 592 0x3fc07ec7, 0x46f26a24, 0x3cf0fa41, 0xdd35d800, 0x3fc028ca, 593 0x41d9d6dc, 0x3d034a65, 0x52474000, 0x3fbfa6a4, 0x44f66449, 594 0x3d19cad3, 0x2da3d000, 0x3fbefcb8, 0x67832999, 0x3d18400f, 595 0x32a10000, 0x3fbe53ce, 0x9c0e3b1a, 0xbcff62fd, 0x556b7000, 596 0x3fbdabe3, 0x02976913, 0xbcf8243b, 0x97e88000, 0x3fbd04f4, 597 0xec793797, 0x3d1c0578, 0x09647000, 0x3fbc5eff, 0x05fc0565, 598 0xbd1d799e, 0xc6426000, 0x3fbbb9ff, 0x4625f5ed, 0x3d1f5723, 599 0xf7afd000, 0x3fbb15f3, 0xdd5aae61, 0xbd1a7e1e, 0xd358b000, 600 0x3fba72d8, 0x3314e4d3, 0x3d17bc91, 0x9b1f5000, 0x3fb9d0ab, 601 
0x9a4d514b, 0x3cf18c9b, 0x9cd4e000, 0x3fb92f69, 0x7e4496ab, 602 0x3cf1f96d, 0x31f4f000, 0x3fb88f10, 0xf56479e7, 0x3d165818, 603 0xbf628000, 0x3fb7ef9c, 0x26bf486d, 0xbd1113a6, 0xb526b000, 604 0x3fb7510c, 0x1a1c3384, 0x3ca9898d, 0x8e31e000, 0x3fb6b35d, 605 0xb3875361, 0xbd0661ac, 0xd01de000, 0x3fb6168c, 0x2a7cacfa, 606 0xbd1bdf10, 0x0af23000, 0x3fb57a98, 0xff868816, 0x3cf046d0, 607 0xd8ea0000, 0x3fb4df7c, 0x1515fbe7, 0xbd1fd529, 0xde3b2000, 608 0x3fb44538, 0x6e59a132, 0x3d1faeee, 0xc8df9000, 0x3fb3abc9, 609 0xf1322361, 0xbd198807, 0x505f1000, 0x3fb3132d, 0x0888e6ab, 610 0x3d1e5380, 0x359bd000, 0x3fb27b61, 0xdfbcbb22, 0xbcfe2724, 611 0x429ee000, 0x3fb1e463, 0x6eb4c58c, 0xbcfe4dd6, 0x4a673000, 612 0x3fb14e31, 0x4ce1ac9b, 0x3d1ba691, 0x28b96000, 0x3fb0b8c9, 613 0x8c7813b8, 0xbd0b3872, 0xc1f08000, 0x3fb02428, 0xc2bc8c2c, 614 0x3cb5ea6b, 0x05a1a000, 0x3faf209c, 0x72e8f18e, 0xbce8df84, 615 0xc0b5e000, 0x3fadfa6d, 0x9fdef436, 0x3d087364, 0xaf416000, 616 0x3facd5c2, 0x1068c3a9, 0x3d0827e7, 0xdb356000, 0x3fabb296, 617 0x120a34d3, 0x3d101a9f, 0x5dfea000, 0x3faa90e6, 0xdaded264, 618 0xbd14c392, 0x6034c000, 0x3fa970ad, 0x1c9d06a9, 0xbd1b705e, 619 0x194c6000, 0x3fa851e8, 0x83996ad9, 0xbd0117bc, 0xcf4ac000, 620 0x3fa73492, 0xb1a94a62, 0xbca5ea42, 0xd67b4000, 0x3fa618a9, 621 0x75aed8ca, 0xbd07119b, 0x9126c000, 0x3fa4fe29, 0x5291d533, 622 0x3d12658f, 0x6f4d4000, 0x3fa3e50e, 0xcd2c5cd9, 0x3d1d5c70, 623 0xee608000, 0x3fa2cd54, 0xd1008489, 0x3d1a4802, 0x9900e000, 624 0x3fa1b6f9, 0x54fb5598, 0xbd16593f, 0x06bb6000, 0x3fa0a1f9, 625 0x64ef57b4, 0xbd17636b, 0xb7940000, 0x3f9f1c9f, 0xee6a4737, 626 0x3cb5d479, 0x91aa0000, 0x3f9cf7f5, 0x3a16373c, 0x3d087114, 627 0x156b8000, 0x3f9ad5ed, 0x836c554a, 0x3c6900b0, 0xd4764000, 628 0x3f98b67f, 0xed12f17b, 0xbcffc974, 0x77dec000, 0x3f9699a7, 629 0x232ce7ea, 0x3d1e35bb, 0xbfbf4000, 0x3f947f5d, 0xd84ffa6e, 630 0x3d0e0a49, 0x82c7c000, 0x3f92679c, 0x8d170e90, 0xbd14d9f2, 631 0xadd20000, 0x3f90525d, 0x86d9f88e, 0x3cdeb986, 0x86f10000, 632 0x3f8c7f36, 
0xb9e0a517, 0x3ce29faa, 0xb75c8000, 0x3f885e9e, 633 0x542568cb, 0xbd1f7bdb, 0x46b30000, 0x3f8442e8, 0xb954e7d9, 634 0x3d1e5287, 0xb7e60000, 0x3f802c07, 0x22da0b17, 0xbd19fb27, 635 0x6c8b0000, 0x3f7833e3, 0x821271ef, 0xbd190f96, 0x29910000, 636 0x3f701936, 0xbc3491a5, 0xbd1bcf45, 0x354a0000, 0x3f600fe3, 637 0xc0ff520a, 0xbd19d71c, 0x00000000, 0x00000000, 0x00000000, 638 0x00000000 639 }; 640 641 private static int[] logTwoLogTenData = { 642 0x509f7800, 0x3f934413, 0x1f12b358, 0x3cdfef31 643 }; 644 645 private static int[] coeffLogTenData = { 646 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309, 0x385593b1, 647 0xc025c917, 0xdc963467, 0x3ffc6a02, 0x7f9d3aa1, 0x4016ab9f, 648 0xdc77b115, 0xbff27af2 649 }; 650 651 /* 652 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 653 * Source Code 654 * 655 * ALGORITHM DESCRIPTION - LOG10() --------------------- 656 * 657 * Let x=2^k * mx, mx in [1,2) 658 * 659 * Get B~1/mx based on the output of rcpss instruction (B0) B = int((B0*LH*2^7+0.5))/2^7 LH is a 660 * short approximation for log10(e) 661 * 662 * Reduced argument: r=B*mx-LH (computed accurately in high and low parts) 663 * 664 * Result: k*log10(2) - log(B) + p(r) p(r) is a degree 7 polynomial -log(B) read from data table 665 * (high, low parts) Result is formed from high and low parts 666 * 667 * Special cases: log10(0) = -INF with divide-by-zero exception raised log10(1) = +0 log10(x) = 668 * NaN with invalid exception raised if x < -0, including -INF log10(+INF) = +INF 669 * 670 */ 671 672 public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 673 ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16); 674 ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16); 675 ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16); 676 ArrayDataPointerConstant logTwoLogTenDataPtr = 
new ArrayDataPointerConstant(logTwoLogTenData, 16); 677 ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16); 678 679 Label bb0 = new Label(); 680 Label bb1 = new Label(); 681 Label bb2 = new Label(); 682 Label bb3 = new Label(); 683 Label bb4 = new Label(); 684 Label bb5 = new Label(); 685 Label bb6 = new Label(); 686 Label bb7 = new Label(); 687 Label bb8 = new Label(); 688 689 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 690 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 691 Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 692 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 693 694 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 695 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 696 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 697 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 698 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 699 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 700 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 701 702 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 703 704 setCrb(crb); 705 masm.movdq(stackSlot, value); 706 if (dest.encoding != value.encoding) { 707 masm.movdqu(dest, value); 708 } 709 masm.movdqu(temp5, externalAddress(highmaskLogTenPtr)); // 0xf8000000, 710 // 0xffffffff, 711 // 0x00000000, 712 // 0xffffe000 713 masm.xorpd(temp2, temp2); 714 masm.movl(gpr1, 16368); 715 masm.pinsrw(temp2, gpr1, 3); 716 masm.movl(gpr2, 1054736384); 717 masm.movdl(temp7, gpr2); 718 masm.xorpd(temp3, temp3); 719 masm.movl(gpr3, 30704); 720 masm.pinsrw(temp3, gpr3, 3); 721 masm.movl(gpr3, 32768); 722 masm.movdl(temp4, gpr3); 723 masm.movdqu(temp1, value); 724 masm.pextrw(gpr1, dest, 3); 725 masm.por(dest, temp2); 726 masm.movl(gpr2, 16352); 727 masm.psrlq(dest, 27); 728 masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, 729 // 0x3fdbc000, 730 // 0xbf2e4108, 731 // 0x3f5a7a6c 732 
masm.psrld(dest, 2); 733 masm.rcpps(dest, dest); 734 masm.psllq(temp1, 12); 735 masm.pshufd(temp6, temp5, 0x4E); 736 masm.psrlq(temp1, 12); 737 masm.subl(gpr1, 16); 738 masm.cmpl(gpr1, 32736); 739 masm.jcc(ConditionFlag.AboveEqual, bb0); 740 741 masm.bind(bb1); 742 masm.mulss(dest, temp7); 743 masm.por(temp1, temp3); 744 masm.andpd(temp5, temp1); 745 masm.paddd(dest, temp4); 746 masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr)); // 0xc1a5f12e, 747 // 0x40358874, 748 // 0x64d4ef0d, 749 // 0xc0089309 750 masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr)); 751 masm.movdqu(temp4, new AMD64Address(gpr4, 16)); // 0x385593b1, 752 // 0xc025c917, 753 // 0xdc963467, 754 // 0x3ffc6a02 755 masm.subsd(temp1, temp5); 756 masm.movdl(gpr3, dest); 757 masm.psllq(dest, 29); 758 masm.andpd(dest, temp6); 759 masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr)); // 0x509f7800, 760 // 0x3f934413 761 masm.andl(gpr1, 32752); 762 masm.subl(gpr1, gpr2); 763 masm.cvtsi2sdl(temp7, gpr1); 764 masm.mulpd(temp5, dest); 765 masm.mulsd(temp1, dest); 766 masm.subsd(temp5, temp2); 767 masm.movdqu(temp2, new AMD64Address(gpr4, 32)); // 0x7f9d3aa1, 768 // 0x4016ab9f, 769 // 0xdc77b115, 770 // 0xbff27af2 771 masm.leaq(gpr4, externalAddress(logTenTablePtr)); 772 masm.andl(gpr3, 16711680); 773 masm.shrl(gpr3, 12); 774 masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504)); 775 masm.addsd(temp1, temp5); 776 masm.mulsd(temp6, temp7); 777 masm.pshufd(temp5, temp1, 0x44); 778 masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr)); 779 masm.mulsd(temp7, new AMD64Address(gpr4, 8)); // 0x1f12b358, 780 // 0x3cdfef31 781 masm.mulsd(temp3, temp1); 782 masm.addsd(dest, temp6); 783 masm.mulpd(temp4, temp5); 784 masm.leaq(gpr4, externalAddress(logTenEPtr)); 785 masm.movdq(temp6, new AMD64Address(gpr4, 8)); // 0xbf2e4108, 786 // 0x3f5a7a6c 787 masm.mulpd(temp5, temp5); 788 masm.addpd(temp4, temp2); 789 masm.mulpd(temp3, temp5); 790 masm.pshufd(temp2, dest, 0xE4); 791 masm.addsd(dest, temp1); 
792 masm.mulsd(temp4, temp1); 793 masm.subsd(temp2, dest); 794 masm.mulsd(temp6, temp1); 795 masm.addsd(temp1, temp2); 796 masm.pshufd(temp2, dest, 0xEE); 797 masm.mulsd(temp5, temp5); 798 masm.addsd(temp7, temp2); 799 masm.addsd(temp1, temp6); 800 masm.addpd(temp4, temp3); 801 masm.addsd(temp1, temp7); 802 masm.mulpd(temp4, temp5); 803 masm.addsd(temp1, temp4); 804 masm.pshufd(temp5, temp4, 0xEE); 805 masm.addsd(temp1, temp5); 806 masm.addsd(dest, temp1); 807 masm.jmp(bb8); 808 809 masm.bind(bb0); 810 masm.movdq(dest, stackSlot); 811 masm.movdq(temp1, stackSlot); 812 masm.addl(gpr1, 16); 813 masm.cmpl(gpr1, 32768); 814 masm.jcc(ConditionFlag.AboveEqual, bb2); 815 816 masm.cmpl(gpr1, 16); 817 masm.jcc(ConditionFlag.Below, bb3); 818 819 masm.bind(bb4); 820 masm.addsd(dest, dest); 821 masm.jmp(bb8); 822 823 masm.bind(bb5); 824 masm.jcc(ConditionFlag.Above, bb4); 825 826 masm.cmpl(gpr3, 0); 827 masm.jcc(ConditionFlag.Above, bb4); 828 829 masm.jmp(bb6); 830 831 masm.bind(bb3); 832 masm.xorpd(temp1, temp1); 833 masm.addsd(temp1, dest); 834 masm.movdl(gpr3, temp1); 835 masm.psrlq(temp1, 32); 836 masm.movdl(gpr2, temp1); 837 masm.orl(gpr3, gpr2); 838 masm.cmpl(gpr3, 0); 839 masm.jcc(ConditionFlag.Equal, bb7); 840 841 masm.xorpd(temp1, temp1); 842 masm.xorpd(temp2, temp2); 843 masm.movl(gpr1, 18416); 844 masm.pinsrw(temp1, gpr1, 3); 845 masm.mulsd(dest, temp1); 846 masm.movl(gpr1, 16368); 847 masm.pinsrw(temp2, gpr1, 3); 848 masm.movdqu(temp1, dest); 849 masm.pextrw(gpr1, dest, 3); 850 masm.por(dest, temp2); 851 masm.movl(gpr2, 18416); 852 masm.psrlq(dest, 27); 853 masm.movdqu(temp2, externalAddress(logTenEPtr)); // 0x00000000, 854 // 0x3fdbc000, 855 // 0xbf2e4108, 856 // 0x3f5a7a6c 857 masm.psrld(dest, 2); 858 masm.rcpps(dest, dest); 859 masm.psllq(temp1, 12); 860 masm.pshufd(temp6, temp5, 0x4E); 861 masm.psrlq(temp1, 12); 862 masm.jmp(bb1); 863 864 masm.bind(bb2); 865 masm.movdl(gpr3, temp1); 866 masm.psrlq(temp1, 32); 867 masm.movdl(gpr2, temp1); 868 masm.addl(gpr2, 
gpr2); 869 masm.cmpl(gpr2, -2097152); 870 masm.jcc(ConditionFlag.AboveEqual, bb5); 871 872 masm.orl(gpr3, gpr2); 873 masm.cmpl(gpr3, 0); 874 masm.jcc(ConditionFlag.Equal, bb7); 875 876 masm.bind(bb6); 877 masm.xorpd(temp1, temp1); 878 masm.xorpd(dest, dest); 879 masm.movl(gpr1, 32752); 880 masm.pinsrw(temp1, gpr1, 3); 881 masm.mulsd(dest, temp1); 882 masm.jmp(bb8); 883 884 masm.bind(bb7); 885 masm.xorpd(temp1, temp1); 886 masm.xorpd(dest, dest); 887 masm.movl(gpr1, 49136); 888 masm.pinsrw(dest, gpr1, 3); 889 masm.divsd(dest, temp1); 890 891 masm.bind(bb8); 892 } 893 894 /* 895 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM) 896 * Source Code 897 * 898 * ALGORITHM DESCRIPTION - SIN() --------------------- 899 * 900 * 1. RANGE REDUCTION 901 * 902 * We perform an initial range reduction from X to r with 903 * 904 * X =~= N * pi/32 + r 905 * 906 * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this, 907 * the range reduction is insufficiently accurate. For extremely small inputs, denormalization 908 * can occur internally, impacting performance. This means that the main path is actually only 909 * taken for 2^-252 <= |X| < 90112. 910 * 911 * To avoid branches, we perform the range reduction to full accuracy each time. 912 * 913 * X - N * (P_1 + P_2 + P_3) 914 * 915 * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit 916 * number. Together, these approximate pi well enough for all cases in the restricted range. 917 * 918 * The main reduction sequence is: 919 * 920 * y = 32/pi * x N = integer(y) (computed by adding and subtracting off SHIFTER) 921 * 922 * m_1 = N * P_1 m_2 = N * P_2 r_1 = x - m_1 r = r_1 - m_2 (this r can be used for most of the 923 * calculation) 924 * 925 * c_1 = r_1 - r m_3 = N * P_3 c_2 = c_1 - m_2 c = c_2 - m_3 926 * 927 * 2. 
MAIN ALGORITHM
     *
     * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored
     * values are:
     *
     *   sigma        closest power of 2 to cos(B)
     *   C_hl         53-bit cos(B) - sigma
     *   S_hi + S_lo  2 * 53-bit sin(B)
     *
     * The computation is organized as follows:
     *
     *   sin(B + r + c) = [sin(B) + sigma * r] + r * (cos(B) - sigma)
     *                    + sin(B) * [cos(r + c) - 1] + cos(B) * [sin(r + c) - r]
     *
     * which is approximately:
     *
     *   [S_hi + sigma * r] + C_hl * r + S_lo + S_hi * [(cos(r) - 1) - r * c]
     *   + (C_hl + sigma) * [(sin(r) - r) + c]
     *
     * and this is what is actually computed. We separate this sum into four parts:
     *
     *   hi + med + pols + corr
     *
     * where
     *
     *   hi   = S_hi + sigma * r
     *   med  = C_hl * r
     *   pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
     *   corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
     *
     * 3. POLYNOMIAL
     *
     * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely,
     * since it is quite small, so we exploit parallelism to the fullest.
     *
     *   psc4       = SC_4 * r_1
     *   msc4       = psc4 * r
     *   r2         = r * r
     *   msc2       = SC_2 * r2
     *   r4         = r2 * r2
     *   psc3       = SC_3 + msc4
     *   psc1       = SC_1 + msc2
     *   msc3       = r4 * psc3
     *   sincospols = psc1 + msc3
     *   pols       = sincospols * <S_hi * r^2 | (C_hl + sigma) * r^3>
     *
     * 4. CORRECTION TERM
     *
     * This is where the "c" component of the range reduction is taken into account; recall that
     * just "r" is used for most of the calculation.
     *
     *   -c   = m_3 - c_2
     *   -d   = S_hi * r - (C_hl + sigma)
     *   corr = -c * -d + S_lo
     *
     * 5. COMPENSATED SUMMATIONS
     *
     * The two successive compensated summations add up the high and medium parts, leaving just the
     * low parts to add up at the end.
     *
     *   rs      = sigma * r
     *   res_int = S_hi + rs
     *   k_0     = S_hi - res_int
     *   k_2     = k_0 + rs
     *   med     = C_hl * r
     *   res_hi  = res_int + med
     *   k_1     = res_int - res_hi
     *   k_3     = k_1 + med
     *
     * 6.
FINAL SUMMATION
     *
     * We now add up all the small parts:
     *
     * res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
     *
     * Now the overall result is just:
     *
     * res_hi + res_lo
     *
     * 7. SMALL ARGUMENTS
     *
     * If |x| < SNN (SNN meaning the smallest normal number), we simply perform
     * 0.1111111 ... 1111 (binary) * x. For SNN <= |x|, we do 2^-55 * (2^55 * x - x).
     *
     * Special cases: sin(NaN) = quiet NaN, and raise invalid exception; sin(INF) = NaN and raise
     * invalid exception; sin(+/-0) = +/-0
     *
     */

    // Constant tables for the sin intrinsic. Each double is stored as two 32-bit words, low word
    // first (compare pOne below with the 64-bit immediate 0x3fb921fb54400000L used on the main
    // path of sinIntrinsic).

    /** Two copies of 0x3fe0000000000000 (0.5); OR-ed with the input's sign bit to round 32/pi * x. */
    public int[] oneHalf = {
                    0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
    };

    /** Two copies of 0x3d90b4611a600000; presumably P_2 of the algorithm description. */
    public int[] pTwo = {
                    0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
    };

    /** Polynomial coefficient pair; presumably SC_4 (bit patterns of 1/9! and 1/8!). */
    public int[] scFour = {
                    0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
    };

    /**
     * Lookup table addressed in sinIntrinsic as {@code cTable + ((N & 63) << 5)}, i.e. 64 entries
     * of 32 bytes (8 ints, 4 doubles) each. The code reads 16 bytes at offset 0, a double at
     * offset 16 and a double at offset 24. Judging by the values and by how they are combined,
     * each entry appears to hold {C_hl, S_hi, S_lo, sigma} in that order (the first entry, for
     * B = 0, is {0, 0, 0, 1.0}) -- TODO confirm against the algorithm description.
     */
    public int[] cTable = {
                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
                    0x00000000, 0x00000000, 0x3ff00000, 0x176d6d31, 0xbf73b92e,
                    0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000,
                    0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0x3fc8f8b8,
                    0xc0000000, 0xbc626d19, 0x00000000, 0x3ff00000, 0x939d225a,
                    0xbfa60bea, 0x2ed59f06, 0x3fd29406, 0xa0000000, 0xbc75d28d,
                    0x00000000, 0x3ff00000, 0x866b95cf, 0xbfb37ca1, 0xa6aea963,
                    0x3fd87de2, 0xe0000000, 0xbc672ced, 0x00000000, 0x3ff00000,
                    0x73fa1279, 0xbfbe3a68, 0x3806f63b, 0x3fde2b5d, 0x20000000,
                    0x3c5e0d89, 0x00000000, 0x3ff00000, 0x5bc57974, 0xbfc59267,
                    0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000,
                    0x3ff00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0x3fe44cf3,
                    0x20000000, 0x3c68076a, 0x00000000, 0x3ff00000, 0x99fcef32,
                    0x3fca8279, 0x667f3bcd, 0x3fe6a09e, 0x20000000, 0xbc8bdd34,
                    0x00000000, 0x3fe00000, 0x94247758, 0x3fc133cc, 0x6b151741,
                    0x3fe8bc80, 0x20000000, 0xbc82c5e1, 0x00000000, 0x3fe00000,
                    0x9ae68c87, 0x3fac73b3, 0x290ea1a3, 0x3fea9b66, 0xe0000000,
                    0x3c39f630, 0x00000000, 0x3fe00000, 0x7f909c4e, 0xbf9d4a2c,
                    0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000,
                    0x3fe00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0x3fed906b,
                    0x20000000, 0x3c7457e6, 0x00000000, 0x3fe00000, 0x76acf82d,
                    0x3fa4a031, 0x56c62dda, 0x3fee9f41, 0xe0000000, 0x3c8760b1,
                    0x00000000, 0x3fd00000, 0x0e5967d5, 0xbfac1d1f, 0xcff75cb0,
                    0x3fef6297, 0x20000000, 0x3c756217, 0x00000000, 0x3fd00000,
                    0x0f592f50, 0xbf9ba165, 0xa3d12526, 0x3fefd88d, 0x40000000,
                    0xbc887df6, 0x00000000, 0x3fc00000, 0x00000000, 0x00000000,
                    0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 0x00000000,
                    0x00000000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0x3fefd88d,
                    0x40000000, 0xbc887df6, 0x00000000, 0xbfc00000, 0x0e5967d5,
                    0x3fac1d1f, 0xcff75cb0, 0x3fef6297, 0x20000000, 0x3c756217,
                    0x00000000, 0xbfd00000, 0x76acf82d, 0xbfa4a031, 0x56c62dda,
                    0x3fee9f41, 0xe0000000, 0x3c8760b1, 0x00000000, 0xbfd00000,
                    0x65455a75, 0x3fbe0875, 0xcf328d46, 0x3fed906b, 0x20000000,
                    0x3c7457e6, 0x00000000, 0xbfe00000, 0x7f909c4e, 0x3f9d4a2c,
                    0xf180bdb1, 0x3fec38b2, 0x80000000, 0xbc76e0b1, 0x00000000,
                    0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0x3fea9b66,
                    0xe0000000, 0x3c39f630, 0x00000000, 0xbfe00000, 0x94247758,
                    0xbfc133cc, 0x6b151741, 0x3fe8bc80, 0x20000000, 0xbc82c5e1,
                    0x00000000, 0xbfe00000, 0x99fcef32, 0xbfca8279, 0x667f3bcd,
                    0x3fe6a09e, 0x20000000, 0xbc8bdd34, 0x00000000, 0xbfe00000,
                    0x53aba2fd, 0x3fcd0dfe, 0x25091dd6, 0x3fe44cf3, 0x20000000,
                    0x3c68076a, 0x00000000, 0xbff00000, 0x5bc57974, 0x3fc59267,
                    0x39ae68c8, 0x3fe1c73b, 0x20000000, 0x3c8b25dd, 0x00000000,
                    0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0x3fde2b5d,
                    0x20000000, 0x3c5e0d89, 0x00000000, 0xbff00000, 0x866b95cf,
                    0x3fb37ca1, 0xa6aea963, 0x3fd87de2, 0xe0000000, 0xbc672ced,
                    0x00000000, 0xbff00000, 0x939d225a, 0x3fa60bea, 0x2ed59f06,
                    0x3fd29406, 0xa0000000, 0xbc75d28d, 0x00000000, 0xbff00000,
                    0x011469fb, 0x3f93ad06, 0x3c69a60b, 0x3fc8f8b8, 0xc0000000,
                    0xbc626d19, 0x00000000, 0xbff00000, 0x176d6d31, 0x3f73b92e,
                    0xbc29b42c, 0x3fb917a6, 0xe0000000, 0xbc3e2718, 0x00000000,
                    0xbff00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
                    0x00000000, 0x00000000, 0x00000000, 0xbff00000, 0x176d6d31,
                    0x3f73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718,
                    0x00000000, 0xbff00000, 0x011469fb, 0x3f93ad06, 0x3c69a60b,
                    0xbfc8f8b8, 0xc0000000, 0x3c626d19, 0x00000000, 0xbff00000,
                    0x939d225a, 0x3fa60bea, 0x2ed59f06, 0xbfd29406, 0xa0000000,
                    0x3c75d28d, 0x00000000, 0xbff00000, 0x866b95cf, 0x3fb37ca1,
                    0xa6aea963, 0xbfd87de2, 0xe0000000, 0x3c672ced, 0x00000000,
                    0xbff00000, 0x73fa1279, 0x3fbe3a68, 0x3806f63b, 0xbfde2b5d,
                    0x20000000, 0xbc5e0d89, 0x00000000, 0xbff00000, 0x5bc57974,
                    0x3fc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd,
                    0x00000000, 0xbff00000, 0x53aba2fd, 0x3fcd0dfe, 0x25091dd6,
                    0xbfe44cf3, 0x20000000, 0xbc68076a, 0x00000000, 0xbff00000,
                    0x99fcef32, 0xbfca8279, 0x667f3bcd, 0xbfe6a09e, 0x20000000,
                    0x3c8bdd34, 0x00000000, 0xbfe00000, 0x94247758, 0xbfc133cc,
                    0x6b151741, 0xbfe8bc80, 0x20000000, 0x3c82c5e1, 0x00000000,
                    0xbfe00000, 0x9ae68c87, 0xbfac73b3, 0x290ea1a3, 0xbfea9b66,
                    0xe0000000, 0xbc39f630, 0x00000000, 0xbfe00000, 0x7f909c4e,
                    0x3f9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1,
                    0x00000000, 0xbfe00000, 0x65455a75, 0x3fbe0875, 0xcf328d46,
                    0xbfed906b, 0x20000000, 0xbc7457e6, 0x00000000, 0xbfe00000,
                    0x76acf82d, 0xbfa4a031, 0x56c62dda, 0xbfee9f41, 0xe0000000,
                    0xbc8760b1, 0x00000000, 0xbfd00000, 0x0e5967d5, 0x3fac1d1f,
                    0xcff75cb0, 0xbfef6297, 0x20000000, 0xbc756217, 0x00000000,
                    0xbfd00000, 0x0f592f50, 0x3f9ba165, 0xa3d12526, 0xbfefd88d,
                    0x40000000, 0x3c887df6, 0x00000000, 0xbfc00000, 0x00000000,
                    0x00000000, 0x00000000, 0xbff00000, 0x00000000, 0x00000000,
                    0x00000000, 0x00000000, 0x0f592f50, 0xbf9ba165, 0xa3d12526,
                    0xbfefd88d, 0x40000000, 0x3c887df6, 0x00000000, 0x3fc00000,
                    0x0e5967d5, 0xbfac1d1f, 0xcff75cb0, 0xbfef6297, 0x20000000,
                    0xbc756217, 0x00000000, 0x3fd00000, 0x76acf82d, 0x3fa4a031,
                    0x56c62dda, 0xbfee9f41, 0xe0000000, 0xbc8760b1, 0x00000000,
                    0x3fd00000, 0x65455a75, 0xbfbe0875, 0xcf328d46, 0xbfed906b,
                    0x20000000, 0xbc7457e6, 0x00000000, 0x3fe00000, 0x7f909c4e,
                    0xbf9d4a2c, 0xf180bdb1, 0xbfec38b2, 0x80000000, 0x3c76e0b1,
                    0x00000000, 0x3fe00000, 0x9ae68c87, 0x3fac73b3, 0x290ea1a3,
                    0xbfea9b66, 0xe0000000, 0xbc39f630, 0x00000000, 0x3fe00000,
                    0x94247758, 0x3fc133cc, 0x6b151741, 0xbfe8bc80, 0x20000000,
                    0x3c82c5e1, 0x00000000, 0x3fe00000, 0x99fcef32, 0x3fca8279,
                    0x667f3bcd, 0xbfe6a09e, 0x20000000, 0x3c8bdd34, 0x00000000,
                    0x3fe00000, 0x53aba2fd, 0xbfcd0dfe, 0x25091dd6, 0xbfe44cf3,
                    0x20000000, 0xbc68076a, 0x00000000, 0x3ff00000, 0x5bc57974,
                    0xbfc59267, 0x39ae68c8, 0xbfe1c73b, 0x20000000, 0xbc8b25dd,
                    0x00000000, 0x3ff00000, 0x73fa1279, 0xbfbe3a68, 0x3806f63b,
                    0xbfde2b5d, 0x20000000, 0xbc5e0d89, 0x00000000, 0x3ff00000,
                    0x866b95cf, 0xbfb37ca1, 0xa6aea963, 0xbfd87de2, 0xe0000000,
                    0x3c672ced, 0x00000000, 0x3ff00000, 0x939d225a, 0xbfa60bea,
                    0x2ed59f06, 0xbfd29406, 0xa0000000, 0x3c75d28d, 0x00000000,
                    0x3ff00000, 0x011469fb, 0xbf93ad06, 0x3c69a60b, 0xbfc8f8b8,
                    0xc0000000, 0x3c626d19, 0x00000000, 0x3ff00000, 0x176d6d31,
                    0xbf73b92e, 0xbc29b42c, 0xbfb917a6, 0xe0000000, 0x3c3e2718,
                    0x00000000, 0x3ff00000
    };

    /** Polynomial coefficient pair; presumably SC_2 (bit patterns of 1/5! and 1/4!). */
    public int[] scTwo = {
                    0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
    };

    /** Polynomial coefficient pair; presumably SC_3 (bit patterns of -1/7! and -1/6!). */
    public int[] scThree = {
                    0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
    };

    /** Polynomial coefficient pair; presumably SC_1 (bit patterns of -1/3! and -1/2!). */
    public int[] scOne = {
                    0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
    };

    /**
     * 32-bit words consumed by the large-argument path of sinIntrinsic, which picks a window of
     * seven consecutive words based on the input's exponent. Presumably the binary expansion of a
     * multiple of 1/pi -- TODO confirm.
     */
    public int[] piInvTable = {
                    0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1,
                    0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561,
                    0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c,
                    0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
                    0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff,
                    0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7,
                    0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7,
                    0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab,
                    0xf0cfbc21
    };

    /** Two doubles, 0x3fe921fb40000000 and 0x3e64442d18469899: a high/low split of pi/4. */
    public int[] piFour = {
                    0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d
    };

    /** 0x40245f306dc9c883, i.e. 32/pi; multiplies the input to obtain N. */
    public int[] piThirtyTwoInv = {
                    0x6dc9c883, 0x40245f30
    };

    /** 0x4338000000000000; the SHIFTER constant mentioned in the algorithm description. */
    public int[] shifter = {
                    0x00000000, 0x43380000
    };

    /** 0x8000000000000000; mask isolating the sign bit of a double. */
    public int[] signMask = {
                    0x00000000, 0x80000000
    };

    /** 0x3b63198a2e037073; presumably P_3 of the algorithm description. */
    public int[] pThree = {
                    0x2e037073, 0x3b63198a
    };

    /** 0x3fefffffffffffff, the largest double below 1.0; multiplier for inputs below SNN. */
    public int[] allOnes = {
                    0xffffffff, 0x3fefffff
    };

    /** 0x4360000000000000, i.e. 2^55. */
    public int[] twoPowFiftyFive = {
                    0x00000000, 0x43600000
    };

    /** 0x3c80000000000000, i.e. 2^-55. */
    public int[] twoPowFiftyFiveM = {
                    0x00000000, 0x3c800000
    };

    /** 0x3fb921fb54400000, the leading bits of pi/32; presumably P_1 of the algorithm description. */
    public int[] pOne = {
                    0x54400000, 0x3fb921fb
    };

    /**
     * Emits the AMD64 instruction sequence for the SIN() algorithm described in the comment above
     * the constant tables.
     *
     * The statement order below is the instruction schedule; do not reorder.
     *
     * @param dest XMM register that holds the result when the emitted code reaches its end
     * @param value XMM register holding the input; it is spilled to {@code stackTemp} and copied
     *            to {@code dest} when the two registers differ
     * @param crb compilation result builder, used to resolve the stack slot and passed to
     *            {@code setCrb} before any {@code externalAddress} use
     * @param masm assembler the instructions are emitted into
     */
    public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
        ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
        ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
        ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
        ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
        ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
        ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
        ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
        ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
        ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
        ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8);
        ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
        ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
        ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8);
        ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8);
        ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8);
        ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);

        Label bb0 = new Label();
        Label bb1 = new Label();
        Label bb2 = new Label();
        Label bb4 = new Label();
        Label bb5 = new Label();
        Label bb6 = new Label();
        Label bb8 = new Label();
        Label bb9 = new Label();
        Label bb10 = new Label();
        Label bb11 = new Label();
        Label bb12 = new Label();
        Label bb13 = new Label();
        Label bb14 = new Label();
        Label bb15 = new Label();

        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
        // gpr3 is bound to rcxTemp; presumably because the count-less shifts below shift by CL
        // -- TODO confirm against the assembler.
        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
        Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
        Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
        Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
        Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
        Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);

        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
        Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
        Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);

        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);

        setCrb(crb);
        // Spill the input so its upper 32 bits can be read back as an integer.
        masm.movsd(stackSlot, value);
        if (dest.encoding != value.encoding) {
            masm.movdqu(dest, value);
        }

        masm.leaq(gpr1, stackSlot);
        masm.movl(gpr1, new AMD64Address(gpr1, 4)); // upper 32 bits of the input
        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, 0x40245f30
        // NOTE(review): temp2 is overwritten on every path below before it is read, so this
        // shifter load looks dead -- confirm before removing.
        masm.movdq(temp2, externalAddress(shifterPtr)); // 0x00000000, 0x43380000

        // Range check on sign-stripped exponent/top mantissa bits: the main path is taken when
        // (hi & 0x7fff0000) - 0x30300000 <= 0x10c50000 as an unsigned comparison. Values below
        // the lower bound wrap around and also branch; bb0 tells the two cases apart.
        masm.andl(gpr1, 2147418112);
        masm.subl(gpr1, 808452096);
        masm.cmpl(gpr1, 281346048);
        masm.jcc(ConditionFlag.Above, bb0);

        // Main path: N = trunc(32/pi * x + copysign(0.5, x)).
        masm.mulsd(temp1, dest);
        masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2sil(gpr4, temp1);
        masm.cvtsi2sdl(temp1, gpr4);
        masm.movdqu(temp6, externalAddress(pTwoPtr)); // 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
        masm.movq(gpr7, 0x3fb921fb54400000L); // same bits as the pOne table
        masm.movdq(temp3, gpr7);
        masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
        masm.pshufd(temp4, dest, 0x44);
        masm.mulsd(temp3, temp1);
        // Duplicate the low double of temp1 into both halves.
        if (masm.supports(CPUFeature.SSE3)) {
            masm.movddup(temp1, temp1);
        } else {
            masm.movlhps(temp1, temp1);
        }
        // Address the cTable entry for M = N & 63 (32 bytes per entry).
        masm.andl(gpr4, 63);
        masm.shll(gpr4, 5);
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp6, temp1);
        masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.subsd(dest, temp3);
        if (masm.supports(CPUFeature.SSE3)) {
            masm.movddup(temp3, temp4);
        } else {
            masm.movdqu(temp3, temp4);
            masm.movlhps(temp3, temp3);
        }
        masm.subsd(temp4, temp6);
        masm.pshufd(dest, dest, 0x44);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp2, temp8);
        masm.movdqu(temp9, temp7);
        masm.mulpd(temp5, dest);
        masm.subpd(dest, temp6);
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp6);
        masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        masm.addsd(temp9, dest);
        masm.addsd(temp3, temp7);
        masm.addsd(temp1, temp9);
        masm.addsd(temp1, temp3);
        masm.addsd(temp1, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.movdqu(dest, temp4);
        masm.addsd(temp1, temp6);
        masm.addsd(dest, temp1);
        masm.jmp(bb15);

        // Reached from bb1 when the exponent field is all ones (Inf or NaN): result is 0.0 / 0.0.
        masm.bind(bb14);
        masm.xorpd(temp1, temp1);
        masm.xorpd(dest, dest);
        masm.divsd(dest, temp1);
        masm.jmp(bb15);

        // Outside the main range. The flags are still those of the cmpl above: a signed
        // "greater" means the input is too large, otherwise it is too small.
        masm.bind(bb0);
        masm.jcc(ConditionFlag.Greater, bb1);

        // 3325 == 0xcfd == (0 - 0x303) & 0xfff, i.e. the exponent field of the input is zero.
        masm.shrl(gpr1, 20);
        masm.cmpl(gpr1, 3325);
        masm.jcc(ConditionFlag.NotEqual, bb2);

        masm.mulsd(dest, externalAddress(allOnesPtr)); // 0xffffffff, 0x3fefffff
        masm.jmp(bb15);

        // Small input with a non-zero exponent field: computes 2^-55 * (2^55 * x - x) in temp3.
        // NOTE(review): temp3 is not copied to dest before the jump, so dest still holds the
        // input here -- confirm this is intended.
        masm.bind(bb2);
        masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr)); // 0x00000000, 0x43600000
        masm.mulsd(temp3, dest);
        masm.subsd(temp3, dest);
        masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr)); // 0x00000000, 0x3c800000
        masm.jmp(bb15);

        // Large input. 32752 == 0x7ff0 masks the exponent field in the top 16 bits.
        masm.bind(bb1);
        masm.pextrw(gpr3, dest, 3);
        masm.andl(gpr3, 32752);
        masm.cmpl(gpr3, 32752);
        masm.jcc(ConditionFlag.Equal, bb14);

        // Turn the exponent into a 4-byte-aligned offset into piInvTable, then multiply the
        // input's mantissa (low 32 bits in gpr4, remaining bits plus a forced top bit in gpr1)
        // by the seven table words at offsets 0..24, accumulating 32-bit partial products.
        masm.subl(gpr3, 16224);
        masm.shrl(gpr3, 7);
        masm.andl(gpr3, 65532);
        masm.leaq(gpr10, externalAddress(piInvTablePtr));
        masm.addq(gpr3, gpr10);
        masm.movdq(gpr1, dest);
        masm.movl(gpr9, new AMD64Address(gpr3, 20));
        masm.movl(gpr7, new AMD64Address(gpr3, 24));
        masm.movl(gpr4, gpr1);
        masm.shrq(gpr1, 21);
        masm.orl(gpr1, Integer.MIN_VALUE);
        masm.shrl(gpr1, 11);
        masm.movl(gpr8, gpr9);
        masm.imulq(gpr9, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.imulq(gpr7, gpr1);
        masm.movl(gpr5, new AMD64Address(gpr3, 16));
        masm.movl(gpr6, new AMD64Address(gpr3, 12));
        masm.movl(gpr10, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr8, gpr9);
        masm.addq(gpr10, gpr7);
        masm.movl(gpr7, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr8, gpr10);
        masm.movl(gpr9, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr9, gpr1);
        masm.movl(gpr10, gpr6);
        masm.imulq(gpr6, gpr4);
        masm.movl(gpr2, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr8, gpr2);
        masm.movl(gpr2, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr9, gpr5);
        masm.addq(gpr9, gpr8);
        masm.shlq(gpr2, 32);
        masm.orq(gpr7, gpr2);
        masm.imulq(gpr10, gpr1);
        masm.movl(gpr8, new AMD64Address(gpr3, 8));
        masm.movl(gpr5, new AMD64Address(gpr3, 4));
        masm.movl(gpr2, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr9, gpr2);
        masm.movl(gpr2, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr10, gpr6);
        masm.addq(gpr10, gpr9);
        masm.movq(gpr6, gpr8);
        masm.imulq(gpr8, gpr4);
        masm.imulq(gpr6, gpr1);
        masm.movl(gpr9, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr10, gpr9);
        masm.movl(gpr9, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr6, gpr8);
        masm.addq(gpr6, gpr10);
        masm.movq(gpr8, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.shlq(gpr9, 32);
        masm.orq(gpr9, gpr2);
        masm.movl(gpr1, new AMD64Address(gpr3, 0));
        masm.movl(gpr10, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr6, gpr10);
        masm.movl(gpr10, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr8, gpr5);
        masm.addq(gpr8, gpr6);
        masm.imulq(gpr4, gpr1);
        // Recover the table byte offset, scale it by 8 and combine it with the unbiased
        // exponent; gpr5 keeps the input's sign bit (0x8000 of the top word).
        masm.pextrw(gpr2, dest, 3);
        masm.leaq(gpr6, externalAddress(piInvTablePtr));
        masm.subq(gpr3, gpr6);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, 19);
        masm.movl(gpr5, 32768);
        masm.andl(gpr5, gpr2);
        masm.shrl(gpr2, 4);
        masm.andl(gpr2, 2047);
        masm.subl(gpr2, 1023);
        masm.subl(gpr3, gpr2);
        masm.addq(gpr8, gpr4);
        masm.movl(gpr4, gpr3);
        masm.addl(gpr4, 32);
        masm.cmpl(gpr3, 1);
        masm.jcc(ConditionFlag.Less, bb4);

        masm.negl(gpr3);
        masm.addl(gpr3, 29);
        masm.shll(gpr8);
        masm.movl(gpr6, gpr8);
        masm.andl(gpr8, 536870911);
        masm.testl(gpr8, 268435456);
        masm.jcc(ConditionFlag.NotEqual, bb5);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);

        masm.bind(bb6);

        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.Equal, bb8);

        // Normalize the multi-word value held in gpr8:gpr9:gpr7 around its highest set bit.
        masm.bind(bb9);
        masm.bsrq(gpr10, gpr8);
        masm.movl(gpr3, 29);
        masm.subl(gpr3, gpr10);
        masm.jcc(ConditionFlag.LessEqual, bb10);

        masm.shlq(gpr8);
        masm.movq(gpr1, gpr9);
        masm.shlq(gpr9);
        masm.addl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shrq(gpr1);
        masm.shrq(gpr7);
        masm.orq(gpr8, gpr1);
        masm.orq(gpr9, gpr7);

        // Convert the integer words back to doubles, scale them with exponents built via pinsrw
        // and multiply by the high/low parts of piFour.
        masm.bind(bb11);
        masm.cvtsi2sdq(dest, gpr8);
        masm.shrq(gpr9, 1);
        masm.cvtsi2sdq(temp3, gpr9);
        masm.xorpd(temp4, temp4);
        masm.shll(gpr4, 4);
        masm.negl(gpr4);
        masm.addl(gpr4, 16368);
        masm.orl(gpr4, gpr5);
        masm.xorl(gpr4, gpr2);
        masm.pinsrw(temp4, gpr4, 3);
        masm.leaq(gpr1, externalAddress(piFourPtr));
        masm.movdqu(temp2, new AMD64Address(gpr1, 0)); // 0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d
        masm.xorpd(temp5, temp5);
        masm.subl(gpr4, 1008);
        masm.pinsrw(temp5, gpr4, 3);
        masm.mulsd(dest, temp4);
        masm.shll(gpr5, 16);
        masm.sarl(gpr5, 31);
        masm.mulsd(temp3, temp5);
        masm.movdqu(temp1, dest);
        masm.pshufd(temp6, temp2, 0xE);
        masm.mulsd(dest, temp2);
        masm.shrl(gpr6, 29);
        masm.addsd(temp1, temp3);
        masm.mulsd(temp3, temp2);
        masm.addl(gpr6, gpr5);
        masm.xorl(gpr6, gpr5);
        masm.mulsd(temp6, temp1);
        masm.movl(gpr1, gpr6);
        masm.addsd(temp6, temp3);
        masm.movdqu(temp2, dest);
        masm.addsd(dest, temp6);
        masm.subsd(temp2, dest);
        masm.addsd(temp6, temp2);

        // Same reduction and polynomial evaluation as the main path, applied to the reduced
        // argument in dest; temp6 is subtracted as an extra low-order term and gpr1 contributes
        // to the table index.
        masm.bind(bb12);
        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, 0x40245f30
        masm.mulsd(temp1, dest);
        masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2sil(gpr4, temp1);
        masm.cvtsi2sdl(temp1, gpr4);
        masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, 0x3fb921fb
        masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
        masm.mulsd(temp3, temp1);
        masm.unpcklpd(temp1, temp1);
        masm.shll(gpr1, 3);
        masm.addl(gpr4, 1865216);
        masm.movdqu(temp4, dest);
        masm.addl(gpr4, gpr1);
        masm.andl(gpr4, 63);
        masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.shll(gpr4, 5);
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp2, temp1);
        masm.subsd(dest, temp3);
        masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.unpcklpd(dest, dest);
        masm.movdqu(temp3, temp4);
        masm.subsd(temp4, temp2);
        masm.mulpd(temp5, dest);
        masm.subpd(dest, temp2);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp9, temp7);
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp2);
        masm.movdqu(temp2, temp8);
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.subsd(temp1, temp6);
        masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        masm.addsd(temp9, dest);
        masm.addsd(temp3, temp7);
        masm.addsd(temp1, temp9);
        masm.addsd(temp1, temp3);
        masm.addsd(temp1, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.movdqu(dest, temp4);
        masm.addsd(temp1, temp6);
        masm.addsd(dest, temp1);
        masm.jmp(bb15);

        // The top word (gpr8) is zero: shift the words up by 64 bits, at most twice. If
        // everything is zero, continue at bb12 with dest = 0 and temp6 = 0.
        masm.bind(bb8);
        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.movl(gpr7, 0);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb9);

        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb9);

        masm.xorpd(dest, dest);
        masm.xorpd(temp6, temp6);
        masm.jmp(bb12);

        // Reached from bb9 with the flags of "29 - bsr(gpr8)": zero means no shift is needed,
        // negative means the value has to be shifted right instead of left.
        masm.bind(bb10);
        masm.jcc(ConditionFlag.Equal, bb11);

        masm.negl(gpr3);
        masm.shrq(gpr9);
        masm.movq(gpr1, gpr8);
        masm.shrq(gpr8);
        masm.subl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shlq(gpr1);
        masm.orq(gpr9, gpr1);
        masm.jmp(bb11);

        // Reached when the shift amount computed before the branch (gpr3) is below 1.
        masm.bind(bb4);
        masm.negl(gpr3);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr8);
        masm.movq(gpr6, gpr8);
        masm.testl(gpr8, Integer.MIN_VALUE);
        masm.jcc(ConditionFlag.NotEqual, bb13);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shrq(gpr6, 3);
        masm.jmp(bb6);

        // Tested bit set (see the testl before the branch to bb5): replace the three-word value
        // by its difference from a power of two (sub/sbb chain) and record 0x8000 in gpr2, which
        // bb11 XORs into the sign/exponent word.
        masm.bind(bb5);
        masm.shrl(gpr8);
        masm.movl(gpr2, 536870912);
        masm.shrl(gpr2);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr2, 32);
        masm.addl(gpr6, 536870912);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.jmp(bb6);

        // Counterpart of bb5 for the bb4 path.
        masm.bind(bb13);
        masm.shrl(gpr8);
        masm.movq(gpr2, 0x100000000L);
        masm.shrq(gpr2);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.shrq(gpr6, 3);
        masm.addl(gpr6, 536870912);
        masm.jmp(bb6);

        // Common exit.
        masm.bind(bb15);
    }

    /*
     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
     * Source Code
     *
     * ALGORITHM DESCRIPTION - COS() ---------------------
     *
     * 1. RANGE REDUCTION
     *
     * We perform an initial range reduction from X to r with
     *
     * X =~= N * pi/32 + r
     *
     * so that |r| <= pi/64 + epsilon. We restrict inputs to those where |N| <= 932560. Beyond this,
     * the range reduction is insufficiently accurate. For extremely small inputs, denormalization
     * can occur internally, impacting performance. This means that the main path is actually only
     * taken for 2^-252 <= |X| < 90112.
     *
     * To avoid branches, we perform the range reduction to full accuracy each time.
     *
     * X - N * (P_1 + P_2 + P_3)
     *
     * where P_1 and P_2 are 32-bit numbers (so multiplication by N is exact) and P_3 is a 53-bit
     * number.
     * Together, these approximate pi well enough for all cases in the restricted range.
     *
     * The main reduction sequence is:
     *
     *   y   = 32/pi * x
     *   N   = integer(y)  (computed by adding and subtracting off SHIFTER)
     *
     *   m_1 = N * P_1
     *   m_2 = N * P_2
     *   r_1 = x - m_1
     *   r   = r_1 - m_2   (this r can be used for most of the calculation)
     *
     *   c_1 = r_1 - r
     *   m_3 = N * P_3
     *   c_2 = c_1 - m_2
     *   c   = c_2 - m_3
     *
     * 2. MAIN ALGORITHM
     *
     * The algorithm uses a table lookup based on B = M * pi / 32 where M = N mod 64. The stored
     * values are:
     *
     *   sigma        closest power of 2 to cos(B)
     *   C_hl         53-bit cos(B) - sigma
     *   S_hi + S_lo  2 * 53-bit sin(B)
     *
     * The computation is organized as follows:
     *
     *   sin(B + r + c) = [sin(B) + sigma * r]
     *                    + r * (cos(B) - sigma)
     *                    + sin(B) * [cos(r + c) - 1]
     *                    + cos(B) * [sin(r + c) - r]
     *
     * which is approximately:
     *
     *   [S_hi + sigma * r]
     *   + C_hl * r
     *   + S_lo + S_hi * [(cos(r) - 1) - r * c]
     *   + (C_hl + sigma) * [(sin(r) - r) + c]
     *
     * and this is what is actually computed. We separate this sum into four parts:
     *
     *   hi + med + pols + corr
     *
     * where
     *
     *   hi   = S_hi + sigma * r
     *   med  = C_hl * r
     *   pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
     *   corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
     *
     * 3. POLYNOMIAL
     *
     * The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) can be rearranged freely,
     * since it is quite small, so we exploit parallelism to the fullest.
     *
     *   psc4       = SC_4 * r_1
     *   msc4       = psc4 * r
     *   r2         = r * r
     *   msc2       = SC_2 * r2
     *   r4         = r2 * r2
     *   psc3       = SC_3 + msc4
     *   psc1       = SC_1 + msc2
     *   msc3       = r4 * psc3
     *   sincospols = psc1 + msc3
     *   pols       = sincospols * <S_hi * r^2 | (C_hl + sigma) * r^3>
     *
     * 4. CORRECTION TERM
     *
     * This is where the "c" component of the range reduction is taken into account; recall that
     * just "r" is used for most of the calculation.
     *
     *   -c   = m_3 - c_2
     *   -d   = S_hi * r - (C_hl + sigma)
     *   corr = -c * -d + S_lo
     *
     * 5. COMPENSATED SUMMATIONS
     *
     * The two successive compensated summations add up the high and medium parts, leaving just the
     * low parts to add up at the end.
     *
     *   rs      = sigma * r
     *   res_int = S_hi + rs
     *   k_0     = S_hi - res_int
     *   k_2     = k_0 + rs
     *   med     = C_hl * r
     *   res_hi  = res_int + med
     *   k_1     = res_int - res_hi
     *   k_3     = k_1 + med
     *
     * 6. FINAL SUMMATION
     *
     * We now add up all the small parts:
     *
     *   res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
     *
     * Now the overall result is just:
     *
     *   res_hi + res_lo
     *
     * 7. SMALL ARGUMENTS
     *
     * Inputs with |X| < 2^-252 are treated specially as 1 - |x|.
     *
     * Special cases:
     *   cos(NaN) = quiet NaN, and raise invalid exception
     *   cos(INF) = NaN and raise invalid exception
     *   cos(0)   = 1
     *
     */

    /**
     * Constant used by {@link #cosIntrinsic} as the minuend of its small-argument path
     * ({@code one - |x|}). The two words are listed low word first; read as one 64-bit value they
     * are 0x3ff0000000000000, the IEEE-754 encoding of 1.0.
     */
    public int[] one = {
                    0x00000000, 0x3ff00000
    };

    /**
     * Emits the instruction sequence for the double-precision cosine described in the algorithm
     * comment above.
     *
     * The input is stored to {@code stackTemp}, copied into {@code dest} when the two registers
     * differ, and then dispatched on the upper 32 bits of its bit pattern:
     * <ul>
     * <li>inside the main range: reduction, {@code cTable} lookup and reconstruction are emitted
     * inline and control jumps to {@code bb13};</li>
     * <li>below the main range (fall-through after {@code bb0}): the result is
     * {@code one - |x|};</li>
     * <li>exponent field all ones ({@code bb14}): the result is {@code 0.0 / 0.0};</li>
     * <li>above the main range ({@code bb1}): an integer multi-word reduction against
     * {@code piInvTable} runs first and then joins the second copy of the reconstruction at
     * {@code bb11}.</li>
     * </ul>
     * Every path ends at {@code bb13}, which is bound after the last emitted instruction.
     *
     * @param dest register that carries the working value and, on exit, the result
     * @param value register holding the input; it is not written unless it is the same register
     *            as {@code dest}
     * @param crb used to resolve {@code stackTemp} to an address and handed to {@code setCrb}
     * @param masm assembler that receives the emitted instructions
     */
    public void cosIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        // Data constants referenced by the emitted code; the second argument is passed to
        // ArrayDataPointerConstant as-is (presumably the required alignment in bytes - confirm).
        ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
        ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
        ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
        ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
        ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
        ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
        ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
        ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
        ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
        ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
        ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
        ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
        ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
        ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);

        // Branch targets; the numbering has no bb2.
        Label bb0 = new Label();
        Label bb1 = new Label();
        Label bb3 = new Label();
        Label bb4 = new Label();
        Label bb5 = new Label();
        Label bb6 = new Label();
        Label bb7 = new Label();
        Label bb8 = new Label();
        Label bb9 = new Label();
        Label bb10 = new Label();
        Label bb11 = new Label();
        Label bb12 = new Label();
        Label bb13 = new Label();
        Label bb14 = new Label();

        // General-purpose temporaries. gpr3 is the one bound to rcxTemp; it is the register that
        // is computed right before each single-operand shift below.
        // NOTE(review): those shifts presumably take their count from CL - confirm against
        // AMD64MacroAssembler.
        Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
        Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
        Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
        Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
        Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
        Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
        Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
        Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
        Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
        Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);

        // Floating-point temporaries.
        Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
        Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
        Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
        Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
        Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
        Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
        Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
        Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
        Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);

        AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);

        setCrb(crb);
        // Store the input to the stack slot and work on a copy in dest.
        masm.movdq(stackSlot, value);
        if (dest.encoding != value.encoding) {
            masm.movdqu(dest, value);
        }

        // Read the upper 32 bits of the input (offset 4 of the slot) for the range check.
        masm.leaq(gpr1, stackSlot);
        masm.movl(gpr1, new AMD64Address(gpr1, 4));
        masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, 0x40245f30

        masm.andl(gpr1, 2147418112); // 0x7FFF0000: drop the sign bit and the low 16 bits
        masm.subl(gpr1, 808452096); // 0x30300000: high word of 2^-252
        masm.cmpl(gpr1, 281346048); // 0x10C50000: width of the accepted window
        // Unsigned compare: both "too small" (wrapped negative) and "too large" leave the main
        // path; bb0 tells them apart with a signed test on the same flags.
        masm.jcc(ConditionFlag.Above, bb0);

        // ---- Main path: range reduction (section 1 of the description) ----
        masm.mulsd(temp1, dest);
        masm.movdqu(temp5, externalAddress(oneHalfPtr)); // 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2sil(gpr4, temp1);
        masm.cvtsi2sdl(temp1, gpr4);
        masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
        masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, 0x3fb921fb
        masm.mulsd(temp3, temp1);
        masm.unpcklpd(temp1, temp1);
        // 1865232 = 64 * 29144 + 16, so after the mask the table index is (N + 16) mod 64.
        // NOTE(review): the preceding intrinsic in this file adds 1865216 (= 64 * 29144) at the
        // same point; the extra 16 entries presumably implement the quarter-period shift that
        // lets cosine reuse the sine reconstruction - confirm.
        masm.addq(gpr4, 1865232);
        masm.movdqu(temp4, dest);
        masm.andq(gpr4, 63);
        masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
        // Each cTable row is 32 bytes (index << 5); offsets 0, 16 and 24 of the row are read.
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.shlq(gpr4, 5);
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp2, temp1);
        masm.subsd(dest, temp3);
        masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.unpcklpd(dest, dest);
        masm.movdqu(temp3, temp4);
        masm.subsd(temp4, temp2);
        masm.mulpd(temp5, dest);
        masm.subpd(dest, temp2);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp2);
        masm.movdqu(temp2, temp8);
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.pshufd(temp9, temp8, 0xE);
        masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        // Final summation: the small terms are accumulated in dest, the high part (temp4) is
        // added last.
        masm.addsd(dest, temp9);
        masm.addsd(temp3, temp7);
        masm.addsd(dest, temp1);
        masm.addsd(dest, temp3);
        masm.addsd(dest, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.addsd(dest, temp6);
        masm.addsd(dest, temp4);
        masm.jmp(bb13);

        // ---- Exponent field all ones: produce 0.0 / 0.0 ----
        masm.bind(bb14);
        masm.xorpd(temp1, temp1);
        masm.xorpd(dest, dest);
        masm.divsd(dest, temp1);
        masm.jmp(bb13);

        // ---- Outside the main range; flags are still those of the cmpl above ----
        masm.bind(bb0);
        masm.jcc(ConditionFlag.Greater, bb1);

        // Small argument: clear the sign bit (32767 = 0x7FFF on the top 16-bit word) and return
        // one - |x|.
        masm.pextrw(gpr1, dest, 3);
        masm.andl(gpr1, 32767);
        masm.pinsrw(dest, gpr1, 3);
        masm.movdq(temp1, externalAddress(onePtr)); // 0x00000000, 0x3ff00000
        masm.subsd(temp1, dest);
        masm.movdqu(dest, temp1);
        masm.jmp(bb13);

        // ---- Large argument ----
        masm.bind(bb1);
        // 32752 = 0x7FF0 selects the exponent bits of the top word; all ones goes to bb14.
        masm.pextrw(gpr3, dest, 3);
        masm.andl(gpr3, 32752);
        masm.cmpl(gpr3, 32752);
        masm.jcc(ConditionFlag.Equal, bb14);

        // Turn the exponent into a 4-byte-aligned offset (65532 = 0xFFFC) into piInvTable and
        // keep the resulting address in gpr3.
        masm.subl(gpr3, 16224);
        masm.shrl(gpr3, 7);
        masm.andl(gpr3, 65532);
        masm.leaq(gpr10, externalAddress(piInvTablePtr));
        masm.addq(gpr3, gpr10);
        // Split the input bits: gpr4 = low 32 bits, gpr1 = upper mantissa bits with bit 31 forced
        // on before the final shift.
        masm.movdq(gpr1, dest);
        masm.movl(gpr9, new AMD64Address(gpr3, 20));
        masm.movl(gpr7, new AMD64Address(gpr3, 24));
        masm.movl(gpr4, gpr1);
        masm.shrq(gpr1, 21);
        masm.orl(gpr1, Integer.MIN_VALUE);
        masm.shrl(gpr1, 11);
        // Multi-word product of (gpr1:gpr4) with the 32-bit table words at offsets 24 down to 0,
        // carrying the upper halves from one partial product into the next.
        masm.movl(gpr8, gpr9);
        masm.imulq(gpr9, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.imulq(gpr7, gpr1);
        masm.movl(gpr5, new AMD64Address(gpr3, 16));
        masm.movl(gpr6, new AMD64Address(gpr3, 12));
        masm.movl(gpr10, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr8, gpr9);
        masm.addq(gpr10, gpr7);
        masm.movl(gpr7, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr8, gpr10);
        masm.movl(gpr9, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr9, gpr1);
        masm.movl(gpr10, gpr6);
        masm.imulq(gpr6, gpr4);
        masm.movl(gpr2, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr8, gpr2);
        masm.movl(gpr2, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr9, gpr5);
        masm.addq(gpr9, gpr8);
        masm.shlq(gpr2, 32);
        masm.orq(gpr7, gpr2);
        masm.imulq(gpr10, gpr1);
        masm.movl(gpr8, new AMD64Address(gpr3, 8));
        masm.movl(gpr5, new AMD64Address(gpr3, 4));
        masm.movl(gpr2, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr9, gpr2);
        masm.movl(gpr2, gpr9);
        masm.shrq(gpr9, 32);
        masm.addq(gpr10, gpr6);
        masm.addq(gpr10, gpr9);
        masm.movq(gpr6, gpr8);
        masm.imulq(gpr8, gpr4);
        masm.imulq(gpr6, gpr1);
        masm.movl(gpr9, gpr8);
        masm.shrq(gpr8, 32);
        masm.addq(gpr10, gpr9);
        masm.movl(gpr9, gpr10);
        masm.shrq(gpr10, 32);
        masm.addq(gpr6, gpr8);
        masm.addq(gpr6, gpr10);
        masm.movq(gpr8, gpr5);
        masm.imulq(gpr5, gpr4);
        masm.imulq(gpr8, gpr1);
        masm.shlq(gpr9, 32);
        masm.orq(gpr9, gpr2);
        masm.movl(gpr1, new AMD64Address(gpr3, 0));
        masm.movl(gpr10, gpr5);
        masm.shrq(gpr5, 32);
        masm.addq(gpr6, gpr10);
        masm.movl(gpr10, gpr6);
        masm.shrq(gpr6, 32);
        masm.addq(gpr8, gpr5);
        masm.addq(gpr8, gpr6);
        masm.imulq(gpr4, gpr1);
        // Recover the table byte offset (address minus table base), scale it by 8 and combine it
        // with the unbiased exponent (bits 4..14 of the top word, minus 1023) into a shift count.
        // gpr5 keeps the input's sign bit (32768 = 0x8000).
        masm.pextrw(gpr2, dest, 3);
        masm.leaq(gpr6, externalAddress(piInvTablePtr));
        masm.subq(gpr3, gpr6);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, gpr3);
        masm.addl(gpr3, 19);
        masm.movl(gpr5, 32768);
        masm.andl(gpr5, gpr2);
        masm.shrl(gpr2, 4);
        masm.andl(gpr2, 2047);
        masm.subl(gpr2, 1023);
        masm.subl(gpr3, gpr2);
        masm.addq(gpr8, gpr4);
        masm.movl(gpr4, gpr3);
        masm.addl(gpr4, 32);
        masm.cmpl(gpr3, 1);
        masm.jcc(ConditionFlag.Less, bb3);

        masm.negl(gpr3);
        masm.addl(gpr3, 29);
        masm.shll(gpr8);
        masm.movl(gpr6, gpr8);
        masm.andl(gpr8, 536870911); // 0x1FFFFFFF
        masm.testl(gpr8, 268435456); // 0x10000000
        masm.jcc(ConditionFlag.NotEqual, bb4);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);

        // bb5 and bb6 mark the same position; bb4 rejoins via bb5, bb3 and bb12 via bb6.
        masm.bind(bb5);

        masm.bind(bb6);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.Equal, bb7);

        // Normalise the three-word value gpr8:gpr9:gpr7 based on the highest set bit of gpr8
        // (bsrq), adjusting the exponent accumulator gpr4 by the same amount.
        masm.bind(bb8);
        masm.bsrq(gpr10, gpr8);
        masm.movl(gpr3, 29);
        masm.subl(gpr3, gpr10);
        masm.jcc(ConditionFlag.LessEqual, bb9);

        masm.shlq(gpr8);
        masm.movq(gpr1, gpr9);
        masm.shlq(gpr9);
        masm.addl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shrq(gpr1);
        masm.shrq(gpr7);
        masm.orq(gpr8, gpr1);
        masm.orq(gpr9, gpr7);

        // Convert the integer words back to doubles. The scale factors are built directly as
        // exponent words (pinsrw into word 3); gpr5 (input sign) and gpr2 are folded into the
        // sign bit.
        masm.bind(bb10);
        masm.cvtsi2sdq(dest, gpr8);
        masm.shrq(gpr9, 1);
        masm.cvtsi2sdq(temp3, gpr9);
        masm.xorpd(temp4, temp4);
        masm.shll(gpr4, 4);
        masm.negl(gpr4);
        masm.addl(gpr4, 16368);
        masm.orl(gpr4, gpr5);
        masm.xorl(gpr4, gpr2);
        masm.pinsrw(temp4, gpr4, 3);
        masm.leaq(gpr2, externalAddress(piFourPtr));
        masm.movdqu(temp2, new AMD64Address(gpr2, 0)); // 0x40000000, 0x3fe921fb, 0x18469899, 0x3e64442d
        masm.xorpd(temp5, temp5);
        masm.subl(gpr4, 1008);
        masm.pinsrw(temp5, gpr4, 3);
        masm.mulsd(dest, temp4);
        masm.shll(gpr5, 16);
        masm.sarl(gpr5, 31);
        masm.mulsd(temp3, temp5);
        masm.movdqu(temp1, dest);
        masm.mulsd(dest, temp2);
        masm.pshufd(temp6, temp2, 0xE);
        masm.shrl(gpr6, 29);
        masm.addsd(temp1, temp3);
        masm.mulsd(temp3, temp2);
        masm.addl(gpr6, gpr5);
        masm.xorl(gpr6, gpr5);
        masm.mulsd(temp6, temp1);
        masm.movl(gpr1, gpr6);
        masm.addsd(temp6, temp3);
        masm.movdqu(temp2, dest);
        masm.addsd(dest, temp6);
        masm.subsd(temp2, dest);
        masm.addsd(temp6, temp2);

        // ---- Second copy of the reduction and reconstruction, applied to the reduced value in
        // dest with its low-order part in temp6. gpr1 * 8 is added to the table index. ----
        masm.bind(bb11);
        masm.movq(temp1, externalAddress(piThirtyTwoInvPtr)); // 0x6dc9c883, 0x40245f30
        masm.mulsd(temp1, dest);
        masm.movdq(temp5, externalAddress(oneHalfPtr)); // 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
        masm.movdq(temp4, externalAddress(signMaskPtr)); // 0x00000000, 0x80000000
        masm.pand(temp4, dest);
        masm.por(temp5, temp4);
        masm.addpd(temp1, temp5);
        masm.cvttsd2siq(gpr4, temp1);
        masm.cvtsi2sdq(temp1, gpr4);
        masm.movdq(temp3, externalAddress(pOnePtr)); // 0x54400000, 0x3fb921fb
        masm.movdqu(temp2, externalAddress(pTwoPtr)); // 0x1a600000, 0x3d90b461, 0x1a600000, 0x3d90b461
        masm.mulsd(temp3, temp1);
        masm.unpcklpd(temp1, temp1);
        masm.shll(gpr1, 3);
        masm.addl(gpr4, 1865232);
        masm.movdqu(temp4, dest);
        masm.addl(gpr4, gpr1);
        masm.andl(gpr4, 63);
        masm.movdqu(temp5, externalAddress(scFourPtr)); // 0xa556c734, 0x3ec71de3, 0x1a01a01a, 0x3efa01a0
        masm.leaq(gpr1, externalAddress(cTablePtr));
        masm.shll(gpr4, 5);
        masm.addq(gpr1, gpr4);
        masm.movdqu(temp8, new AMD64Address(gpr1, 0));
        masm.mulpd(temp2, temp1);
        masm.subsd(dest, temp3);
        masm.mulsd(temp1, externalAddress(pThreePtr)); // 0x2e037073, 0x3b63198a
        masm.subsd(temp4, temp3);
        masm.unpcklpd(dest, dest);
        masm.movdqu(temp3, temp4);
        masm.subsd(temp4, temp2);
        masm.mulpd(temp5, dest);
        masm.pshufd(temp7, temp8, 0xE);
        masm.movdqu(temp9, temp7);
        masm.subpd(dest, temp2);
        masm.mulsd(temp7, temp4);
        masm.subsd(temp3, temp4);
        masm.mulpd(temp5, dest);
        masm.mulpd(dest, dest);
        masm.subsd(temp3, temp2);
        masm.movdqu(temp2, temp8);
        masm.subsd(temp1, temp3);
        masm.movdq(temp3, new AMD64Address(gpr1, 24));
        masm.addsd(temp2, temp3);
        masm.subsd(temp7, temp2);
        // Unlike the main path, the low-order part carried in temp6 is subtracted here before
        // temp6 is reloaded with scTwo.
        masm.subsd(temp1, temp6);
        masm.movdqu(temp6, externalAddress(scTwoPtr)); // 0x11111111, 0x3f811111, 0x55555555, 0x3fa55555
        masm.mulsd(temp2, temp4);
        masm.mulpd(temp6, dest);
        masm.mulsd(temp3, temp4);
        masm.mulpd(temp2, dest);
        masm.mulpd(dest, dest);
        masm.addpd(temp5, externalAddress(scThreePtr)); // 0x1a01a01a, 0xbf2a01a0, 0x16c16c17, 0xbf56c16c
        masm.mulsd(temp4, temp8);
        masm.addpd(temp6, externalAddress(scOnePtr)); // 0x55555555, 0xbfc55555, 0x00000000, 0xbfe00000
        masm.mulpd(temp5, dest);
        masm.movdqu(dest, temp3);
        masm.addsd(temp3, temp9);
        masm.mulpd(temp1, temp7);
        masm.movdqu(temp7, temp4);
        masm.addsd(temp4, temp3);
        masm.addpd(temp6, temp5);
        masm.subsd(temp9, temp3);
        masm.subsd(temp3, temp4);
        masm.addsd(temp1, new AMD64Address(gpr1, 16));
        masm.mulpd(temp6, temp2);
        // Here the small terms are accumulated in temp1 and added to the high part (temp4,
        // copied into dest) at the very end.
        masm.addsd(temp9, dest);
        masm.addsd(temp3, temp7);
        masm.addsd(temp1, temp9);
        masm.addsd(temp1, temp3);
        masm.addsd(temp1, temp6);
        masm.unpckhpd(temp6, temp6);
        masm.movdqu(dest, temp4);
        masm.addsd(temp1, temp6);
        masm.addsd(dest, temp1);
        masm.jmp(bb13);

        // ---- Top word of the product is zero: shift the words up by 64 bits (at most twice) and
        // retry the normalisation; if everything is zero continue at bb11 with dest = temp6 = 0.
        masm.bind(bb7);
        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.movl(gpr7, 0);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb8);

        masm.addl(gpr4, 64);
        masm.movq(gpr8, gpr9);
        masm.movq(gpr9, gpr7);
        masm.cmpq(gpr8, 0);
        masm.jcc(ConditionFlag.NotEqual, bb8);

        masm.xorpd(dest, dest);
        masm.xorpd(temp6, temp6);
        masm.jmp(bb11);

        // ---- Normalisation with a non-positive shift count (flags from the subl in bb8): zero
        // needs no shift, a negative count shifts right instead of left.
        masm.bind(bb9);
        masm.jcc(ConditionFlag.Equal, bb10);

        masm.negl(gpr3);
        masm.shrq(gpr9);
        masm.movq(gpr1, gpr8);
        masm.shrq(gpr8);
        masm.subl(gpr4, gpr3);
        masm.negl(gpr3);
        masm.addl(gpr3, 64);
        masm.shlq(gpr1);
        masm.orq(gpr9, gpr1);
        masm.jmp(bb10);

        // ---- Shift count below 1 (from the cmpl against 1 above) ----
        masm.bind(bb3);
        masm.negl(gpr3);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr8);
        masm.movq(gpr6, gpr8);
        masm.testl(gpr8, Integer.MIN_VALUE);
        masm.jcc(ConditionFlag.NotEqual, bb12);

        masm.shrl(gpr8);
        masm.movl(gpr2, 0);
        masm.shrq(gpr6, 3);
        masm.jmp(bb6);

        // ---- Tested bit set on the regular path: replace the three-word value by its
        // complement (a three-word subtract with borrow from a constant) and record the sign
        // flip in gpr2 (32768 = 0x8000).
        masm.bind(bb4);
        masm.shrl(gpr8);
        masm.movl(gpr2, 536870912); // 0x20000000
        masm.shrl(gpr2);
        masm.shlq(gpr8, 32);
        masm.orq(gpr8, gpr10);
        masm.shlq(gpr2, 32);
        masm.addl(gpr6, 536870912);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.jmp(bb5);

        // ---- Same complement step for the bb3 path ----
        masm.bind(bb12);
        masm.shrl(gpr8);
        masm.movq(gpr2, 0x100000000L);
        masm.shrq(gpr2);
        masm.movl(gpr3, 0);
        masm.movl(gpr10, 0);
        masm.subq(gpr3, gpr7);
        masm.sbbq(gpr10, gpr9);
        masm.sbbq(gpr2, gpr8);
        masm.movq(gpr7, gpr3);
        masm.movq(gpr9, gpr10);
        masm.movq(gpr8, gpr2);
        masm.movl(gpr2, 32768);
        masm.shrq(gpr6, 3);
        masm.addl(gpr6, 536870912);
        masm.jmp(bb6);

        // Common exit of every path.
        masm.bind(bb13);
    }

    /*
     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
     * Source Code
     *
     * ALGORITHM DESCRIPTION - TAN() ---------------------
     *
     * Polynomials coefficients and other constants.
     *
     * Note that in this algorithm, there is a different polynomial for each breakpoint, so there
     * are 32 sets of polynomial coefficients as well as 32 instances of the other constants.
     *
     * The polynomial coefficients and constants are offset from the start of the main block as
     * follows:
     *
     *     0: c8  | c0
     *    16: c9  | c1
     *    32: c10 | c2
     *    48: c11 | c3
     *    64: c12 | c4
     *    80: c13 | c5
     *    96: c14 | c6
     *   112: c15 | c7
     *   128: T_hi
     *   136: T_lo
     *   144: Sigma
     *   152: T_hl
     *   160: Tau
     *   168: Mask
     *   176: (end of block)
     *
     * The total table size is therefore 5632 bytes.
     *
     * Note that c0 and c1 are always zero. We could try storing other constants here, and just
     * loading the low part of the SIMD register in these cases, after ensuring the high part is
     * zero.
     *
     * The higher terms of the polynomial are computed in the *low* part of the SIMD register. This
     * is so we can overlap the multiplication by r^8 and the unpacking of the other part.
     *
     * The constants are:
     *
     *   T_hi + T_lo  = accurate constant term in power series
     *   Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit)
     *   Tau          = multiplier for the reciprocal, always -1 or 0
     *
     * The basic reconstruction formula using these constants is:
     *
     *   High = tau * recip_hi + t_hi
     *   Med  = (sgn * r + t_hl * r)_hi
     *   Low  = (sgn * r + t_hl * r)_lo + tau * recip_lo + T_lo + (T_hl + sigma) * c + pol
     *
     * where pol = c0 + c1 * r + c2 * r^2 + ...
     * + c15 * r^15
     *
     * (c0 = c1 = 0, but using them keeps SIMD regularity)
     *
     * We then do a compensated sum High + Med, add the low parts together and then do the final
     * sum.
     *
     * Here recip_hi + recip_lo is an accurate reciprocal of the remainder modulo pi/2
     *
     * Special cases:
     *   tan(NaN)   = quiet NaN, and raise invalid exception
     *   tan(INF)   = NaN and raise invalid exception
     *   tan(+/-0)  = +/-0
     *
     */

    // The constants below are written as 32-bit words, low word first. The decoded values in the
    // comments assume each (low, high) pair is read as one little-endian IEEE-754 double; the
    // code that consumes them is outside this excerpt - confirm there.

    // Two lanes of 0x3fe0000000000000 (0.5).
    private static int[] oneHalfTan = {
                    0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
    };

    // Lanes 0x4030000000000000 (16.0) and 0x3ff0000000000000 (1.0).
    private static int[] mulSixteen = {
                    0x00000000, 0x40300000, 0x00000000, 0x3ff00000
    };

    // Two lanes with only bit 63 (the sign bit) set.
    private static int[] signMaskTan = {
                    0x00000000, 0x80000000, 0x00000000, 0x80000000
    };

    // Lanes 0x3fe45f306dc9c883 (about 0.63662, i.e. 2/pi) and 0x40245f306dc9c883 (about 10.18592,
    // i.e. 32/pi).
    private static int[] piThirtyTwoInvTan = {
                    0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30
    };

    // Two truncations of pi/32 (about 0.0981748) that differ only in how many trailing mantissa
    // bits are zero.
    private static int[] pOneTan = {
                    0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb
    };

    // Much smaller magnitudes than pOneTan, one negative and one positive lane.
    // NOTE(review): presumably the second pieces of the same split constants, like P_2 in the
    // COS description - confirm.
    private static int[] pTwoTan = {
                    0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23
    };

    // Smaller again than pTwoTan.
    // NOTE(review): presumably the third pieces, like P_3 - confirm.
    private static int[] pThreeTan = {
                    0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e
    };

    private static int[] cTableTan = {
                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa,
                    0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
                    0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555,
                    0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
                    0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000,
                    0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36,
                    0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000,
                    0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000,
                    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b,
                    0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea,
                    0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b,
                    0x3fb97558, 0x26cb3788, 0x3f881308,
0x76fc4985, 0x3fd62ac9, 2462 0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c, 2463 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391, 2464 0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, 2465 0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05, 2466 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, 2467 0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407, 2468 0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000, 2469 0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9, 2470 0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909, 2471 0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57, 2472 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9, 2473 0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e, 2474 0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000, 2475 0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000, 2476 0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000, 2477 0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 2478 0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5, 2479 0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1, 2480 0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 2481 0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462, 2482 0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 2483 0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000, 2484 0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 2485 0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000, 2486 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d, 2487 0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250, 2488 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20, 2489 0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd, 2490 0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794, 2491 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279, 2492 0xb68c1467, 
0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337, 2493 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2494 0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845, 2495 0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4, 2496 0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 2497 0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192, 2498 0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 2499 0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 2500 0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea, 2501 0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1, 2502 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, 2503 0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, 2504 0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd, 2505 0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, 2506 0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd, 2507 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f, 2508 0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, 2509 0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5, 2510 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, 2511 0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24, 2512 0x00000000, 0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000, 2513 0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984, 2514 0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3, 2515 0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c, 2516 0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece, 2517 0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19, 2518 0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000, 2519 0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000, 2520 0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000, 2521 0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 2522 0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d, 
2523 0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96, 2524 0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 2525 0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b, 2526 0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 2527 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 2528 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2529 0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000, 2530 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911, 2531 0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15, 2532 0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e, 2533 0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924, 2534 0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56, 2535 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce, 2536 0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf, 2537 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 2538 0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f, 2539 0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4, 2540 0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e, 2541 0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee, 2542 0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 2543 0xbeaa48a2, 0x0a268358, 0xbf55a443, 0xdfd978e4, 0x3e94b61f, 2544 0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61, 2545 0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642, 2546 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, 2547 0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, 2548 0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6, 2549 0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, 2550 0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea, 2551 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11, 2552 0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, 2553 0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 
0xbf159358, 0xbd397b33, 2554 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, 2555 0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958, 2556 0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000, 2557 0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13, 2558 0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888, 2559 0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82, 2560 0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed, 2561 0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70, 2562 0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000, 2563 0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000, 2564 0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000, 2565 0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 2566 0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92, 2567 0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950, 2568 0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 2569 0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092, 2570 0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 2571 0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000, 2572 0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 2573 0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000, 2574 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 0x9e42962d, 2575 0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441, 2576 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f, 2577 0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646, 2578 0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365, 2579 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62, 2580 0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa, 2581 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 2582 0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545, 2583 0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e, 2584 0x956ed53e, 
0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5, 2585 0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8, 2586 0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 2587 0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c, 2588 0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c, 2589 0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 2590 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000, 2591 0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f, 2592 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2593 0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c, 2594 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162, 2595 0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000, 2596 0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b, 2597 0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2598 0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000, 2599 0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7, 2600 0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000, 2601 0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4, 2602 0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c, 2603 0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9, 2604 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc, 2605 0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 0x3f2cb899, 2606 0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000, 2607 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000, 2608 0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000, 2609 0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 2610 0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9, 2611 0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3, 2612 0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 2613 0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952, 2614 0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 
2615 0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000, 2616 0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 2617 0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000, 2618 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8, 2619 0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b, 2620 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74, 2621 0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7, 2622 0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc, 2623 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f, 2624 0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123, 2625 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 2626 0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01, 2627 0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a, 2628 0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6, 2629 0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746, 2630 0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8, 2631 0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf, 2632 0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1, 2633 0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd, 2634 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, 2635 0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, 2636 0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6, 2637 0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, 2638 0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea, 2639 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11, 2640 0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, 2641 0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33, 2642 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, 2643 0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320, 2644 0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000, 2645 0x00000000, 0x0359f1be, 0x3f0139e4, 
0xa4317c6d, 0x3fa67e17, 2646 0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238, 2647 0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993, 2648 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358, 2649 0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806, 2650 0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000, 2651 0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000, 2652 0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000, 2653 0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 2654 0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e, 2655 0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5, 2656 0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 2657 0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5, 2658 0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 2659 0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000, 2660 0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 2661 0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000, 2662 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723, 2663 0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3, 2664 0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab, 2665 0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111, 2666 0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3, 2667 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000, 2668 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x00000000, 2669 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 2670 0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d, 2671 0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2, 2672 0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018, 2673 0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334, 2674 0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd, 2675 0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d, 2676 0x19d490d6, 
0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6, 2677 0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526, 2678 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, 2679 0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, 2680 0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd, 2681 0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, 2682 0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd, 2683 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f, 2684 0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, 2685 0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5, 2686 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, 2687 0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f, 2688 0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000, 2689 0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9, 2690 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab, 2691 0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060, 2692 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835, 2693 0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e, 2694 0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000, 2695 0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000, 2696 0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000, 2697 0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 2698 0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c, 2699 0x3fc1f250, 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71, 2700 0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 2701 0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092, 2702 0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 2703 0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000, 2704 0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 2705 0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000, 2706 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd, 
// ---------------------------------------------------------------------------------------------
// Constants referenced by tanIntrinsic below.
//
// Each double-precision value is stored as two ints, low 32 bits first and high 32 bits second
// (e.g. twoPowFiftyFiveTan = {0x00000000, 0x43600000} is the bit pattern 0x4360000000000000,
// i.e. 2^55). tanIntrinsic wraps every array in an ArrayDataPointerConstant and addresses it
// through externalAddress(...).
// ---------------------------------------------------------------------------------------------

// 128-bit mask used with andpd: the low quadword 0xfffffffffffc0000 clears the low 18 bits of the
// low lane, and the high lane is zeroed.
private static int[] maskThirtyFiveTan = {
    0xfffc0000, 0xffffffff, 0x00000000, 0x00000000
};

// qElevenTan .. qThreeTan: coefficients of the odd polynomial evaluated on the small-argument
// path of tanIntrinsic (multipliers of x^11, x^9, x^7, x^5 and x^3 respectively).
private static int[] qElevenTan = {
    0xb8fe4d77, 0x3f82609a
};

private static int[] qNineTan = {
    0xbf847a43, 0x3f9664a0
};

private static int[] qSevenTan = {
    0x52c4c8ab, 0x3faba1ba
};

private static int[] qFiveTan = {
    0x11092746, 0x3fc11111
};

private static int[] qThreeTan = {
    0x55555612, 0x3fd55555
};

// Table of 32-bit words read by the large-argument path of tanIntrinsic, which loads seven
// consecutive words (byte offsets 0..24) starting at a position derived from the argument's
// exponent.
// NOTE(review): from index 2 on, the words match the hexadecimal expansion of 2/pi
// (0.A2F9836E 4E441529 ...) - confirm against the Intel LIBM tan source.
private static int[] piInvTableTan = {
    0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1,
    0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561,
    0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c,
    0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
    0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff,
    0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7,
    0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7,
    0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab,
    0xf0cfbc21
};

// Two doubles loaded separately (offsets 0 and 8) by the large-argument path.
// NOTE(review): the bit patterns look like a high/low split of pi/4 - confirm.
private static int[] piFourTan = {
    0x00000000, 0x3fe921fb, 0x4611a626, 0x3e85110b
};

// Scalar double multiplied (mulsd) with the rounded multiple during argument reduction.
private static int[] qqTwoTan = {
    0x676733af, 0x3d32e7b9
};

// 2^55 and 2^-55; used together on the tiny-argument path: dest = (x + x * 2^55) * 2^-55.
private static int[] twoPowFiftyFiveTan = {
    0x00000000, 0x43600000
};

private static int[] twoPowMFiftyFiveTan = {
    0x00000000, 0x3c800000
};

/**
 * Emits the AMD64 instruction sequence for the TAN intrinsic.
 *
 * <p>
 * The emitted code inspects the top 16 bits of the argument (sign, exponent and the four most
 * significant mantissa bits) and dispatches to one of several paths, all of which end at the
 * label {@code bb15} bound at the very end of the sequence:
 * <ul>
 * <li>main path: argument reduction followed by evaluation from a 176-byte record of
 * {@code cTableTan};</li>
 * <li>{@code bb0} fall-through: odd polynomial in x with the {@code q*Tan} coefficients;</li>
 * <li>{@code bb3}: tiny arguments, {@code (x + x * 2^55) * 2^-55};</li>
 * <li>{@code bb2}: exponent field zero, {@code dest} is left as is;</li>
 * <li>{@code bb14}: exponent field all ones, result is {@code 0.0 / 0.0};</li>
 * <li>{@code bb1}: large arguments, integer multi-word reduction against {@code piInvTableTan}
 * before joining a copy of the table-driven evaluation.</li>
 * </ul>
 *
 * <p>
 * The general-purpose temps {@code gpr1Temp..gpr10Temp} (with {@code rcxTemp} in the place of a
 * third one) and the XMM temps {@code xmm1Temp..xmm7Temp} of this instruction are used as scratch
 * registers.
 *
 * @param dest register in which the result is accumulated; every path leaves its result here
 * @param value register holding the argument; copied into {@code dest} first when its encoding
 *            differs from that of {@code dest}
 * @param crb compilation result builder, passed to {@code setCrb} before any constant is
 *            referenced
 * @param masm assembler that receives the instructions
 */
public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Data-section constants; the second constructor argument differs between 16 and 8.
    // NOTE(review): presumably the required alignment in bytes - confirm in
    // ArrayDataPointerConstant.
    ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16);
    ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16);
    ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16);
    ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16);
    ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16);
    ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16);
    ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16);
    ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16);
    ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16);
    ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16);
    ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16);
    ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8);
    ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16);
    ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16);
    ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16);
    ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8);
    ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8);
    ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
    ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8);
    ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8);

    // Label numbering is not contiguous: there is no bb4 and no bb7.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();

    // gpr3 is bound to rcxTemp rather than to a "gpr3Temp".
    // NOTE(review): the one-operand shift calls further down (shll(gpr8), shlq(gpr8), ...)
    // presumably take their count from CL, which would be why gpr3 must be RCX - confirm in
    // AMD64MacroAssembler.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);

    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);

    setCrb(crb);
    // Everything below works on dest, so bring the argument there first.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }

    // Dispatch on the top 16 bits of the double (word 3) with the sign bit masked off, rebased
    // by 16314 (0x3FBA). An unsigned result in [0, 270] stays on the table-driven path below;
    // anything else goes to bb0, which re-uses the flags of this cmpl.
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16314);
    masm.cmpl(gpr1, 270);
    masm.jcc(ConditionFlag.Above, bb0);

    // Table-driven path. The argument is scaled by piThirtyTwoInvTan, +/-0.5 (carrying the
    // argument's sign) is added, and the sum is truncated to an integer (cvttsd2sil on the high
    // lane, cvttpd2dq on both lanes).
    // NOTE(review): presumably this is rounding of x * 32/pi to the nearest integer - confirm.
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
    masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, 0x40300000, 0x00000000, 0x3ff00000
    masm.unpcklpd(dest, dest);
    masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, 0x80000000, 0x00000000, 0x80000000
    masm.andpd(temp4, dest);
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30
    masm.mulpd(temp1, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp7, temp1);
    masm.unpckhpd(temp7, temp7);
    masm.cvttsd2sil(gpr4, temp7);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb
    masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, 0x3d32e7b9
    masm.addq(gpr4, 469248);
    masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23
    masm.mulpd(temp3, temp1);
    // Interleaved with the floating-point work, gpr4 is turned into a table offset:
    // index = (n + 469248) & 31, then index * 11 (1x + 2x + 8x) and finally << 4, i.e. the byte
    // offset of a 176-byte record in cTableTan.
    masm.andq(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movq(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shlq(gpr3, 1);
    masm.subpd(dest, temp3);
    masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e
    masm.addq(gpr4, gpr3);
    masm.shlq(gpr3, 2);
    masm.addq(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, 0x3ff00000
    masm.shlq(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, 0xffffffff, 0x00000000, 0x00000000
    masm.movdqu(temp3, dest);
    // gpr1 now points at the selected cTableTan record; all AMD64Address(gpr1, k) operands
    // below read from that record (k in 0..168).
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    // temp6 = 1.0 / temp5 (temp5 was truncated with maskThirtyFiveTan above).
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.subsd(temp3, temp5);
    masm.mulpd(temp7, dest);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.mulpd(temp1, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);

    // Outside the [0, 270] window. The flags are still those of cmpl(gpr1, 270): signed-greater
    // means the exponent word was above the window (large argument, bb1); otherwise it was
    // below it (small argument, handled right here).
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);

    masm.pextrw(gpr1, dest, 3);
    masm.movl(gpr4, gpr1);
    // 32752 == 0x7FF0 isolates the 11 exponent bits; an all-zero exponent goes to bb2.
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb2);

    // Sign-less top word below 15904 (0x3E20): tiny argument, handled at bb3.
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 15904);
    masm.jcc(ConditionFlag.Below, bb3);

    // Small argument: with t = x * x,
    // dest = x + x^3 * (q3 + t * (q5 + t * (q7 + t * (q9 + t * q11)))).
    masm.movdqu(temp2, dest);
    masm.movdqu(temp3, dest);
    masm.movdq(temp1, externalAddress(qElevenTanPtr)); // 0xb8fe4d77, 0x3f82609a
    masm.mulsd(temp2, dest);
    masm.mulsd(temp3, temp2);
    masm.mulsd(temp1, temp2);
    masm.addsd(temp1, externalAddress(qNineTanPtr)); // 0xbf847a43, 0x3f9664a0
    masm.mulsd(temp1, temp2);
    masm.addsd(temp1, externalAddress(qSevenTanPtr)); // 0x52c4c8ab, 0x3faba1ba
    masm.mulsd(temp1, temp2);
    masm.addsd(temp1, externalAddress(qFiveTanPtr)); // 0x11092746, 0x3fc11111
    masm.mulsd(temp1, temp2);
    masm.addsd(temp1, externalAddress(qThreeTanPtr)); // 0x55555612, 0x3fd55555
    masm.mulsd(temp1, temp3);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);

    // Tiny argument: dest = (x + x * 2^55) * 2^-55.
    masm.bind(bb3);
    masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr)); // 0x00000000, 0x43600000
    masm.mulsd(temp3, dest);
    masm.addsd(dest, temp3);
    masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr)); // 0x00000000, 0x3c800000
    masm.jmp(bb15);

    // Exponent field all ones (reached from bb1): dest = 0.0 / 0.0.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);

    // Exponent field zero: dest keeps the argument. The product computed in temp1 is never
    // read. NOTE(review): presumably it exists only for its floating-point exception side
    // effect - confirm.
    masm.bind(bb2);
    masm.movdqu(temp1, dest);
    masm.mulsd(temp1, temp1);
    masm.jmp(bb15);

    // Large argument. Exponent field all ones is diverted to bb14 first.
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);

    // gpr3 becomes a 4-byte-aligned offset into piInvTableTan derived from the exponent, then an
    // absolute address. The significand is split into gpr4 (low 32 bits) and
    // gpr1 (bit 20 set, i.e. the implicit one, above mantissa bits 51..32). Seven table words
    // are multiplied with both halves and the partial products are accumulated with manual
    // carry propagation into gpr8 / gpr9 / gpr7 (plus gpr10 as the low half merged into gpr8).
    // NOTE(review): this looks like a Payne-Hanek style reduction - confirm against the Intel
    // LIBM description before relying on that.
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTableTanPtr));
    masm.addq(gpr3, gpr10);
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    masm.pextrw(gpr2, dest, 3);
    // Turn gpr3 back from an address into a table offset, scale it by 8 and combine it with
    // the unbiased exponent (gpr2) to obtain a bit count; gpr5 keeps the sign bit (0x8000) of
    // the argument's top word.
    masm.leaq(gpr6, externalAddress(piInvTableTanPtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Less, bb5);

    // Non-negative count. The one-operand shifts use an implicit count (see the note at the
    // gpr3 declaration).
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 1073741823);
    masm.testl(gpr8, 536870912);
    masm.jcc(ConditionFlag.NotEqual, bb6);

    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);

    // Common continuation: if the top limb gpr8 is zero, bb9 shifts the limbs up first.
    masm.bind(bb8);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb9);

    // Normalisation: compare the position of the highest set bit of gpr8 with 29 and shift the
    // gpr8 / gpr9 / gpr7 chain left here, or right at bb11; gpr4 tracks the shift amount.
    masm.bind(bb10);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb11);

    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);

    // Convert the two integer limbs to doubles and scale them with factors whose exponent word
    // is assembled from gpr4 (16368 == 0x3FF0 minus 16 * gpr4, with the sign from gpr5 and the
    // flip bit from gpr2), then multiply by the two piFourTan doubles.
    masm.bind(bb12);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourTanPtr));
    masm.movdq(temp2, new AMD64Address(gpr1, 0)); // 0x00000000, 0x3fe921fb,
    masm.movdq(temp7, new AMD64Address(gpr1, 8)); // 0x4611a626, 0x3e85110b
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 30);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    // gpr5 is 0 or -1 (sign of the argument); (gpr6 + gpr5) ^ gpr5 negates gpr6 when it is -1.
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp7, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp7, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp7);
    masm.subsd(temp2, dest);
    masm.addsd(temp7, temp2);
    // From here on the sequence repeats the table-driven evaluation of the main path on the
    // reduced value, with two differences: gpr1 << 4 is added into the table index, and the
    // low-order part kept in temp7 is added into temp2.
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr)); // 0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30
    // Duplicate the low lane into both lanes; movlhps is the fallback without SSE3.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(dest, dest);
    } else {
        masm.movlhps(dest, dest);
    }
    masm.movdqu(temp4, externalAddress(signMaskTanPtr)); // 0x00000000, 0x80000000, 0x00000000, 0x80000000
    masm.andpd(temp4, dest);
    masm.mulpd(temp1, dest);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp7, temp7);
    } else {
        masm.movlhps(temp7, temp7);
    }
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr)); // 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
    masm.movdqu(temp6, externalAddress(mulSixteenPtr)); // 0x00000000, 0x40300000, 0x00000000, 0x3ff00000
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp5, temp1);
    masm.unpckhpd(temp5, temp5);
    masm.cvttsd2sil(gpr4, temp5);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    masm.movdqu(temp3, externalAddress(pOneTanPtr)); // 0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb
    masm.movdq(temp5, externalAddress(qqTwoTanPtr)); // 0x676733af, 0x3d32e7b9
    masm.shll(gpr1, 4);
    masm.addl(gpr4, 469248);
    masm.movdqu(temp4, externalAddress(pTwoTanPtr)); // 0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23
    masm.mulpd(temp3, temp1);
    masm.addl(gpr4, gpr1);
    // Same index computation as on the main path: (& 31) * 11 * 16.
    masm.andl(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movl(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shll(gpr3, 1);
    masm.subpd(dest, temp3);
    masm.mulpd(temp1, externalAddress(pThreeTanPtr)); // 0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e
    masm.addl(gpr4, gpr3);
    masm.shll(gpr3, 2);
    masm.addl(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    masm.movdq(temp6, externalAddress(onePtr)); // 0x00000000, 0x3ff00000
    masm.shll(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr)); // 0xfffc0000, 0xffffffff, 0x00000000, 0x00000000
    masm.movdqu(temp3, dest);
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.subsd(temp3, temp5);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.addpd(temp2, temp7);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.mulpd(temp7, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp1, dest);
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    masm.movdq(temp7, externalAddress(onePtr)); // 0x00000000, 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);

    // Top limb is zero: move the limbs up by 64 bits (gpr9 -> gpr8, gpr7 -> gpr9) and add 64 to
    // gpr4. Tried at most twice; if gpr8 is still zero the code goes straight to bb12.
    masm.bind(bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);

    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);

    masm.jmp(bb12);

    // Reached with the flags of subl(gpr3, gpr10) in bb10: equal means no shift is needed;
    // otherwise shift the chain right by the (negated) difference.
    masm.bind(bb11);
    masm.jcc(ConditionFlag.Equal, bb12);

    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb12);

    // Negative count (from the cmpl(gpr3, 0) above).
    masm.bind(bb5);
    masm.notl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);

    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 2);
    masm.jmp(bb8);

    // bb6 and bb13: the tested bit was set. The three-limb value is replaced by
    // (constant - value) using a subq / sbbq / sbbq borrow chain, gpr6 is bumped by 2^30, and
    // gpr2 is set to 32768 so that the xorl at bb12 flips the sign bit of the scale factor.
    masm.bind(bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 1073741824);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 1073741824);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb8);

    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 2);
    masm.addl(gpr6, 1073741824);
    masm.jmp(bb8);

    // Common exit of every path; the result is in dest.
    masm.bind(bb15);
}
3456 * 3457 * Special cases: exp(NaN) = NaN exp(+INF) = +INF exp(-INF) = 0 exp(x) = 1 for subnormals for 3458 * finite argument, only exp(0)=1 is exact For IEEE double if x > 709.782712893383973096 then 3459 * exp(x) overflow if x < -745.133219101941108420 then exp(x) underflow 3460 * 3461 */ 3462 3463 private static int[] cvExp = { 3464 0x652b82fe, 0x40571547, 0x652b82fe, 0x40571547, 0xfefa0000, 3465 0x3f862e42, 0xfefa0000, 0x3f862e42, 0xbc9e3b3a, 0x3d1cf79a, 3466 0xbc9e3b3a, 0x3d1cf79a, 0xfffffffe, 0x3fdfffff, 0xfffffffe, 3467 0x3fdfffff, 0xe3289860, 0x3f56c15c, 0x555b9e25, 0x3fa55555, 3468 0xc090cf0f, 0x3f811115, 0x55548ba1, 0x3fc55555 3469 }; 3470 3471 private static int[] shifterExp = { 3472 0x00000000, 0x43380000, 0x00000000, 0x43380000 3473 }; 3474 3475 private static int[] mMaskExp = { 3476 0xffffffc0, 0x00000000, 0xffffffc0, 0x00000000 3477 }; 3478 3479 private static int[] biasExp = { 3480 0x0000ffc0, 0x00000000, 0x0000ffc0, 0x00000000 3481 }; 3482 3483 private static int[] tblAddrExp = { 3484 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0e03754d, 3485 0x3cad7bbf, 0x3e778060, 0x00002c9a, 0x3567f613, 0x3c8cd252, 3486 0xd3158574, 0x000059b0, 0x61e6c861, 0x3c60f74e, 0x18759bc8, 3487 0x00008745, 0x5d837b6c, 0x3c979aa6, 0x6cf9890f, 0x0000b558, 3488 0x702f9cd1, 0x3c3ebe3d, 0x32d3d1a2, 0x0000e3ec, 0x1e63bcd8, 3489 0x3ca3516e, 0xd0125b50, 0x00011301, 0x26f0387b, 0x3ca4c554, 3490 0xaea92ddf, 0x0001429a, 0x62523fb6, 0x3ca95153, 0x3c7d517a, 3491 0x000172b8, 0x3f1353bf, 0x3c8b898c, 0xeb6fcb75, 0x0001a35b, 3492 0x3e3a2f5f, 0x3c9aecf7, 0x3168b9aa, 0x0001d487, 0x44a6c38d, 3493 0x3c8a6f41, 0x88628cd6, 0x0002063b, 0xe3a8a894, 0x3c968efd, 3494 0x6e756238, 0x0002387a, 0x981fe7f2, 0x3c80472b, 0x65e27cdd, 3495 0x00026b45, 0x6d09ab31, 0x3c82f7e1, 0xf51fdee1, 0x00029e9d, 3496 0x720c0ab3, 0x3c8b3782, 0xa6e4030b, 0x0002d285, 0x4db0abb6, 3497 0x3c834d75, 0x0a31b715, 0x000306fe, 0x5dd3f84a, 0x3c8fdd39, 3498 0xb26416ff, 0x00033c08, 0xcc187d29, 0x3ca12f8c, 0x373aa9ca, 3499 
0x000371a7, 0x738b5e8b, 0x3ca7d229, 0x34e59ff6, 0x0003a7db, 3500 0xa72a4c6d, 0x3c859f48, 0x4c123422, 0x0003dea6, 0x259d9205, 3501 0x3ca8b846, 0x21f72e29, 0x0004160a, 0x60c2ac12, 0x3c4363ed, 3502 0x6061892d, 0x00044e08, 0xdaa10379, 0x3c6ecce1, 0xb5c13cd0, 3503 0x000486a2, 0xbb7aafb0, 0x3c7690ce, 0xd5362a27, 0x0004bfda, 3504 0x9b282a09, 0x3ca083cc, 0x769d2ca6, 0x0004f9b2, 0xc1aae707, 3505 0x3ca509b0, 0x569d4f81, 0x0005342b, 0x18fdd78e, 0x3c933505, 3506 0x36b527da, 0x00056f47, 0xe21c5409, 0x3c9063e1, 0xdd485429, 3507 0x0005ab07, 0x2b64c035, 0x3c9432e6, 0x15ad2148, 0x0005e76f, 3508 0x99f08c0a, 0x3ca01284, 0xb03a5584, 0x0006247e, 0x0073dc06, 3509 0x3c99f087, 0x82552224, 0x00066238, 0x0da05571, 0x3c998d4d, 3510 0x667f3bcc, 0x0006a09e, 0x86ce4786, 0x3ca52bb9, 0x3c651a2e, 3511 0x0006dfb2, 0x206f0dab, 0x3ca32092, 0xe8ec5f73, 0x00071f75, 3512 0x8e17a7a6, 0x3ca06122, 0x564267c8, 0x00075feb, 0x461e9f86, 3513 0x3ca244ac, 0x73eb0186, 0x0007a114, 0xabd66c55, 0x3c65ebe1, 3514 0x36cf4e62, 0x0007e2f3, 0xbbff67d0, 0x3c96fe9f, 0x994cce12, 3515 0x00082589, 0x14c801df, 0x3c951f14, 0x9b4492ec, 0x000868d9, 3516 0xc1f0eab4, 0x3c8db72f, 0x422aa0db, 0x0008ace5, 0x59f35f44, 3517 0x3c7bf683, 0x99157736, 0x0008f1ae, 0x9c06283c, 0x3ca360ba, 3518 0xb0cdc5e4, 0x00093737, 0x20f962aa, 0x3c95e8d1, 0x9fde4e4f, 3519 0x00097d82, 0x2b91ce27, 0x3c71affc, 0x82a3f090, 0x0009c491, 3520 0x589a2ebd, 0x3c9b6d34, 0x7b5de564, 0x000a0c66, 0x9ab89880, 3521 0x3c95277c, 0xb23e255c, 0x000a5503, 0x6e735ab3, 0x3c846984, 3522 0x5579fdbf, 0x000a9e6b, 0x92cb3387, 0x3c8c1a77, 0x995ad3ad, 3523 0x000ae89f, 0xdc2d1d96, 0x3ca22466, 0xb84f15fa, 0x000b33a2, 3524 0xb19505ae, 0x3ca1112e, 0xf2fb5e46, 0x000b7f76, 0x0a5fddcd, 3525 0x3c74ffd7, 0x904bc1d2, 0x000bcc1e, 0x30af0cb3, 0x3c736eae, 3526 0xdd85529c, 0x000c199b, 0xd10959ac, 0x3c84e08f, 0x2e57d14b, 3527 0x000c67f1, 0x6c921968, 0x3c676b2c, 0xdcef9069, 0x000cb720, 3528 0x36df99b3, 0x3c937009, 0x4a07897b, 0x000d072d, 0xa63d07a7, 3529 0x3c74a385, 0xdcfba487, 0x000d5818, 0xd5c192ac, 
0x3c8e5a50, 3530 0x03db3285, 0x000da9e6, 0x1c4a9792, 0x3c98bb73, 0x337b9b5e, 3531 0x000dfc97, 0x603a88d3, 0x3c74b604, 0xe78b3ff6, 0x000e502e, 3532 0x92094926, 0x3c916f27, 0xa2a490d9, 0x000ea4af, 0x41aa2008, 3533 0x3c8ec3bc, 0xee615a27, 0x000efa1b, 0x31d185ee, 0x3c8a64a9, 3534 0x5b6e4540, 0x000f5076, 0x4d91cd9d, 0x3c77893b, 0x819e90d8, 3535 0x000fa7c1 3536 }; 3537 3538 private static int[] allOnesExp = { 3539 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff 3540 }; 3541 3542 private static int[] expBias = { 3543 0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000 3544 }; 3545 3546 private static int[] xMaxExp = { 3547 0xffffffff, 0x7fefffff 3548 }; 3549 3550 private static int[] xMinExp = { 3551 0x00000000, 0x00100000 3552 }; 3553 3554 private static int[] infExp = { 3555 0x00000000, 0x7ff00000 3556 }; 3557 3558 private static int[] zeroExp = { 3559 0x00000000, 0x00000000 3560 }; 3561 3562 public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) { 3563 ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16); 3564 ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16); 3565 ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8); 3566 ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16); 3567 ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16); 3568 ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16); 3569 ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8); 3570 ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8); 3571 ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8); 3572 ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8); 3573 ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8); 3574 ArrayDataPointerConstant allOnesExpPtr = new 
ArrayDataPointerConstant(allOnesExp, 8); 3575 3576 Label bb0 = new Label(); 3577 Label bb1 = new Label(); 3578 Label bb2 = new Label(); 3579 Label bb3 = new Label(); 3580 Label bb4 = new Label(); 3581 Label bb5 = new Label(); 3582 Label bb7 = new Label(); 3583 Label bb8 = new Label(); 3584 Label bb9 = new Label(); 3585 Label bb10 = new Label(); 3586 Label bb11 = new Label(); 3587 Label bb12 = new Label(); 3588 Label bb14 = new Label(); 3589 3590 Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD); 3591 Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD); 3592 Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD); 3593 Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD); 3594 Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD); 3595 3596 Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE); 3597 Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE); 3598 Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE); 3599 Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE); 3600 Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE); 3601 Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE); 3602 Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE); 3603 Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE); 3604 Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE); 3605 Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE); 3606 3607 AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp); 3608 3609 setCrb(crb); 3610 masm.movsd(stackSlot, value); 3611 if (dest.encoding != value.encoding) { 3612 masm.movdqu(dest, value); 3613 } 3614 3615 masm.movdqu(temp9, externalAddress(mMaskExpPtr)); // 0xffffffc0, 3616 // 0x00000000, 3617 // 0xffffffc0, 3618 // 0x00000000 3619 masm.movdqu(temp10, externalAddress(biasExpPtr)); // 0x0000ffc0, 3620 // 0x00000000, 3621 // 0x0000ffc0, 3622 // 0x00000000 3623 masm.unpcklpd(dest, dest); 3624 masm.leaq(gpr5, stackSlot); 3625 masm.leaq(gpr2, externalAddress(cvExpPtr)); 3626 masm.movdqu(temp1, new 
AMD64Address(gpr2, 0)); // 0x652b82fe, 3627 // 0x40571547, 3628 // 0x652b82fe, 3629 // 0x40571547 3630 masm.movdqu(temp6, externalAddress(shifterExpPtr)); // 0x00000000, 3631 // 0x43380000, 3632 // 0x00000000, 3633 // 0x43380000 3634 masm.movdqu(temp2, new AMD64Address(gpr2, 16)); // 0xfefa0000, 3635 // 0x3f862e42, 3636 // 0xfefa0000, 3637 // 0x3f862e42 3638 masm.movdqu(temp3, new AMD64Address(gpr2, 32)); // 0xbc9e3b3a, 3639 // 0x3d1cf79a, 3640 // 0xbc9e3b3a, 3641 // 0x3d1cf79a 3642 masm.pextrw(gpr1, dest, 3); 3643 masm.andl(gpr1, 32767); 3644 masm.movl(gpr4, 16527); 3645 masm.subl(gpr4, gpr1); 3646 masm.subl(gpr1, 15504); 3647 masm.orl(gpr4, gpr1); 3648 masm.cmpl(gpr4, Integer.MIN_VALUE); 3649 masm.jcc(ConditionFlag.AboveEqual, bb0); 3650 3651 masm.leaq(gpr4, externalAddress(tblAddrExpPtr)); 3652 masm.movdqu(temp8, new AMD64Address(gpr2, 48)); // 0xfffffffe, 3653 // 0x3fdfffff, 3654 // 0xfffffffe, 3655 // 0x3fdfffff 3656 masm.movdqu(temp4, new AMD64Address(gpr2, 64)); // 0xe3289860, 3657 // 0x3f56c15c, 3658 // 0x555b9e25, 3659 // 0x3fa55555 3660 masm.movdqu(temp5, new AMD64Address(gpr2, 80)); // 0xc090cf0f, 3661 // 0x3f811115, 3662 // 0x55548ba1, 3663 // 0x3fc55555 3664 masm.mulpd(temp1, dest); 3665 masm.addpd(temp1, temp6); 3666 masm.movapd(temp7, temp1); 3667 masm.movdl(gpr1, temp1); 3668 masm.pand(temp7, temp9); 3669 masm.subpd(temp1, temp6); 3670 masm.mulpd(temp2, temp1); 3671 masm.mulpd(temp3, temp1); 3672 masm.paddq(temp7, temp10); 3673 masm.subpd(dest, temp2); 3674 masm.movl(gpr3, gpr1); 3675 masm.andl(gpr3, 63); 3676 masm.shll(gpr3, 4); 3677 masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0)); 3678 masm.sarl(gpr1, 6); 3679 masm.psllq(temp7, 46); 3680 masm.subpd(dest, temp3); 3681 masm.mulpd(temp4, dest); 3682 masm.movl(gpr4, gpr1); 3683 masm.movapd(temp6, dest); 3684 masm.movapd(temp1, dest); 3685 masm.mulpd(temp6, temp6); 3686 masm.mulpd(dest, temp6); 3687 masm.addpd(temp5, temp4); 3688 masm.mulsd(dest, temp6); 3689 masm.mulpd(temp6, temp8); 
3690 masm.addsd(temp1, temp2); 3691 masm.unpckhpd(temp2, temp2); 3692 masm.mulpd(dest, temp5); 3693 masm.addsd(temp1, dest); 3694 masm.por(temp2, temp7); 3695 masm.unpckhpd(dest, dest); 3696 masm.addsd(dest, temp1); 3697 masm.addsd(dest, temp6); 3698 masm.addl(gpr4, 894); 3699 masm.cmpl(gpr4, 1916); 3700 masm.jcc(ConditionFlag.Above, bb1); 3701 3702 masm.mulsd(dest, temp2); 3703 masm.addsd(dest, temp2); 3704 masm.jmp(bb14); 3705 3706 masm.bind(bb1); 3707 masm.movdqu(temp6, externalAddress(expBiasPtr)); // 0x00000000, 3708 // 0x3ff00000, 3709 // 0x00000000, 3710 // 0x3ff00000 3711 masm.xorpd(temp3, temp3); 3712 masm.movdqu(temp4, externalAddress(allOnesExpPtr)); // 0xffffffff, 3713 // 0xffffffff, 3714 // 0xffffffff, 3715 // 0xffffffff 3716 masm.movl(gpr4, -1022); 3717 masm.subl(gpr4, gpr1); 3718 masm.movdl(temp5, gpr4); 3719 masm.psllq(temp4, temp5); 3720 masm.movl(gpr3, gpr1); 3721 masm.sarl(gpr1, 1); 3722 masm.pinsrw(temp3, gpr1, 3); 3723 masm.psllq(temp3, 4); 3724 masm.psubd(temp2, temp3); 3725 masm.mulsd(dest, temp2); 3726 masm.cmpl(gpr4, 52); 3727 masm.jcc(ConditionFlag.Greater, bb2); 3728 3729 masm.pand(temp4, temp2); 3730 masm.paddd(temp3, temp6); 3731 masm.subsd(temp2, temp4); 3732 masm.addsd(dest, temp2); 3733 masm.cmpl(gpr3, 1023); 3734 masm.jcc(ConditionFlag.GreaterEqual, bb3); 3735 3736 masm.pextrw(gpr3, dest, 3); 3737 masm.andl(gpr3, 32768); 3738 masm.orl(gpr4, gpr3); 3739 masm.cmpl(gpr4, 0); 3740 masm.jcc(ConditionFlag.Equal, bb4); 3741 3742 masm.movapd(temp6, dest); 3743 masm.addsd(dest, temp4); 3744 masm.mulsd(dest, temp3); 3745 masm.pextrw(gpr3, dest, 3); 3746 masm.andl(gpr3, 32752); 3747 masm.cmpl(gpr3, 0); 3748 masm.jcc(ConditionFlag.Equal, bb5); 3749 3750 masm.jmp(bb14); 3751 3752 masm.bind(bb5); 3753 masm.mulsd(temp6, temp3); 3754 masm.mulsd(temp4, temp3); 3755 masm.movdqu(dest, temp6); 3756 masm.pxor(temp6, temp4); 3757 masm.psrad(temp6, 31); 3758 masm.pshufd(temp6, temp6, 85); 3759 masm.psllq(dest, 1); 3760 masm.psrlq(dest, 1); 3761 
masm.pxor(dest, temp6); 3762 masm.psrlq(temp6, 63); 3763 masm.paddq(dest, temp6); 3764 masm.paddq(dest, temp4); 3765 masm.jmp(bb14); 3766 3767 masm.bind(bb4); 3768 masm.addsd(dest, temp4); 3769 masm.mulsd(dest, temp3); 3770 masm.jmp(bb14); 3771 3772 masm.bind(bb3); 3773 masm.addsd(dest, temp4); 3774 masm.mulsd(dest, temp3); 3775 masm.pextrw(gpr3, dest, 3); 3776 masm.andl(gpr3, 32752); 3777 masm.cmpl(gpr3, 32752); 3778 masm.jcc(ConditionFlag.AboveEqual, bb7); 3779 3780 masm.jmp(bb14); 3781 3782 masm.bind(bb2); 3783 masm.paddd(temp3, temp6); 3784 masm.addpd(dest, temp2); 3785 masm.mulsd(dest, temp3); 3786 masm.jmp(bb14); 3787 3788 masm.bind(bb8); 3789 masm.movsd(dest, externalAddress(xMaxExpPtr)); // 0xffffffff, 3790 // 0x7fefffff 3791 masm.movsd(temp8, externalAddress(xMinExpPtr)); // 0x00000000, 3792 // 0x00100000 3793 masm.cmpl(gpr1, 2146435072); 3794 masm.jcc(ConditionFlag.AboveEqual, bb9); 3795 3796 masm.movl(gpr1, new AMD64Address(gpr5, 4)); 3797 masm.cmpl(gpr1, Integer.MIN_VALUE); 3798 masm.jcc(ConditionFlag.AboveEqual, bb10); 3799 3800 masm.mulsd(dest, dest); 3801 3802 masm.bind(bb7); 3803 masm.jmp(bb14); 3804 3805 masm.bind(bb10); 3806 masm.mulsd(dest, temp8); 3807 masm.jmp(bb14); 3808 3809 masm.bind(bb9); 3810 masm.movl(gpr4, stackSlot); 3811 masm.cmpl(gpr1, 2146435072); 3812 masm.jcc(ConditionFlag.Above, bb11); 3813 3814 masm.cmpl(gpr4, 0); 3815 masm.jcc(ConditionFlag.NotEqual, bb11); 3816 3817 masm.movl(gpr1, new AMD64Address(gpr5, 4)); 3818 masm.cmpl(gpr1, 2146435072); 3819 masm.jcc(ConditionFlag.NotEqual, bb12); 3820 3821 masm.movsd(dest, externalAddress(infExpPtr)); // 0x00000000, 3822 // 0x7ff00000 3823 masm.jmp(bb14); 3824 3825 masm.bind(bb12); 3826 masm.movsd(dest, externalAddress(zeroExpPtr)); // 0x00000000, 3827 // 0x00000000 3828 masm.jmp(bb14); 3829 3830 masm.bind(bb11); 3831 masm.movsd(dest, stackSlot); 3832 masm.addsd(dest, dest); 3833 masm.jmp(bb14); 3834 3835 masm.bind(bb0); 3836 masm.movl(gpr1, new AMD64Address(gpr5, 4)); 3837 
masm.andl(gpr1, 2147483647); 3838 masm.cmpl(gpr1, 1083179008); 3839 masm.jcc(ConditionFlag.AboveEqual, bb8); 3840 3841 masm.addsd(dest, externalAddress(onePtr)); // 0x00000000, 3842 // 0x3ff00000 3843 masm.bind(bb14); 3844 } 3845 }