1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, Intel Corporation. All rights reserved. 4 * Intel Math Library (LIBM) Source Code 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 27 package org.graalvm.compiler.lir.amd64; 28 29 import static jdk.vm.ci.amd64.AMD64.r10; 30 import static jdk.vm.ci.amd64.AMD64.r11; 31 import static jdk.vm.ci.amd64.AMD64.r8; 32 import static jdk.vm.ci.amd64.AMD64.r9; 33 import static jdk.vm.ci.amd64.AMD64.rax; 34 import static jdk.vm.ci.amd64.AMD64.rbx; 35 import static jdk.vm.ci.amd64.AMD64.rcx; 36 import static jdk.vm.ci.amd64.AMD64.rdi; 37 import static jdk.vm.ci.amd64.AMD64.rdx; 38 import static jdk.vm.ci.amd64.AMD64.rsi; 39 import static jdk.vm.ci.amd64.AMD64.rsp; 40 import static jdk.vm.ci.amd64.AMD64.xmm0; 41 import static jdk.vm.ci.amd64.AMD64.xmm1; 42 import static jdk.vm.ci.amd64.AMD64.xmm2; 43 import static jdk.vm.ci.amd64.AMD64.xmm3; 44 import static jdk.vm.ci.amd64.AMD64.xmm4; 45 import static jdk.vm.ci.amd64.AMD64.xmm5; 46 import static jdk.vm.ci.amd64.AMD64.xmm6; 47 import static jdk.vm.ci.amd64.AMD64.xmm7; 48 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.pointerConstant; 49 import static org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper.recordExternalAddress; 50 51 import org.graalvm.compiler.asm.Label; 52 import org.graalvm.compiler.asm.amd64.AMD64Address; 53 import org.graalvm.compiler.asm.amd64.AMD64Assembler; 54 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler; 55 import org.graalvm.compiler.lir.LIRInstructionClass; 56 import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant; 57 import org.graalvm.compiler.lir.asm.CompilationResultBuilder; 58 59 import jdk.vm.ci.amd64.AMD64; 60 61 /** 62 * <pre> 63 * ALGORITHM DESCRIPTION - TAN() 64 * --------------------- 65 * 66 * Polynomials coefficients and other constants. 67 * 68 * Note that in this algorithm, there is a different polynomial for 69 * each breakpoint, so there are 32 sets of polynomial coefficients 70 * as well as 32 instances of the other constants. 
 *
 * The polynomial coefficients and constants are offset from the start
 * of the main block as follows:
 *
 *   0:  c8 | c0
 *  16:  c9 | c1
 *  32: c10 | c2
 *  48: c11 | c3
 *  64: c12 | c4
 *  80: c13 | c5
 *  96: c14 | c6
 * 112: c15 | c7
 * 128: T_hi
 * 136: T_lo
 * 144: Sigma
 * 152: T_hl
 * 160: Tau
 * 168: Mask
 * 176: (end of block)
 *
 * The total table size is therefore 5632 bytes (32 blocks of 176 bytes each).
 *
 * Note that c0 and c1 are always zero. We could try storing
 * other constants here, and just loading the low part of the
 * SIMD register in these cases, after ensuring the high part
 * is zero.
 *
 * The higher terms of the polynomial are computed in the *low*
 * part of the SIMD register. This is so we can overlap the
 * multiplication by r^8 and the unpacking of the other part.
 *
 * The constants are:
 * T_hi + T_lo = accurate constant term in power series
 * Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit)
 * Tau = multiplier for the reciprocal, always -1 or 0
 *
 * The basic reconstruction formula using these constants is:
 *
 * High = tau * recip_hi + t_hi
 * Med  = (sgn * r + t_hl * r)_hi
 * Low  = (sgn * r + t_hl * r)_lo +
 *        tau * recip_lo + T_lo + (T_hl + sigma) * c + pol
 *
 * where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15
 *
 * (c0 = c1 = 0, but using them keeps SIMD regularity)
 *
 * We then do a compensated sum High + Med, add the low parts together
 * and then do the final sum.
120 * 121 * Here recip_hi + recip_lo is an accurate reciprocal of the remainder 122 * modulo pi/2 123 * 124 * Special cases: 125 * tan(NaN) = quiet NaN, and raise invalid exception 126 * tan(INF) = NaN and raise invalid exception 127 * tan(+/-0) = +/-0 128 * </pre> 129 */ 130 public final class AMD64MathTanOp extends AMD64MathIntrinsicUnaryOp { 131 132 public static final LIRInstructionClass<AMD64MathTanOp> TYPE = LIRInstructionClass.create(AMD64MathTanOp.class); 133 134 public AMD64MathTanOp() { 135 super(TYPE, /* GPR */ rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11, 136 /* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7); 137 } 138 139 private ArrayDataPointerConstant onehalf = pointerConstant(16, new int[]{ 140 // @formatter:off 141 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000 142 // @formatter:on 143 }); 144 145 private ArrayDataPointerConstant mul16 = pointerConstant(16, new int[]{ 146 // @formatter:off 147 0x00000000, 0x40300000, 0x00000000, 0x3ff00000 148 // @formatter:on 149 }); 150 151 private ArrayDataPointerConstant signMask = pointerConstant(16, new int[]{ 152 // @formatter:off 153 0x00000000, 0x80000000, 0x00000000, 0x80000000 154 // @formatter:on 155 }); 156 157 private ArrayDataPointerConstant pi32Inv = pointerConstant(16, new int[]{ 158 // @formatter:off 159 0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30 160 // @formatter:on 161 }); 162 163 private ArrayDataPointerConstant p1 = pointerConstant(16, new int[]{ 164 // @formatter:off 165 0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb 166 // @formatter:on 167 }); 168 169 private ArrayDataPointerConstant p2 = pointerConstant(16, new int[]{ 170 // @formatter:off 171 0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23 172 // @formatter:on 173 }); 174 175 private ArrayDataPointerConstant p3 = pointerConstant(16, new int[]{ 176 // @formatter:off 177 0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e 178 // @formatter:on 179 }); 180 181 private ArrayDataPointerConstant ctable = pointerConstant(16, new int[]{ 182 // 
@formatter:off 183 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa, 184 0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 185 0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555, 186 0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 187 0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000, 188 0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36, 189 0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000, 190 0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000, 191 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, 192 0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, 193 0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b, 194 0x3fb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, 195 0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c, 196 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391, 197 0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, 198 0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05, 199 0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000, 200 0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407, 201 0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000, 202 0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9, 203 0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909, 204 0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57, 205 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9, 206 0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e, 207 0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000, 208 0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000, 209 0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000, 210 0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 211 0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5, 212 0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1, 213 0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 
214 0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462, 215 0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 216 0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000, 217 0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 218 0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000, 219 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d, 220 0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250, 221 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20, 222 0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd, 223 0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794, 224 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279, 225 0xb68c1467, 0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337, 226 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 227 0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845, 228 0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4, 229 0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 230 0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192, 231 0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 232 0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 233 0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea, 234 0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1, 235 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, 236 0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, 237 0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd, 238 0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, 239 0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd, 240 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f, 241 0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, 242 0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5, 243 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, 244 0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24, 245 0x00000000, 
0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000, 246 0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984, 247 0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3, 248 0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c, 249 0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece, 250 0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19, 251 0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000, 252 0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000, 253 0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000, 254 0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 255 0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d, 256 0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96, 257 0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 258 0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b, 259 0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 260 0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 261 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 262 0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000, 263 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911, 264 0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15, 265 0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e, 266 0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924, 267 0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56, 268 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce, 269 0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf, 270 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 271 0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f, 272 0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4, 273 0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e, 274 0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee, 275 0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 276 0xbeaa48a2, 0x0a268358, 
0xbf55a443, 0xdfd978e4, 0x3e94b61f, 277 0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61, 278 0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642, 279 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, 280 0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, 281 0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6, 282 0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, 283 0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea, 284 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11, 285 0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, 286 0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 0xbf159358, 0xbd397b33, 287 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, 288 0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958, 289 0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000, 290 0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13, 291 0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888, 292 0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82, 293 0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed, 294 0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70, 295 0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000, 296 0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000, 297 0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000, 298 0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 299 0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92, 300 0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950, 301 0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 302 0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092, 303 0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 304 0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000, 305 0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 306 0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000, 307 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 
0x00000000, 0x9e42962d, 308 0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441, 309 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f, 310 0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646, 311 0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365, 312 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62, 313 0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa, 314 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 315 0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545, 316 0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e, 317 0x956ed53e, 0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5, 318 0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8, 319 0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 320 0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c, 321 0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c, 322 0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 323 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000, 324 0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f, 325 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 326 0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c, 327 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162, 328 0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000, 329 0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b, 330 0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 331 0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000, 332 0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7, 333 0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000, 334 0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4, 335 0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c, 336 0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9, 337 0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc, 338 0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 
0x3f2cb899, 339 0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000, 340 0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000, 341 0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000, 342 0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 343 0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9, 344 0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3, 345 0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 346 0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952, 347 0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 348 0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000, 349 0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 350 0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000, 351 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8, 352 0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b, 353 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74, 354 0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7, 355 0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc, 356 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f, 357 0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123, 358 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 359 0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01, 360 0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a, 361 0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6, 362 0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746, 363 0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8, 364 0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf, 365 0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1, 366 0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd, 367 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce, 368 0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0, 369 0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6, 370 
0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea, 371 0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea, 372 0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11, 373 0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4, 374 0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33, 375 0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000, 376 0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320, 377 0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000, 378 0x00000000, 0x0359f1be, 0x3f0139e4, 0xa4317c6d, 0x3fa67e17, 379 0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238, 380 0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993, 381 0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358, 382 0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806, 383 0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000, 384 0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000, 385 0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000, 386 0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 387 0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e, 388 0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5, 389 0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 390 0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5, 391 0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 392 0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000, 393 0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 394 0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000, 395 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723, 396 0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3, 397 0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab, 398 0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111, 399 0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3, 400 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000, 401 0x00000000, 
0x00000000, 0x00000000, 0x40000000, 0x00000000, 402 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 403 0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d, 404 0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2, 405 0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018, 406 0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334, 407 0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd, 408 0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d, 409 0x19d490d6, 0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6, 410 0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526, 411 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0, 412 0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58, 413 0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd, 414 0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10, 415 0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd, 416 0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f, 417 0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d, 418 0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5, 419 0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000, 420 0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f, 421 0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000, 422 0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9, 423 0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab, 424 0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060, 425 0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835, 426 0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e, 427 0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000, 428 0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000, 429 0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000, 430 0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 431 0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c, 432 0x3fc1f250, 0x96c125e5, 
0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71, 433 0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 434 0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092, 435 0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 436 0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000, 437 0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 438 0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000, 439 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd, 440 0xbfb13950, 0xd87ab0bb, 0xbfd5335e, 0xce0ae8a5, 0x3fabb382, 441 0x79143126, 0x3fddba41, 0x5f2b28d4, 0xbfa552f1, 0x59f21a6d, 442 0xbfd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 0x3fd0576c, 443 0x8f2c2950, 0xbf9a4898, 0xc0b3f22c, 0xbfc59462, 0x1883a4b8, 444 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 0xbfd36a08, 445 0x1dce993d, 0x3c6d704d, 0x00000000, 0x3ff00000, 0x2b82ab63, 446 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 447 0x5a279ea3, 0xbfaa3407, 0x00000000, 0x00000000, 0x432d65fa, 448 0x3fa70153, 0x00000000, 0x00000000, 0x891a4602, 0xbf9d03ef, 449 0xd62ca5f8, 0xbfca77d9, 0xb35f4628, 0x3f97a265, 0x433258fa, 450 0x3fd8cf51, 0xb58fd909, 0xbf8f88e3, 0x01771cea, 0xbfc2b154, 451 0xf3562f8e, 0x3f888f57, 0xc028a723, 0x3fc7370f, 0x20b7f9f0, 452 0xbf80f44c, 0x214368e9, 0xbfb6dfaa, 0x28891863, 0x3f79b4b6, 453 0x172dbbf0, 0x3fb6cb8e, 0xe0553158, 0xbfc975f5, 0x593fe814, 454 0xbc2ef5d3, 0x00000000, 0x3ff00000, 0x03dec550, 0x3fa44203, 455 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b, 456 0xbf953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea, 457 0x00000000, 0x00000000, 0xda5b7511, 0xbf85ad63, 0xdc230b9b, 458 0xbfb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9, 459 0x77bb08ba, 0xbf757c85, 0xb6247521, 0xbfb1381e, 0x5922170c, 460 0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0xbf64e391, 461 0x3e666320, 0xbfa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06, 462 0x3fafa8ae, 0x8c5b2da2, 0xbfb936bb, 0x4e88f7a5, 0xbc587d05, 463 0x00000000, 0x3ff00000, 0xa8935dd9, 
0x3f83dde2, 0x00000000, 464 0x00000000, 0x00000000, 0x00000000 465 // @formatter:on 466 }); 467 468 private ArrayDataPointerConstant mask35 = pointerConstant(16, new int[]{ 469 // @formatter:off 470 0xfffc0000, 0xffffffff, 0x00000000, 0x00000000 471 // @formatter:on 472 }); 473 474 private ArrayDataPointerConstant q11 = pointerConstant(16, new int[]{ 475 // @formatter:off 476 0xb8fe4d77, 0x3f82609a 477 // @formatter:on 478 }); 479 480 private ArrayDataPointerConstant q9 = pointerConstant(16, new int[]{ 481 // @formatter:off 482 0xbf847a43, 0x3f9664a0 483 // @formatter:on 484 }); 485 486 private ArrayDataPointerConstant q7 = pointerConstant(16, new int[]{ 487 // @formatter:off 488 0x52c4c8ab, 0x3faba1ba 489 // @formatter:on 490 }); 491 492 private ArrayDataPointerConstant q5 = pointerConstant(16, new int[]{ 493 // @formatter:off 494 0x11092746, 0x3fc11111 495 // @formatter:on 496 }); 497 498 private ArrayDataPointerConstant q3 = pointerConstant(16, new int[]{ 499 // @formatter:off 500 0x55555612, 0x3fd55555 501 // @formatter:on 502 }); 503 504 private ArrayDataPointerConstant piInvTable = pointerConstant(16, new int[]{ 505 // @formatter:off 506 0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1, 507 0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561, 508 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c, 509 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41, 510 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 511 0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 512 0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 513 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab, 514 0xf0cfbc21 515 // @formatter:on 516 }); 517 518 private ArrayDataPointerConstant pi4 = pointerConstant(8, new int[]{ 519 // @formatter:off 520 0x00000000, 0x3fe921fb, 521 }); 522 private ArrayDataPointerConstant pi48 = pointerConstant(8, new int[]{ 523 0x4611a626, 0x3e85110b 524 // @formatter:on 525 }); 526 527 private 
ArrayDataPointerConstant qq2 = pointerConstant(8, new int[]{ 528 // @formatter:off 529 0x676733af, 0x3d32e7b9 530 // @formatter:on 531 }); 532 533 private ArrayDataPointerConstant one = pointerConstant(8, new int[]{ 534 // @formatter:off 535 0x00000000, 0x3ff00000 536 // @formatter:on 537 }); 538 539 private ArrayDataPointerConstant twoPow55 = pointerConstant(8, new int[]{ 540 // @formatter:off 541 0x00000000, 0x43600000 542 // @formatter:on 543 }); 544 545 private ArrayDataPointerConstant twoPowM55 = pointerConstant(4, new int[]{ 546 // @formatter:off 547 0x00000000, 0x3c800000 548 // @formatter:on 549 }); 550 551 private ArrayDataPointerConstant negZero = pointerConstant(4, new int[]{ 552 // @formatter:off 553 0x00000000, 0x80000000 554 // @formatter:on 555 }); 556 557 @Override 558 public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { 559 Label block0 = new Label(); 560 Label block1 = new Label(); 561 Label block2 = new Label(); 562 Label block3 = new Label(); 563 Label block4 = new Label(); 564 Label block5 = new Label(); 565 Label block6 = new Label(); 566 Label block7 = new Label(); 567 Label block8 = new Label(); 568 Label block9 = new Label(); 569 Label block10 = new Label(); 570 Label block11 = new Label(); 571 Label block12 = new Label(); 572 Label block13 = new Label(); 573 Label block14 = new Label(); 574 575 masm.push(rbx); 576 masm.subq(rsp, 16); 577 masm.movsd(new AMD64Address(rsp, 8), xmm0); 578 579 masm.pextrw(rax, xmm0, 3); 580 masm.andl(rax, 32767); 581 masm.subl(rax, 16314); 582 masm.cmpl(rax, 270); 583 masm.jcc(AMD64Assembler.ConditionFlag.Above, block0); 584 masm.movdqu(xmm5, recordExternalAddress(crb, onehalf)); // 0x00000000, 0x3fe00000, 585 // 0x00000000, 0x3fe00000 586 masm.movdqu(xmm6, recordExternalAddress(crb, mul16)); // 0x00000000, 0x40300000, 587 // 0x00000000, 0x3ff00000 588 masm.unpcklpd(xmm0, xmm0); 589 masm.movdqu(xmm4, recordExternalAddress(crb, signMask)); // 0x00000000, 0x80000000, 590 // 0x00000000, 
0x80000000 591 masm.andpd(xmm4, xmm0); 592 masm.movdqu(xmm1, recordExternalAddress(crb, pi32Inv)); // 0x6dc9c883, 0x3fe45f30, 593 // 0x6dc9c883, 0x40245f30 594 masm.mulpd(xmm1, xmm0); 595 masm.por(xmm5, xmm4); 596 masm.addpd(xmm1, xmm5); 597 masm.movdqu(xmm7, xmm1); 598 masm.unpckhpd(xmm7, xmm7); 599 masm.cvttsd2sil(rdx, xmm7); 600 masm.cvttpd2dq(xmm1, xmm1); 601 masm.cvtdq2pd(xmm1, xmm1); 602 masm.mulpd(xmm1, xmm6); 603 masm.movdqu(xmm3, recordExternalAddress(crb, p1)); // 0x54444000, 0x3fb921fb, 604 // 0x54440000, 0x3fb921fb 605 masm.movq(xmm5, recordExternalAddress(crb, qq2)); // 0x676733af, 0x3d32e7b9 606 masm.addq(rdx, 469248); 607 masm.movdqu(xmm4, recordExternalAddress(crb, p2)); // 0x67674000, 0xbd32e7b9, 608 // 0x4c4c0000, 0x3d468c23 609 masm.mulpd(xmm3, xmm1); 610 masm.andq(rdx, 31); 611 masm.mulsd(xmm5, xmm1); 612 masm.movq(rcx, rdx); 613 masm.mulpd(xmm4, xmm1); 614 masm.shlq(rcx, 1); 615 masm.subpd(xmm0, xmm3); 616 masm.mulpd(xmm1, recordExternalAddress(crb, p3)); // 0x3707344a, 0x3aa8a2e0, 617 // 0x03707345, 0x3ae98a2e 618 masm.addq(rdx, rcx); 619 masm.shlq(rcx, 2); 620 masm.addq(rdx, rcx); 621 masm.addsd(xmm5, xmm0); 622 masm.movdqu(xmm2, xmm0); 623 masm.subpd(xmm0, xmm4); 624 masm.movq(xmm6, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000 625 masm.shlq(rdx, 4); 626 masm.leaq(rax, recordExternalAddress(crb, ctable)); 627 masm.andpd(xmm5, recordExternalAddress(crb, mask35)); // 0xfffc0000, 0xffffffff, 628 // 0x00000000, 0x00000000 629 masm.movdqu(xmm3, xmm0); 630 masm.addq(rax, rdx); 631 masm.subpd(xmm2, xmm0); 632 masm.unpckhpd(xmm0, xmm0); 633 masm.divsd(xmm6, xmm5); 634 masm.subpd(xmm2, xmm4); 635 masm.movdqu(xmm7, new AMD64Address(rax, 16)); 636 masm.subsd(xmm3, xmm5); 637 masm.mulpd(xmm7, xmm0); 638 masm.subpd(xmm2, xmm1); 639 masm.movdqu(xmm1, new AMD64Address(rax, 48)); 640 masm.mulpd(xmm1, xmm0); 641 masm.movdqu(xmm4, new AMD64Address(rax, 96)); 642 masm.mulpd(xmm4, xmm0); 643 masm.addsd(xmm2, xmm3); 644 masm.movdqu(xmm3, xmm0); 645 
masm.mulpd(xmm0, xmm0); 646 masm.addpd(xmm7, new AMD64Address(rax, 0)); 647 masm.addpd(xmm1, new AMD64Address(rax, 32)); 648 masm.mulpd(xmm1, xmm0); 649 masm.addpd(xmm4, new AMD64Address(rax, 80)); 650 masm.addpd(xmm7, xmm1); 651 masm.movdqu(xmm1, new AMD64Address(rax, 112)); 652 masm.mulpd(xmm1, xmm0); 653 masm.mulpd(xmm0, xmm0); 654 masm.addpd(xmm4, xmm1); 655 masm.movdqu(xmm1, new AMD64Address(rax, 64)); 656 masm.mulpd(xmm1, xmm0); 657 masm.addpd(xmm7, xmm1); 658 masm.movdqu(xmm1, xmm3); 659 masm.mulpd(xmm3, xmm0); 660 masm.mulsd(xmm0, xmm0); 661 masm.mulpd(xmm1, new AMD64Address(rax, 144)); 662 masm.mulpd(xmm4, xmm3); 663 masm.movdqu(xmm3, xmm1); 664 masm.addpd(xmm7, xmm4); 665 masm.movdqu(xmm4, xmm1); 666 masm.mulsd(xmm0, xmm7); 667 masm.unpckhpd(xmm7, xmm7); 668 masm.addsd(xmm0, xmm7); 669 masm.unpckhpd(xmm1, xmm1); 670 masm.addsd(xmm3, xmm1); 671 masm.subsd(xmm4, xmm3); 672 masm.addsd(xmm1, xmm4); 673 masm.movdqu(xmm4, xmm2); 674 masm.movq(xmm7, new AMD64Address(rax, 144)); 675 masm.unpckhpd(xmm2, xmm2); 676 masm.addsd(xmm7, new AMD64Address(rax, 152)); 677 masm.mulsd(xmm7, xmm2); 678 masm.addsd(xmm7, new AMD64Address(rax, 136)); 679 masm.addsd(xmm7, xmm1); 680 masm.addsd(xmm0, xmm7); 681 masm.movq(xmm7, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000 682 masm.mulsd(xmm4, xmm6); 683 masm.movq(xmm2, new AMD64Address(rax, 168)); 684 masm.andpd(xmm2, xmm6); 685 masm.mulsd(xmm5, xmm2); 686 masm.mulsd(xmm6, new AMD64Address(rax, 160)); 687 masm.subsd(xmm7, xmm5); 688 masm.subsd(xmm2, new AMD64Address(rax, 128)); 689 masm.subsd(xmm7, xmm4); 690 masm.mulsd(xmm7, xmm6); 691 masm.movdqu(xmm4, xmm3); 692 masm.subsd(xmm3, xmm2); 693 masm.addsd(xmm2, xmm3); 694 masm.subsd(xmm4, xmm2); 695 masm.addsd(xmm0, xmm4); 696 masm.subsd(xmm0, xmm7); 697 masm.addsd(xmm0, xmm3); 698 masm.jmp(block14); 699 700 masm.bind(block0); 701 masm.jcc(AMD64Assembler.ConditionFlag.Greater, block1); 702 masm.pextrw(rax, xmm0, 3); 703 masm.movl(rdx, rax); 704 masm.andl(rax, 32752); 
705 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block2); 706 masm.andl(rdx, 32767); 707 masm.cmpl(rdx, 15904); 708 masm.jcc(AMD64Assembler.ConditionFlag.Below, block3); 709 masm.movdqu(xmm2, xmm0); 710 masm.movdqu(xmm3, xmm0); 711 masm.movq(xmm1, recordExternalAddress(crb, q11)); // 0xb8fe4d77, 0x3f82609a 712 masm.mulsd(xmm2, xmm0); 713 masm.mulsd(xmm3, xmm2); 714 masm.mulsd(xmm1, xmm2); 715 masm.addsd(xmm1, recordExternalAddress(crb, q9)); // 0xbf847a43, 0x3f9664a0 716 masm.mulsd(xmm1, xmm2); 717 masm.addsd(xmm1, recordExternalAddress(crb, q7)); // 0x52c4c8ab, 0x3faba1ba 718 masm.mulsd(xmm1, xmm2); 719 masm.addsd(xmm1, recordExternalAddress(crb, q5)); // 0x11092746, 0x3fc11111 720 masm.mulsd(xmm1, xmm2); 721 masm.addsd(xmm1, recordExternalAddress(crb, q3)); // 0x55555612, 0x3fd55555 722 masm.mulsd(xmm1, xmm3); 723 masm.addsd(xmm0, xmm1); 724 masm.jmp(block14); 725 726 masm.bind(block3); 727 masm.movq(xmm3, recordExternalAddress(crb, twoPow55)); // 0x00000000, 0x43600000 728 masm.mulsd(xmm3, xmm0); 729 masm.addsd(xmm0, xmm3); 730 masm.mulsd(xmm0, recordExternalAddress(crb, twoPowM55)); // 0x00000000, 0x3c800000 731 masm.jmp(block14); 732 733 masm.bind(block2); 734 masm.movdqu(xmm1, xmm0); 735 masm.mulsd(xmm1, xmm1); 736 masm.jmp(block14); 737 738 masm.bind(block1); 739 masm.pextrw(rax, xmm0, 3); 740 masm.andl(rax, 32752); 741 masm.cmpl(rax, 32752); 742 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block4); 743 masm.pextrw(rcx, xmm0, 3); 744 masm.andl(rcx, 32752); 745 masm.subl(rcx, 16224); 746 masm.shrl(rcx, 7); 747 masm.andl(rcx, 65532); 748 masm.leaq(r11, recordExternalAddress(crb, piInvTable)); 749 masm.addq(rcx, r11); 750 masm.movdq(rax, xmm0); 751 masm.movl(r10, new AMD64Address(rcx, 20)); 752 masm.movl(r8, new AMD64Address(rcx, 24)); 753 masm.movl(rdx, rax); 754 masm.shrq(rax, 21); 755 masm.orl(rax, Integer.MIN_VALUE); 756 masm.shrl(rax, 11); 757 masm.movl(r9, r10); 758 masm.imulq(r10, rdx); 759 masm.imulq(r9, rax); 760 masm.imulq(r8, rax); 761 masm.movl(rsi, 
new AMD64Address(rcx, 16)); 762 masm.movl(rdi, new AMD64Address(rcx, 12)); 763 masm.movl(r11, r10); 764 masm.shrq(r10, 32); 765 masm.addq(r9, r10); 766 masm.addq(r11, r8); 767 masm.movl(r8, r11); 768 masm.shrq(r11, 32); 769 masm.addq(r9, r11); 770 masm.movl(r10, rsi); 771 masm.imulq(rsi, rdx); 772 masm.imulq(r10, rax); 773 masm.movl(r11, rdi); 774 masm.imulq(rdi, rdx); 775 masm.movl(rbx, rsi); 776 masm.shrq(rsi, 32); 777 masm.addq(r9, rbx); 778 masm.movl(rbx, r9); 779 masm.shrq(r9, 32); 780 masm.addq(r10, rsi); 781 masm.addq(r10, r9); 782 masm.shlq(rbx, 32); 783 masm.orq(r8, rbx); 784 masm.imulq(r11, rax); 785 masm.movl(r9, new AMD64Address(rcx, 8)); 786 masm.movl(rsi, new AMD64Address(rcx, 4)); 787 masm.movl(rbx, rdi); 788 masm.shrq(rdi, 32); 789 masm.addq(r10, rbx); 790 masm.movl(rbx, r10); 791 masm.shrq(r10, 32); 792 masm.addq(r11, rdi); 793 masm.addq(r11, r10); 794 masm.movq(rdi, r9); 795 masm.imulq(r9, rdx); 796 masm.imulq(rdi, rax); 797 masm.movl(r10, r9); 798 masm.shrq(r9, 32); 799 masm.addq(r11, r10); 800 masm.movl(r10, r11); 801 masm.shrq(r11, 32); 802 masm.addq(rdi, r9); 803 masm.addq(rdi, r11); 804 masm.movq(r9, rsi); 805 masm.imulq(rsi, rdx); 806 masm.imulq(r9, rax); 807 masm.shlq(r10, 32); 808 masm.orq(r10, rbx); 809 masm.movl(rax, new AMD64Address(rcx, 0)); 810 masm.movl(r11, rsi); 811 masm.shrq(rsi, 32); 812 masm.addq(rdi, r11); 813 masm.movl(r11, rdi); 814 masm.shrq(rdi, 32); 815 masm.addq(r9, rsi); 816 masm.addq(r9, rdi); 817 masm.imulq(rdx, rax); 818 masm.pextrw(rbx, xmm0, 3); 819 masm.leaq(rdi, recordExternalAddress(crb, piInvTable)); 820 masm.subq(rcx, rdi); 821 masm.addl(rcx, rcx); 822 masm.addl(rcx, rcx); 823 masm.addl(rcx, rcx); 824 masm.addl(rcx, 19); 825 masm.movl(rsi, 32768); 826 masm.andl(rsi, rbx); 827 masm.shrl(rbx, 4); 828 masm.andl(rbx, 2047); 829 masm.subl(rbx, 1023); 830 masm.subl(rcx, rbx); 831 masm.addq(r9, rdx); 832 masm.movl(rdx, rcx); 833 masm.addl(rdx, 32); 834 masm.cmpl(rcx, 0); 835 masm.jcc(AMD64Assembler.ConditionFlag.Less, 
block5); 836 masm.negl(rcx); 837 masm.addl(rcx, 29); 838 masm.shll(r9); 839 masm.movl(rdi, r9); 840 masm.andl(r9, 1073741823); 841 masm.testl(r9, 536870912); 842 masm.jcc(AMD64Assembler.ConditionFlag.NotEqual, block6); 843 masm.shrl(r9); 844 masm.movl(rbx, 0); 845 masm.shlq(r9, 32); 846 masm.orq(r9, r11); 847 848 masm.bind(block7); 849 850 masm.bind(block8); 851 masm.cmpq(r9, 0); 852 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block9); 853 854 masm.bind(block10); 855 masm.bsrq(r11, r9); 856 masm.movl(rcx, 29); 857 masm.subl(rcx, r11); 858 masm.jcc(AMD64Assembler.ConditionFlag.LessEqual, block11); 859 masm.shlq(r9); 860 masm.movq(rax, r10); 861 masm.shlq(r10); 862 masm.addl(rdx, rcx); 863 masm.negl(rcx); 864 masm.addl(rcx, 64); 865 masm.shrq(rax); 866 masm.shrq(r8); 867 masm.orq(r9, rax); 868 masm.orq(r10, r8); 869 870 masm.bind(block12); 871 masm.cvtsi2sdq(xmm0, r9); 872 masm.shrq(r10, 1); 873 masm.cvtsi2sdq(xmm3, r10); 874 masm.xorpd(xmm4, xmm4); 875 masm.shll(rdx, 4); 876 masm.negl(rdx); 877 masm.addl(rdx, 16368); 878 masm.orl(rdx, rsi); 879 masm.xorl(rdx, rbx); 880 masm.pinsrw(xmm4, rdx, 3); 881 masm.movq(xmm2, recordExternalAddress(crb, pi4)); // 0x00000000, 0x3fe921fb, 882 // 0x4611a626, 0x3e85110b 883 masm.movq(xmm7, recordExternalAddress(crb, pi48)); // 0x3fe921fb, 0x4611a626, 884 // 0x3e85110b 885 masm.xorpd(xmm5, xmm5); 886 masm.subl(rdx, 1008); 887 masm.pinsrw(xmm5, rdx, 3); 888 masm.mulsd(xmm0, xmm4); 889 masm.shll(rsi, 16); 890 masm.sarl(rsi, 31); 891 masm.mulsd(xmm3, xmm5); 892 masm.movdqu(xmm1, xmm0); 893 masm.mulsd(xmm0, xmm2); 894 masm.shrl(rdi, 30); 895 masm.addsd(xmm1, xmm3); 896 masm.mulsd(xmm3, xmm2); 897 masm.addl(rdi, rsi); 898 masm.xorl(rdi, rsi); 899 masm.mulsd(xmm7, xmm1); 900 masm.movl(rax, rdi); 901 masm.addsd(xmm7, xmm3); 902 masm.movdqu(xmm2, xmm0); 903 masm.addsd(xmm0, xmm7); 904 masm.subsd(xmm2, xmm0); 905 masm.addsd(xmm7, xmm2); 906 masm.movdqu(xmm1, recordExternalAddress(crb, pi32Inv)); // 0x6dc9c883, 0x3fe45f30, 907 // 0x6dc9c883, 
0x40245f30 908 if (masm.supports(AMD64.CPUFeature.SSE3)) { 909 masm.movddup(xmm0, xmm0); 910 } else { 911 masm.movlhps(xmm0, xmm0); 912 } 913 masm.movdqu(xmm4, recordExternalAddress(crb, signMask)); // 0x00000000, 0x80000000, 914 // 0x00000000, 0x80000000 915 masm.andpd(xmm4, xmm0); 916 masm.mulpd(xmm1, xmm0); 917 if (masm.supports(AMD64.CPUFeature.SSE3)) { 918 masm.movddup(xmm7, xmm7); 919 } else { 920 masm.movlhps(xmm7, xmm7); 921 } 922 masm.movdqu(xmm5, recordExternalAddress(crb, onehalf)); // 0x00000000, 0x3fe00000, 923 // 0x00000000, 0x3fe00000 924 masm.movdqu(xmm6, recordExternalAddress(crb, mul16)); // 0x00000000, 0x40300000, 925 // 0x00000000, 0x3ff00000 926 masm.por(xmm5, xmm4); 927 masm.addpd(xmm1, xmm5); 928 masm.movdqu(xmm5, xmm1); 929 masm.unpckhpd(xmm5, xmm5); 930 masm.cvttsd2sil(rdx, xmm5); 931 masm.cvttpd2dq(xmm1, xmm1); 932 masm.cvtdq2pd(xmm1, xmm1); 933 masm.mulpd(xmm1, xmm6); 934 masm.movdqu(xmm3, recordExternalAddress(crb, p1)); // 0x54444000, 0x3fb921fb, 935 // 0x54440000, 0x3fb921fb 936 masm.movq(xmm5, recordExternalAddress(crb, qq2)); // 0x676733af, 0x3d32e7b9 937 masm.shll(rax, 4); 938 masm.addl(rdx, 469248); 939 masm.movdqu(xmm4, recordExternalAddress(crb, p2)); // 0x67674000, 0xbd32e7b9, 940 // 0x4c4c0000, 0x3d468c23 941 masm.mulpd(xmm3, xmm1); 942 masm.addl(rdx, rax); 943 masm.andl(rdx, 31); 944 masm.mulsd(xmm5, xmm1); 945 masm.movl(rcx, rdx); 946 masm.mulpd(xmm4, xmm1); 947 masm.shll(rcx, 1); 948 masm.subpd(xmm0, xmm3); 949 masm.mulpd(xmm1, recordExternalAddress(crb, p3)); // 0x3707344a, 0x3aa8a2e0, 950 // 0x03707345, 0x3ae98a2e 951 masm.addl(rdx, rcx); 952 masm.shll(rcx, 2); 953 masm.addl(rdx, rcx); 954 masm.addsd(xmm5, xmm0); 955 masm.movdqu(xmm2, xmm0); 956 masm.subpd(xmm0, xmm4); 957 masm.movq(xmm6, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000 958 masm.shll(rdx, 4); 959 masm.leaq(rax, recordExternalAddress(crb, ctable)); 960 masm.andpd(xmm5, recordExternalAddress(crb, mask35)); // 0xfffc0000, 0xffffffff, 961 // 
0x00000000, 0x00000000 962 masm.movdqu(xmm3, xmm0); 963 masm.addq(rax, rdx); 964 masm.subpd(xmm2, xmm0); 965 masm.unpckhpd(xmm0, xmm0); 966 masm.divsd(xmm6, xmm5); 967 masm.subpd(xmm2, xmm4); 968 masm.subsd(xmm3, xmm5); 969 masm.subpd(xmm2, xmm1); 970 masm.movdqu(xmm1, new AMD64Address(rax, 48)); 971 masm.addpd(xmm2, xmm7); 972 masm.movdqu(xmm7, new AMD64Address(rax, 16)); 973 masm.mulpd(xmm7, xmm0); 974 masm.movdqu(xmm4, new AMD64Address(rax, 96)); 975 masm.mulpd(xmm1, xmm0); 976 masm.mulpd(xmm4, xmm0); 977 masm.addsd(xmm2, xmm3); 978 masm.movdqu(xmm3, xmm0); 979 masm.mulpd(xmm0, xmm0); 980 masm.addpd(xmm7, new AMD64Address(rax, 0)); 981 masm.addpd(xmm1, new AMD64Address(rax, 32)); 982 masm.mulpd(xmm1, xmm0); 983 masm.addpd(xmm4, new AMD64Address(rax, 80)); 984 masm.addpd(xmm7, xmm1); 985 masm.movdqu(xmm1, new AMD64Address(rax, 112)); 986 masm.mulpd(xmm1, xmm0); 987 masm.mulpd(xmm0, xmm0); 988 masm.addpd(xmm4, xmm1); 989 masm.movdqu(xmm1, new AMD64Address(rax, 64)); 990 masm.mulpd(xmm1, xmm0); 991 masm.addpd(xmm7, xmm1); 992 masm.movdqu(xmm1, xmm3); 993 masm.mulpd(xmm3, xmm0); 994 masm.mulsd(xmm0, xmm0); 995 masm.mulpd(xmm1, new AMD64Address(rax, 144)); 996 masm.mulpd(xmm4, xmm3); 997 masm.movdqu(xmm3, xmm1); 998 masm.addpd(xmm7, xmm4); 999 masm.movdqu(xmm4, xmm1); 1000 masm.mulsd(xmm0, xmm7); 1001 masm.unpckhpd(xmm7, xmm7); 1002 masm.addsd(xmm0, xmm7); 1003 masm.unpckhpd(xmm1, xmm1); 1004 masm.addsd(xmm3, xmm1); 1005 masm.subsd(xmm4, xmm3); 1006 masm.addsd(xmm1, xmm4); 1007 masm.movdqu(xmm4, xmm2); 1008 masm.movq(xmm7, new AMD64Address(rax, 144)); 1009 masm.unpckhpd(xmm2, xmm2); 1010 masm.addsd(xmm7, new AMD64Address(rax, 152)); 1011 masm.mulsd(xmm7, xmm2); 1012 masm.addsd(xmm7, new AMD64Address(rax, 136)); 1013 masm.addsd(xmm7, xmm1); 1014 masm.addsd(xmm0, xmm7); 1015 masm.movq(xmm7, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000 1016 masm.mulsd(xmm4, xmm6); 1017 masm.movq(xmm2, new AMD64Address(rax, 168)); 1018 masm.andpd(xmm2, xmm6); 1019 
masm.mulsd(xmm5, xmm2); 1020 masm.mulsd(xmm6, new AMD64Address(rax, 160)); 1021 masm.subsd(xmm7, xmm5); 1022 masm.subsd(xmm2, new AMD64Address(rax, 128)); 1023 masm.subsd(xmm7, xmm4); 1024 masm.mulsd(xmm7, xmm6); 1025 masm.movdqu(xmm4, xmm3); 1026 masm.subsd(xmm3, xmm2); 1027 masm.addsd(xmm2, xmm3); 1028 masm.subsd(xmm4, xmm2); 1029 masm.addsd(xmm0, xmm4); 1030 masm.subsd(xmm0, xmm7); 1031 masm.addsd(xmm0, xmm3); 1032 masm.jmp(block14); 1033 1034 masm.bind(block9); 1035 masm.addl(rdx, 64); 1036 masm.movq(r9, r10); 1037 masm.movq(r10, r8); 1038 masm.movl(r8, 0); 1039 masm.cmpq(r9, 0); 1040 masm.jcc(AMD64Assembler.ConditionFlag.NotEqual, block10); 1041 masm.addl(rdx, 64); 1042 masm.movq(r9, r10); 1043 masm.movq(r10, r8); 1044 masm.cmpq(r9, 0); 1045 masm.jcc(AMD64Assembler.ConditionFlag.NotEqual, block10); 1046 masm.jmp(block12); 1047 1048 masm.bind(block11); 1049 masm.jcc(AMD64Assembler.ConditionFlag.Equal, block12); 1050 masm.negl(rcx); 1051 masm.shrq(r10); 1052 masm.movq(rax, r9); 1053 masm.shrq(r9); 1054 masm.subl(rdx, rcx); 1055 masm.negl(rcx); 1056 masm.addl(rcx, 64); 1057 masm.shlq(rax); 1058 masm.orq(r10, rax); 1059 masm.jmp(block12); 1060 1061 masm.bind(block5); 1062 masm.notl(rcx); 1063 masm.shlq(r9, 32); 1064 masm.orq(r9, r11); 1065 masm.shlq(r9); 1066 masm.movq(rdi, r9); 1067 masm.testl(r9, Integer.MIN_VALUE); 1068 masm.jcc(AMD64Assembler.ConditionFlag.NotEqual, block13); 1069 masm.shrl(r9); 1070 masm.movl(rbx, 0); 1071 masm.shrq(rdi, 2); 1072 masm.jmp(block8); 1073 1074 masm.bind(block6); 1075 masm.shrl(r9); 1076 masm.movl(rbx, 1073741824); 1077 masm.shrl(rbx); 1078 masm.shlq(r9, 32); 1079 masm.orq(r9, r11); 1080 masm.shlq(rbx, 32); 1081 masm.addl(rdi, 1073741824); 1082 masm.movl(rcx, 0); 1083 masm.movl(r11, 0); 1084 masm.subq(rcx, r8); 1085 masm.sbbq(r11, r10); 1086 masm.sbbq(rbx, r9); 1087 masm.movq(r8, rcx); 1088 masm.movq(r10, r11); 1089 masm.movq(r9, rbx); 1090 masm.movl(rbx, 32768); 1091 masm.jmp(block7); 1092 1093 masm.bind(block13); 1094 
masm.shrl(r9); 1095 masm.movq(rbx, 0x100000000L); 1096 masm.shrq(rbx); 1097 masm.movl(rcx, 0); 1098 masm.movl(r11, 0); 1099 masm.subq(rcx, r8); 1100 masm.sbbq(r11, r10); 1101 masm.sbbq(rbx, r9); 1102 masm.movq(r8, rcx); 1103 masm.movq(r10, r11); 1104 masm.movq(r9, rbx); 1105 masm.movl(rbx, 32768); 1106 masm.shrq(rdi, 2); 1107 masm.addl(rdi, 1073741824); 1108 masm.jmp(block8); 1109 1110 masm.bind(block4); 1111 masm.movq(xmm0, new AMD64Address(rsp, 8)); 1112 masm.mulsd(xmm0, recordExternalAddress(crb, negZero)); // 0x00000000, 0x80000000 1113 masm.movq(new AMD64Address(rsp, 0), xmm0); 1114 1115 masm.bind(block14); 1116 masm.addq(rsp, 16); 1117 masm.pop(rbx); 1118 } 1119 }