1 // 2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 // Copyright (c) 2014, Red Hat Inc. All rights reserved. 4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 // 6 // This code is free software; you can redistribute it and/or modify it 7 // under the terms of the GNU General Public License version 2 only, as 8 // published by the Free Software Foundation. 9 // 10 // This code is distributed in the hope that it will be useful, but WITHOUT 11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 // version 2 for more details (a copy is included in the LICENSE file that 14 // accompanied this code). 15 // 16 // You should have received a copy of the GNU General Public License version 17 // 2 along with this work; if not, write to the Free Software Foundation, 18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 // 20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 // or visit www.oracle.com if you need additional information or have any 22 // questions. 23 // 24 // 25 26 // AArch64 Architecture Description File 27 28 //----------REGISTER DEFINITION BLOCK------------------------------------------ 29 // This information is used by the matcher and the register allocator to 30 // describe individual registers and classes of registers within the target 31 // architecture. 32 33 register %{ 34 //----------Architecture Description Register Definitions---------------------- 35 // General Registers 36 // "reg_def" name ( register save type, C convention save type, 37 // ideal register type, encoding ); 38 // Register Save Types: 39 // 40 // NS = No-Save: The register allocator assumes that these registers 41 // can be used without saving upon entry to the method, & 42 // that they do not need to be saved at call sites. 
43 // 44 // SOC = Save-On-Call: The register allocator assumes that these registers 45 // can be used without saving upon entry to the method, 46 // but that they must be saved at call sites. 47 // 48 // SOE = Save-On-Entry: The register allocator assumes that these registers 49 // must be saved before using them upon entry to the 50 // method, but they do not need to be saved at call 51 // sites. 52 // 53 // AS = Always-Save: The register allocator assumes that these registers 54 // must be saved before using them upon entry to the 55 // method, & that they must be saved at call sites. 56 // 57 // Ideal Register Type is used to determine how to save & restore a 58 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 59 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 60 // 61 // The encoding number is the actual bit-pattern placed into the opcodes. 62 63 // We must define the 64 bit int registers in two 32 bit halves, the 64 // real lower register and a virtual upper half register. upper halves 65 // are used by the register allocator but are not actually supplied as 66 // operands to memory ops. 67 // 68 // follow the C1 compiler in making registers 69 // 70 // r0-r7,r10-r26 volatile (caller save) 71 // r27-r32 system (no save, no allocate) 72 // r8-r9 invisible to the allocator (so we can use them as scratch regs) 73 // 74 // as regards Java usage. 
we don't use any callee save registers 75 // because this makes it difficult to de-optimise a frame (see comment 76 // in x86 implementation of Deoptimization::unwind_callee_save_values) 77 // 78 79 // General Registers 80 81 reg_def R0 ( SOC, SOC, Op_RegI, 0, r0->as_VMReg() ); 82 reg_def R0_H ( SOC, SOC, Op_RegI, 0, r0->as_VMReg()->next() ); 83 reg_def R1 ( SOC, SOC, Op_RegI, 1, r1->as_VMReg() ); 84 reg_def R1_H ( SOC, SOC, Op_RegI, 1, r1->as_VMReg()->next() ); 85 reg_def R2 ( SOC, SOC, Op_RegI, 2, r2->as_VMReg() ); 86 reg_def R2_H ( SOC, SOC, Op_RegI, 2, r2->as_VMReg()->next() ); 87 reg_def R3 ( SOC, SOC, Op_RegI, 3, r3->as_VMReg() ); 88 reg_def R3_H ( SOC, SOC, Op_RegI, 3, r3->as_VMReg()->next() ); 89 reg_def R4 ( SOC, SOC, Op_RegI, 4, r4->as_VMReg() ); 90 reg_def R4_H ( SOC, SOC, Op_RegI, 4, r4->as_VMReg()->next() ); 91 reg_def R5 ( SOC, SOC, Op_RegI, 5, r5->as_VMReg() ); 92 reg_def R5_H ( SOC, SOC, Op_RegI, 5, r5->as_VMReg()->next() ); 93 reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() ); 94 reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() ); 95 reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() ); 96 reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() ); 97 reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() ); 98 reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next()); 99 reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() ); 100 reg_def R11_H ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next()); 101 reg_def R12 ( SOC, SOC, Op_RegI, 12, r12->as_VMReg() ); 102 reg_def R12_H ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next()); 103 reg_def R13 ( SOC, SOC, Op_RegI, 13, r13->as_VMReg() ); 104 reg_def R13_H ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next()); 105 reg_def R14 ( SOC, SOC, Op_RegI, 14, r14->as_VMReg() ); 106 reg_def R14_H ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next()); 107 reg_def R15 ( SOC, SOC, Op_RegI, 15, r15->as_VMReg() ); 108 reg_def R15_H ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next()); 109 reg_def R16 ( SOC, SOC, 
Op_RegI, 16, r16->as_VMReg() ); 110 reg_def R16_H ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next()); 111 reg_def R17 ( SOC, SOC, Op_RegI, 17, r17->as_VMReg() ); 112 reg_def R17_H ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next()); 113 reg_def R18 ( SOC, SOC, Op_RegI, 18, r18->as_VMReg() ); 114 reg_def R18_H ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next()); 115 reg_def R19 ( SOC, SOE, Op_RegI, 19, r19->as_VMReg() ); 116 reg_def R19_H ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next()); 117 reg_def R20 ( SOC, SOE, Op_RegI, 20, r20->as_VMReg() ); // caller esp 118 reg_def R20_H ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next()); 119 reg_def R21 ( SOC, SOE, Op_RegI, 21, r21->as_VMReg() ); 120 reg_def R21_H ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next()); 121 reg_def R22 ( SOC, SOE, Op_RegI, 22, r22->as_VMReg() ); 122 reg_def R22_H ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next()); 123 reg_def R23 ( SOC, SOE, Op_RegI, 23, r23->as_VMReg() ); 124 reg_def R23_H ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next()); 125 reg_def R24 ( SOC, SOE, Op_RegI, 24, r24->as_VMReg() ); 126 reg_def R24_H ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next()); 127 reg_def R25 ( SOC, SOE, Op_RegI, 25, r25->as_VMReg() ); 128 reg_def R25_H ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next()); 129 reg_def R26 ( SOC, SOE, Op_RegI, 26, r26->as_VMReg() ); 130 reg_def R26_H ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next()); 131 reg_def R27 ( NS, SOE, Op_RegI, 27, r27->as_VMReg() ); // heapbase 132 reg_def R27_H ( NS, SOE, Op_RegI, 27, r27->as_VMReg()->next()); 133 reg_def R28 ( NS, SOE, Op_RegI, 28, r28->as_VMReg() ); // thread 134 reg_def R28_H ( NS, SOE, Op_RegI, 28, r28->as_VMReg()->next()); 135 reg_def R29 ( NS, NS, Op_RegI, 29, r29->as_VMReg() ); // fp 136 reg_def R29_H ( NS, NS, Op_RegI, 29, r29->as_VMReg()->next()); 137 reg_def R30 ( NS, NS, Op_RegI, 30, r30->as_VMReg() ); // lr 138 reg_def R30_H ( NS, NS, Op_RegI, 30, r30->as_VMReg()->next()); 139 reg_def R31 ( NS, NS, Op_RegI, 31, 
r31_sp->as_VMReg() ); // sp 140 reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); 141 142 // ---------------------------- 143 // Float/Double Registers 144 // ---------------------------- 145 146 // Double Registers 147 148 // The rules of ADL require that double registers be defined in pairs. 149 // Each pair must be two 32-bit values, but not necessarily a pair of 150 // single float registers. In each pair, ADLC-assigned register numbers 151 // must be adjacent, with the lower number even. Finally, when the 152 // CPU stores such a register pair to memory, the word associated with 153 // the lower ADLC-assigned number must be stored to the lower address. 154 155 // AArch64 has 32 floating-point registers. Each can store a vector of 156 // single or double precision floating-point values up to 8 * 32 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats. We currently only 158 // use the first float or double element of the vector. 159 160 // for Java use float registers v0-v15 are always save on call whereas 161 // the platform ABI treats v8-v15 as callee save). 
float registers 162 // v16-v31 are SOC as per the platform spec 163 164 reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); 165 reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); 166 reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); 167 reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); 168 169 reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); 170 reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); 171 reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); 172 reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); 173 174 reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); 175 reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); 176 reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); 177 reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); 178 179 reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); 180 reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); 181 reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); 182 reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); 183 184 reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); 185 reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); 186 reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); 187 reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); 188 189 reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); 190 reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); 191 reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); 192 reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); 193 194 reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); 195 reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); 196 reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); 197 reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); 198 199 reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); 200 reg_def V7_H ( SOC, 
SOC, Op_RegF, 7, v7->as_VMReg()->next() ); 201 reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); 202 reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); 203 204 reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); 205 reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); 206 reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); 207 reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); 208 209 reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); 210 reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); 211 reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); 212 reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); 213 214 reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); 215 reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); 216 reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); 217 reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); 218 219 reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); 220 reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); 221 reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); 222 reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); 223 224 reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); 225 reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); 226 reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); 227 reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); 228 229 reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); 230 reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); 231 reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); 232 reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); 233 234 reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); 235 reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); 236 reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); 237 
reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); 238 239 reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); 240 reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); 241 reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); 242 reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); 243 244 reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); 245 reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); 246 reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); 247 reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); 248 249 reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); 250 reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); 251 reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); 252 reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); 253 254 reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); 255 reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); 256 reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); 257 reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); 258 259 reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); 260 reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); 261 reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); 262 reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); 263 264 reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); 265 reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); 266 reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); 267 reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); 268 269 reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); 270 reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); 271 reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); 272 reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); 273 274 reg_def V22 ( SOC, SOC, 
Op_RegF, 22, v22->as_VMReg() ); 275 reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); 276 reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); 277 reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); 278 279 reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); 280 reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); 281 reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); 282 reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); 283 284 reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); 285 reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); 286 reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); 287 reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); 288 289 reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); 290 reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); 291 reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); 292 reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); 293 294 reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); 295 reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); 296 reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); 297 reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); 298 299 reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); 300 reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); 301 reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); 302 reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); 303 304 reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); 305 reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); 306 reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); 307 reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); 308 309 reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); 310 reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); 
311 reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); 312 reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); 313 314 reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); 315 reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); 316 reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); 317 reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); 318 319 reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); 320 reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); 321 reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); 322 reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); 323 324 // ---------------------------- 325 // Special Registers 326 // ---------------------------- 327 328 // the AArch64 CPSR status flag register is not directly accessible as 329 // instruction operand. the FPSR status flag register is a system 330 // register which can be written/read using MSR/MRS but again does not 331 // appear as an operand (a code identifying the FPSR occurs as an 332 // immediate value in the instruction). 333 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); 335 336 337 // Specify priority of register selection within phases of register 338 // allocation. Highest priority is first. A useful heuristic is to 339 // give registers a low priority when they are required by machine 340 // instructions, like EAX and EDX on I486, and choose no-save registers 341 // before save-on-call, & save-on-call before save-on-entry. Registers 342 // which participate in fixed calling sequences should come last. 343 // Registers which are used as pairs must fall on an even boundary. 
344 345 alloc_class chunk0( 346 // volatiles 347 R10, R10_H, 348 R11, R11_H, 349 R12, R12_H, 350 R13, R13_H, 351 R14, R14_H, 352 R15, R15_H, 353 R16, R16_H, 354 R17, R17_H, 355 R18, R18_H, 356 357 // arg registers 358 R0, R0_H, 359 R1, R1_H, 360 R2, R2_H, 361 R3, R3_H, 362 R4, R4_H, 363 R5, R5_H, 364 R6, R6_H, 365 R7, R7_H, 366 367 // non-volatiles 368 R19, R19_H, 369 R20, R20_H, 370 R21, R21_H, 371 R22, R22_H, 372 R23, R23_H, 373 R24, R24_H, 374 R25, R25_H, 375 R26, R26_H, 376 377 // non-allocatable registers 378 379 R27, R27_H, // heapbase 380 R28, R28_H, // thread 381 R29, R29_H, // fp 382 R30, R30_H, // lr 383 R31, R31_H, // sp 384 ); 385 386 alloc_class chunk1( 387 388 // no save 389 V16, V16_H, V16_J, V16_K, 390 V17, V17_H, V17_J, V17_K, 391 V18, V18_H, V18_J, V18_K, 392 V19, V19_H, V19_J, V19_K, 393 V20, V20_H, V20_J, V20_K, 394 V21, V21_H, V21_J, V21_K, 395 V22, V22_H, V22_J, V22_K, 396 V23, V23_H, V23_J, V23_K, 397 V24, V24_H, V24_J, V24_K, 398 V25, V25_H, V25_J, V25_K, 399 V26, V26_H, V26_J, V26_K, 400 V27, V27_H, V27_J, V27_K, 401 V28, V28_H, V28_J, V28_K, 402 V29, V29_H, V29_J, V29_K, 403 V30, V30_H, V30_J, V30_K, 404 V31, V31_H, V31_J, V31_K, 405 406 // arg registers 407 V0, V0_H, V0_J, V0_K, 408 V1, V1_H, V1_J, V1_K, 409 V2, V2_H, V2_J, V2_K, 410 V3, V3_H, V3_J, V3_K, 411 V4, V4_H, V4_J, V4_K, 412 V5, V5_H, V5_J, V5_K, 413 V6, V6_H, V6_J, V6_K, 414 V7, V7_H, V7_J, V7_K, 415 416 // non-volatiles 417 V8, V8_H, V8_J, V8_K, 418 V9, V9_H, V9_J, V9_K, 419 V10, V10_H, V10_J, V10_K, 420 V11, V11_H, V11_J, V11_K, 421 V12, V12_H, V12_J, V12_K, 422 V13, V13_H, V13_J, V13_K, 423 V14, V14_H, V14_J, V14_K, 424 V15, V15_H, V15_J, V15_K, 425 ); 426 427 alloc_class chunk2(RFLAGS); 428 429 //----------Architecture Description Register Classes-------------------------- 430 // Several register classes are automatically defined based upon information in 431 // this architecture description. 
432 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) 433 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) 434 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) 435 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) 436 // 437 438 // Class for all 32 bit integer registers -- excludes SP which will 439 // never be used as an integer register 440 reg_class any_reg32( 441 R0, 442 R1, 443 R2, 444 R3, 445 R4, 446 R5, 447 R6, 448 R7, 449 R10, 450 R11, 451 R12, 452 R13, 453 R14, 454 R15, 455 R16, 456 R17, 457 R18, 458 R19, 459 R20, 460 R21, 461 R22, 462 R23, 463 R24, 464 R25, 465 R26, 466 R27, 467 R28, 468 R29, 469 R30 470 ); 471 472 // Singleton class for R0 int register 473 reg_class int_r0_reg(R0); 474 475 // Singleton class for R2 int register 476 reg_class int_r2_reg(R2); 477 478 // Singleton class for R3 int register 479 reg_class int_r3_reg(R3); 480 481 // Singleton class for R4 int register 482 reg_class int_r4_reg(R4); 483 484 // Class for all long integer registers (including RSP) 485 reg_class any_reg( 486 R0, R0_H, 487 R1, R1_H, 488 R2, R2_H, 489 R3, R3_H, 490 R4, R4_H, 491 R5, R5_H, 492 R6, R6_H, 493 R7, R7_H, 494 R10, R10_H, 495 R11, R11_H, 496 R12, R12_H, 497 R13, R13_H, 498 R14, R14_H, 499 R15, R15_H, 500 R16, R16_H, 501 R17, R17_H, 502 R18, R18_H, 503 R19, R19_H, 504 R20, R20_H, 505 R21, R21_H, 506 R22, R22_H, 507 R23, R23_H, 508 R24, R24_H, 509 R25, R25_H, 510 R26, R26_H, 511 R27, R27_H, 512 R28, R28_H, 513 R29, R29_H, 514 R30, R30_H, 515 R31, R31_H 516 ); 517 518 // Class for all non-special integer registers 519 reg_class no_special_reg32_no_fp( 520 R0, 521 R1, 522 R2, 523 R3, 524 R4, 525 R5, 526 R6, 527 R7, 528 R10, 529 R11, 530 R12, // rmethod 531 R13, 532 R14, 533 R15, 534 R16, 535 R17, 536 R18, 537 R19, 538 R20, 539 R21, 540 R22, 541 R23, 542 R24, 543 R25, 544 R26 545 /* R27, */ // heapbase 546 /* R28, */ // thread 547 /* R29, */ // fp 548 /* R30, */ 
// lr 549 /* R31 */ // sp 550 ); 551 552 reg_class no_special_reg32_with_fp( 553 R0, 554 R1, 555 R2, 556 R3, 557 R4, 558 R5, 559 R6, 560 R7, 561 R10, 562 R11, 563 R12, // rmethod 564 R13, 565 R14, 566 R15, 567 R16, 568 R17, 569 R18, 570 R19, 571 R20, 572 R21, 573 R22, 574 R23, 575 R24, 576 R25, 577 R26 578 /* R27, */ // heapbase 579 /* R28, */ // thread 580 /* R29, */ // fp 581 /* R30, */ // lr 582 /* R31 */ // sp 583 ); 584 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %}); 586 587 // Class for all non-special long integer registers 588 reg_class no_special_reg_no_fp( 589 R0, R0_H, 590 R1, R1_H, 591 R2, R2_H, 592 R3, R3_H, 593 R4, R4_H, 594 R5, R5_H, 595 R6, R6_H, 596 R7, R7_H, 597 R10, R10_H, 598 R11, R11_H, 599 R12, R12_H, // rmethod 600 R13, R13_H, 601 R14, R14_H, 602 R15, R15_H, 603 R16, R16_H, 604 R17, R17_H, 605 R18, R18_H, 606 R19, R19_H, 607 R20, R20_H, 608 R21, R21_H, 609 R22, R22_H, 610 R23, R23_H, 611 R24, R24_H, 612 R25, R25_H, 613 R26, R26_H, 614 /* R27, R27_H, */ // heapbase 615 /* R28, R28_H, */ // thread 616 /* R29, R29_H, */ // fp 617 /* R30, R30_H, */ // lr 618 /* R31, R31_H */ // sp 619 ); 620 621 reg_class no_special_reg_with_fp( 622 R0, R0_H, 623 R1, R1_H, 624 R2, R2_H, 625 R3, R3_H, 626 R4, R4_H, 627 R5, R5_H, 628 R6, R6_H, 629 R7, R7_H, 630 R10, R10_H, 631 R11, R11_H, 632 R12, R12_H, // rmethod 633 R13, R13_H, 634 R14, R14_H, 635 R15, R15_H, 636 R16, R16_H, 637 R17, R17_H, 638 R18, R18_H, 639 R19, R19_H, 640 R20, R20_H, 641 R21, R21_H, 642 R22, R22_H, 643 R23, R23_H, 644 R24, R24_H, 645 R25, R25_H, 646 R26, R26_H, 647 /* R27, R27_H, */ // heapbase 648 /* R28, R28_H, */ // thread 649 /* R29, R29_H, */ // fp 650 /* R30, R30_H, */ // lr 651 /* R31, R31_H */ // sp 652 ); 653 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %}); 655 656 // Class for 64 bit register r0 657 reg_class r0_reg( 658 R0, R0_H 659 ); 660 661 // 
Class for 64 bit register r1 662 reg_class r1_reg( 663 R1, R1_H 664 ); 665 666 // Class for 64 bit register r2 667 reg_class r2_reg( 668 R2, R2_H 669 ); 670 671 // Class for 64 bit register r3 672 reg_class r3_reg( 673 R3, R3_H 674 ); 675 676 // Class for 64 bit register r4 677 reg_class r4_reg( 678 R4, R4_H 679 ); 680 681 // Class for 64 bit register r5 682 reg_class r5_reg( 683 R5, R5_H 684 ); 685 686 // Class for 64 bit register r10 687 reg_class r10_reg( 688 R10, R10_H 689 ); 690 691 // Class for 64 bit register r11 692 reg_class r11_reg( 693 R11, R11_H 694 ); 695 696 // Class for method register 697 reg_class method_reg( 698 R12, R12_H 699 ); 700 701 // Class for heapbase register 702 reg_class heapbase_reg( 703 R27, R27_H 704 ); 705 706 // Class for thread register 707 reg_class thread_reg( 708 R28, R28_H 709 ); 710 711 // Class for frame pointer register 712 reg_class fp_reg( 713 R29, R29_H 714 ); 715 716 // Class for link register 717 reg_class lr_reg( 718 R30, R30_H 719 ); 720 721 // Class for long sp register 722 reg_class sp_reg( 723 R31, R31_H 724 ); 725 726 // Class for all pointer registers 727 reg_class ptr_reg( 728 R0, R0_H, 729 R1, R1_H, 730 R2, R2_H, 731 R3, R3_H, 732 R4, R4_H, 733 R5, R5_H, 734 R6, R6_H, 735 R7, R7_H, 736 R10, R10_H, 737 R11, R11_H, 738 R12, R12_H, 739 R13, R13_H, 740 R14, R14_H, 741 R15, R15_H, 742 R16, R16_H, 743 R17, R17_H, 744 R18, R18_H, 745 R19, R19_H, 746 R20, R20_H, 747 R21, R21_H, 748 R22, R22_H, 749 R23, R23_H, 750 R24, R24_H, 751 R25, R25_H, 752 R26, R26_H, 753 R27, R27_H, 754 R28, R28_H, 755 R29, R29_H, 756 R30, R30_H, 757 R31, R31_H 758 ); 759 760 // Class for all non_special pointer registers 761 reg_class no_special_ptr_reg( 762 R0, R0_H, 763 R1, R1_H, 764 R2, R2_H, 765 R3, R3_H, 766 R4, R4_H, 767 R5, R5_H, 768 R6, R6_H, 769 R7, R7_H, 770 R10, R10_H, 771 R11, R11_H, 772 R12, R12_H, 773 R13, R13_H, 774 R14, R14_H, 775 R15, R15_H, 776 R16, R16_H, 777 R17, R17_H, 778 R18, R18_H, 779 R19, R19_H, 780 R20, R20_H, 781 
R21, R21_H, 782 R22, R22_H, 783 R23, R23_H, 784 R24, R24_H, 785 R25, R25_H, 786 R26, R26_H, 787 /* R27, R27_H, */ // heapbase 788 /* R28, R28_H, */ // thread 789 /* R29, R29_H, */ // fp 790 /* R30, R30_H, */ // lr 791 /* R31, R31_H */ // sp 792 ); 793 794 // Class for all float registers 795 reg_class float_reg( 796 V0, 797 V1, 798 V2, 799 V3, 800 V4, 801 V5, 802 V6, 803 V7, 804 V8, 805 V9, 806 V10, 807 V11, 808 V12, 809 V13, 810 V14, 811 V15, 812 V16, 813 V17, 814 V18, 815 V19, 816 V20, 817 V21, 818 V22, 819 V23, 820 V24, 821 V25, 822 V26, 823 V27, 824 V28, 825 V29, 826 V30, 827 V31 828 ); 829 830 // Double precision float registers have virtual `high halves' that 831 // are needed by the allocator. 832 // Class for all double registers 833 reg_class double_reg( 834 V0, V0_H, 835 V1, V1_H, 836 V2, V2_H, 837 V3, V3_H, 838 V4, V4_H, 839 V5, V5_H, 840 V6, V6_H, 841 V7, V7_H, 842 V8, V8_H, 843 V9, V9_H, 844 V10, V10_H, 845 V11, V11_H, 846 V12, V12_H, 847 V13, V13_H, 848 V14, V14_H, 849 V15, V15_H, 850 V16, V16_H, 851 V17, V17_H, 852 V18, V18_H, 853 V19, V19_H, 854 V20, V20_H, 855 V21, V21_H, 856 V22, V22_H, 857 V23, V23_H, 858 V24, V24_H, 859 V25, V25_H, 860 V26, V26_H, 861 V27, V27_H, 862 V28, V28_H, 863 V29, V29_H, 864 V30, V30_H, 865 V31, V31_H 866 ); 867 868 // Class for all 64bit vector registers 869 reg_class vectord_reg( 870 V0, V0_H, 871 V1, V1_H, 872 V2, V2_H, 873 V3, V3_H, 874 V4, V4_H, 875 V5, V5_H, 876 V6, V6_H, 877 V7, V7_H, 878 V8, V8_H, 879 V9, V9_H, 880 V10, V10_H, 881 V11, V11_H, 882 V12, V12_H, 883 V13, V13_H, 884 V14, V14_H, 885 V15, V15_H, 886 V16, V16_H, 887 V17, V17_H, 888 V18, V18_H, 889 V19, V19_H, 890 V20, V20_H, 891 V21, V21_H, 892 V22, V22_H, 893 V23, V23_H, 894 V24, V24_H, 895 V25, V25_H, 896 V26, V26_H, 897 V27, V27_H, 898 V28, V28_H, 899 V29, V29_H, 900 V30, V30_H, 901 V31, V31_H 902 ); 903 904 // Class for all 128bit vector registers 905 reg_class vectorx_reg( 906 V0, V0_H, V0_J, V0_K, 907 V1, V1_H, V1_J, V1_K, 908 V2, V2_H, V2_J, V2_K, 
909 V3, V3_H, V3_J, V3_K, 910 V4, V4_H, V4_J, V4_K, 911 V5, V5_H, V5_J, V5_K, 912 V6, V6_H, V6_J, V6_K, 913 V7, V7_H, V7_J, V7_K, 914 V8, V8_H, V8_J, V8_K, 915 V9, V9_H, V9_J, V9_K, 916 V10, V10_H, V10_J, V10_K, 917 V11, V11_H, V11_J, V11_K, 918 V12, V12_H, V12_J, V12_K, 919 V13, V13_H, V13_J, V13_K, 920 V14, V14_H, V14_J, V14_K, 921 V15, V15_H, V15_J, V15_K, 922 V16, V16_H, V16_J, V16_K, 923 V17, V17_H, V17_J, V17_K, 924 V18, V18_H, V18_J, V18_K, 925 V19, V19_H, V19_J, V19_K, 926 V20, V20_H, V20_J, V20_K, 927 V21, V21_H, V21_J, V21_K, 928 V22, V22_H, V22_J, V22_K, 929 V23, V23_H, V23_J, V23_K, 930 V24, V24_H, V24_J, V24_K, 931 V25, V25_H, V25_J, V25_K, 932 V26, V26_H, V26_J, V26_K, 933 V27, V27_H, V27_J, V27_K, 934 V28, V28_H, V28_J, V28_K, 935 V29, V29_H, V29_J, V29_K, 936 V30, V30_H, V30_J, V30_K, 937 V31, V31_H, V31_J, V31_K 938 ); 939 940 // Class for 128 bit register v0 941 reg_class v0_reg( 942 V0, V0_H 943 ); 944 945 // Class for 128 bit register v1 946 reg_class v1_reg( 947 V1, V1_H 948 ); 949 950 // Class for 128 bit register v2 951 reg_class v2_reg( 952 V2, V2_H 953 ); 954 955 // Class for 128 bit register v3 956 reg_class v3_reg( 957 V3, V3_H 958 ); 959 960 // Singleton class for condition codes 961 reg_class int_flags(RFLAGS); 962 963 %} 964 965 //----------DEFINITION BLOCK--------------------------------------------------- 966 // Define name --> value mappings to inform the ADLC of an integer valued name 967 // Current support includes integer values in the range [0, 0x7FFFFFFF] 968 // Format: 969 // int_def <name> ( <int_value>, <expression>); 970 // Generated Code in ad_<arch>.hpp 971 // #define <name> (<expression>) 972 // // value == <int_value> 973 // Generated code in ad_<arch>.cpp adlc_verification() 974 // assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>"); 975 // 976 977 // we follow the ppc-aix port in using a simple cost model which ranks 978 // register operations as cheap, memory ops as more expensive and 979 // 
branches as most expensive. the first two have a low as well as a 980 // normal cost. huge cost appears to be a way of saying don't do 981 // something 982 983 definitions %{ 984 // The default cost (of a register move instruction). 985 int_def INSN_COST ( 100, 100); 986 int_def BRANCH_COST ( 200, 2 * INSN_COST); 987 int_def CALL_COST ( 200, 2 * INSN_COST); 988 int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST); 989 %} 990 991 992 //----------SOURCE BLOCK------------------------------------------------------- 993 // This is a block of C++ code which provides values, functions, and 994 // definitions necessary in the rest of the architecture description 995 996 source_hpp %{ 997 998 #include "gc/shared/cardTableModRefBS.hpp" 999 1000 class CallStubImpl { 1001 1002 //-------------------------------------------------------------- 1003 //---< Used for optimization in Compile::shorten_branches >--- 1004 //-------------------------------------------------------------- 1005 1006 public: 1007 // Size of call trampoline stub. 
1008 static uint size_call_trampoline() { 1009 return 0; // no call trampolines on this platform 1010 } 1011 1012 // number of relocations needed by a call trampoline stub 1013 static uint reloc_call_trampoline() { 1014 return 0; // no call trampolines on this platform 1015 } 1016 }; 1017 1018 class HandlerImpl { 1019 1020 public: 1021 1022 static int emit_exception_handler(CodeBuffer &cbuf); 1023 static int emit_deopt_handler(CodeBuffer& cbuf); 1024 1025 static uint size_exception_handler() { 1026 return MacroAssembler::far_branch_size(); 1027 } 1028 1029 static uint size_deopt_handler() { 1030 // count one adr and one far branch instruction 1031 return 4 * NativeInstruction::instruction_size; 1032 } 1033 }; 1034 1035 // graph traversal helpers 1036 1037 MemBarNode *parent_membar(const Node *n); 1038 MemBarNode *child_membar(const MemBarNode *n); 1039 bool leading_membar(const MemBarNode *barrier); 1040 1041 bool is_card_mark_membar(const MemBarNode *barrier); 1042 bool is_CAS(int opcode); 1043 1044 MemBarNode *leading_to_trailing(MemBarNode *leading); 1045 MemBarNode *card_mark_to_leading(const MemBarNode *barrier); 1046 MemBarNode *trailing_to_leading(const MemBarNode *trailing); 1047 1048 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb 1049 1050 bool unnecessary_acquire(const Node *barrier); 1051 bool needs_acquiring_load(const Node *load); 1052 1053 // predicates controlling emit of str<x>/stlr<x> and associated dmbs 1054 1055 bool unnecessary_release(const Node *barrier); 1056 bool unnecessary_volatile(const Node *barrier); 1057 bool needs_releasing_store(const Node *store); 1058 1059 // predicate controlling translation of CompareAndSwapX 1060 bool needs_acquiring_load_exclusive(const Node *load); 1061 1062 // predicate controlling translation of StoreCM 1063 bool unnecessary_storestore(const Node *storecm); 1064 %} 1065 1066 source %{ 1067 1068 // Optimization of volatile gets and puts 1069 // ------------------------------------- 1070 // 
1071 // AArch64 has ldar<x> and stlr<x> instructions which we can safely 1072 // use to implement volatile reads and writes. For a volatile read 1073 // we simply need 1074 // 1075 // ldar<x> 1076 // 1077 // and for a volatile write we need 1078 // 1079 // stlr<x> 1080 // 1081 // Alternatively, we can implement them by pairing a normal 1082 // load/store with a memory barrier. For a volatile read we need 1083 // 1084 // ldr<x> 1085 // dmb ishld 1086 // 1087 // for a volatile write 1088 // 1089 // dmb ish 1090 // str<x> 1091 // dmb ish 1092 // 1093 // We can also use ldaxr and stlxr to implement compare and swap CAS 1094 // sequences. These are normally translated to an instruction 1095 // sequence like the following 1096 // 1097 // dmb ish 1098 // retry: 1099 // ldxr<x> rval raddr 1100 // cmp rval rold 1101 // b.ne done 1102 // stlxr<x> rval, rnew, rold 1103 // cbnz rval retry 1104 // done: 1105 // cset r0, eq 1106 // dmb ishld 1107 // 1108 // Note that the exclusive store is already using an stlxr 1109 // instruction. That is required to ensure visibility to other 1110 // threads of the exclusive write (assuming it succeeds) before that 1111 // of any subsequent writes. 1112 // 1113 // The following instruction sequence is an improvement on the above 1114 // 1115 // retry: 1116 // ldaxr<x> rval raddr 1117 // cmp rval rold 1118 // b.ne done 1119 // stlxr<x> rval, rnew, rold 1120 // cbnz rval retry 1121 // done: 1122 // cset r0, eq 1123 // 1124 // We don't need the leading dmb ish since the stlxr guarantees 1125 // visibility of prior writes in the case that the swap is 1126 // successful. Crucially we don't have to worry about the case where 1127 // the swap is not successful since no valid program should be 1128 // relying on visibility of prior changes by the attempting thread 1129 // in the case where the CAS fails. 
1130 // 1131 // Similarly, we don't need the trailing dmb ishld if we substitute 1132 // an ldaxr instruction since that will provide all the guarantees we 1133 // require regarding observation of changes made by other threads 1134 // before any change to the CAS address observed by the load. 1135 // 1136 // In order to generate the desired instruction sequence we need to 1137 // be able to identify specific 'signature' ideal graph node 1138 // sequences which i) occur as a translation of a volatile reads or 1139 // writes or CAS operations and ii) do not occur through any other 1140 // translation or graph transformation. We can then provide 1141 // alternative aldc matching rules which translate these node 1142 // sequences to the desired machine code sequences. Selection of the 1143 // alternative rules can be implemented by predicates which identify 1144 // the relevant node sequences. 1145 // 1146 // The ideal graph generator translates a volatile read to the node 1147 // sequence 1148 // 1149 // LoadX[mo_acquire] 1150 // MemBarAcquire 1151 // 1152 // As a special case when using the compressed oops optimization we 1153 // may also see this variant 1154 // 1155 // LoadN[mo_acquire] 1156 // DecodeN 1157 // MemBarAcquire 1158 // 1159 // A volatile write is translated to the node sequence 1160 // 1161 // MemBarRelease 1162 // StoreX[mo_release] {CardMark}-optional 1163 // MemBarVolatile 1164 // 1165 // n.b. the above node patterns are generated with a strict 1166 // 'signature' configuration of input and output dependencies (see 1167 // the predicates below for exact details). The card mark may be as 1168 // simple as a few extra nodes or, in a few GC configurations, may 1169 // include more complex control flow between the leading and 1170 // trailing memory barriers. 
However, whatever the card mark 1171 // configuration these signatures are unique to translated volatile 1172 // reads/stores -- they will not appear as a result of any other 1173 // bytecode translation or inlining nor as a consequence of 1174 // optimizing transforms. 1175 // 1176 // We also want to catch inlined unsafe volatile gets and puts and 1177 // be able to implement them using either ldar<x>/stlr<x> or some 1178 // combination of ldr<x>/stlr<x> and dmb instructions. 1179 // 1180 // Inlined unsafe volatiles puts manifest as a minor variant of the 1181 // normal volatile put node sequence containing an extra cpuorder 1182 // membar 1183 // 1184 // MemBarRelease 1185 // MemBarCPUOrder 1186 // StoreX[mo_release] {CardMark}-optional 1187 // MemBarVolatile 1188 // 1189 // n.b. as an aside, the cpuorder membar is not itself subject to 1190 // matching and translation by adlc rules. However, the rule 1191 // predicates need to detect its presence in order to correctly 1192 // select the desired adlc rules. 1193 // 1194 // Inlined unsafe volatile gets manifest as a somewhat different 1195 // node sequence to a normal volatile get 1196 // 1197 // MemBarCPUOrder 1198 // || \\ 1199 // MemBarAcquire LoadX[mo_acquire] 1200 // || 1201 // MemBarCPUOrder 1202 // 1203 // In this case the acquire membar does not directly depend on the 1204 // load. However, we can be sure that the load is generated from an 1205 // inlined unsafe volatile get if we see it dependent on this unique 1206 // sequence of membar nodes. Similarly, given an acquire membar we 1207 // can know that it was added because of an inlined unsafe volatile 1208 // get if it is fed and feeds a cpuorder membar and if its feed 1209 // membar also feeds an acquiring load. 
1210 // 1211 // Finally an inlined (Unsafe) CAS operation is translated to the 1212 // following ideal graph 1213 // 1214 // MemBarRelease 1215 // MemBarCPUOrder 1216 // CompareAndSwapX {CardMark}-optional 1217 // MemBarCPUOrder 1218 // MemBarAcquire 1219 // 1220 // So, where we can identify these volatile read and write 1221 // signatures we can choose to plant either of the above two code 1222 // sequences. For a volatile read we can simply plant a normal 1223 // ldr<x> and translate the MemBarAcquire to a dmb. However, we can 1224 // also choose to inhibit translation of the MemBarAcquire and 1225 // inhibit planting of the ldr<x>, instead planting an ldar<x>. 1226 // 1227 // When we recognise a volatile store signature we can choose to 1228 // plant at a dmb ish as a translation for the MemBarRelease, a 1229 // normal str<x> and then a dmb ish for the MemBarVolatile. 1230 // Alternatively, we can inhibit translation of the MemBarRelease 1231 // and MemBarVolatile and instead plant a simple stlr<x> 1232 // instruction. 1233 // 1234 // when we recognise a CAS signature we can choose to plant a dmb 1235 // ish as a translation for the MemBarRelease, the conventional 1236 // macro-instruction sequence for the CompareAndSwap node (which 1237 // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire. 1238 // Alternatively, we can elide generation of the dmb instructions 1239 // and plant the alternative CompareAndSwap macro-instruction 1240 // sequence (which uses ldaxr<x>). 1241 // 1242 // Of course, the above only applies when we see these signature 1243 // configurations. We still want to plant dmb instructions in any 1244 // other cases where we may see a MemBarAcquire, MemBarRelease or 1245 // MemBarVolatile. 
// For example, at the end of a constructor which
// writes final/volatile fields we will see a MemBarRelease
// instruction and this needs a 'dmb ish' lest we risk the
// constructed object being visible without making the
// final/volatile field writes visible.
//
// n.b. the translation rules below which rely on detection of the
// volatile signatures and insert ldar<x> or stlr<x> are failsafe.
// If we see anything other than the signature configurations we
// always just translate the loads and stores to ldr<x> and str<x>
// and translate acquire, release and volatile membars to the
// relevant dmb instructions.
//

// graph traversal helpers used for volatile put/get and CAS
// optimization

// 1) general purpose helpers

// if node n is linked to a parent MemBarNode by an intervening
// Control and Memory ProjNode return the MemBarNode otherwise return
// NULL.
//
// n may only be a Load or a MemBar.

MemBarNode *parent_membar(const Node *n)
{
  Node *ctl = NULL;
  Node *mem = NULL;
  Node *membar = NULL;

  // Loads and MemBars index their Ctl/Mem inputs differently; any
  // other node kind has no parent membar by definition
  if (n->is_Load()) {
    ctl = n->lookup(LoadNode::Control);
    mem = n->lookup(LoadNode::Memory);
  } else if (n->is_MemBar()) {
    ctl = n->lookup(TypeFunc::Control);
    mem = n->lookup(TypeFunc::Memory);
  } else {
    return NULL;
  }

  // both feeds must be present and must be projections
  if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
    return NULL;
  }

  membar = ctl->lookup(0);

  if (!membar || !membar->is_MemBar()) {
    return NULL;
  }

  // the Ctl and Mem projections must both come from the same membar
  if (mem->lookup(0) != membar) {
    return NULL;
  }

  return membar->as_MemBar();
}

// if n is linked to a child MemBarNode by intervening Control and
// Memory ProjNodes return the MemBarNode otherwise return NULL.

MemBarNode *child_membar(const MemBarNode *n)
{
  ProjNode *ctl = n->proj_out(TypeFunc::Control);
  ProjNode *mem = n->proj_out(TypeFunc::Memory);

  // MemBar needs to have both a Ctl and Mem projection
  if (! ctl || ! mem)
    return NULL;

  MemBarNode *child = NULL;
  Node *x;

  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a membar we keep hold of it. we may also see a new
    // arena copy of the original but it will appear later
    if (x->is_MemBar()) {
      child = x->as_MemBar();
      break;
    }
  }

  if (child == NULL) {
    return NULL;
  }

  // the candidate child is only valid if n's Mem projection also
  // feeds it
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    // if we see a membar we keep hold of it. we may also see a new
    // arena copy of the original but it will appear later
    if (x == child) {
      return child;
    }
  }
  return NULL;
}

// helper predicate used to filter candidates for a leading memory
// barrier
//
// returns true if barrier is a MemBarRelease or a MemBarCPUOrder
// whose Ctl and Mem feeds come from a MemBarRelease otherwise false

bool leading_membar(const MemBarNode *barrier)
{
  int opcode = barrier->Opcode();
  // if this is a release membar we are ok
  if (opcode == Op_MemBarRelease) {
    return true;
  }
  // if its a cpuorder membar . . .
  if (opcode != Op_MemBarCPUOrder) {
    return false;
  }
  // then the parent has to be a release membar
  MemBarNode *parent = parent_membar(barrier);
  if (!parent) {
    return false;
  }
  opcode = parent->Opcode();
  return opcode == Op_MemBarRelease;
}

// 2) card mark detection helper

// helper predicate which can be used to detect a volatile membar
// introduced as part of a conditional card mark sequence either by
// G1 or by CMS when UseCondCardMark is true.
//
// membar can be definitively determined to be part of a card mark
// sequence if and only if all the following hold
//
// i) it is a MemBarVolatile
//
// ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
// true
//
// iii) the node's Mem projection feeds a StoreCM node.

bool is_card_mark_membar(const MemBarNode *barrier)
{
  // only relevant for GC configurations which emit card marks
  if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
    return false;
  }

  if (barrier->Opcode() != Op_MemBarVolatile) {
    return false;
  }

  ProjNode *mem = barrier->proj_out(TypeFunc::Memory);

  // a card mark membar is uniquely identified by feeding Mem to at
  // least one StoreCM
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
    Node *y = mem->fast_out(i);
    if (y->Opcode() == Op_StoreCM) {
      return true;
    }
  }

  return false;
}


// 3) helper predicates to traverse volatile put or CAS graphs which
// may contain GC barrier subgraphs

// Preamble
// --------
//
// for volatile writes we can omit generating barriers and employ a
// releasing store when we see a node sequence with a
// leading MemBarRelease and a trailing MemBarVolatile as follows
//
//   MemBarRelease
//  {      ||      } -- optional
//  {MemBarCPUOrder}
//         ||     \\
//         ||   StoreX[mo_release]
//         | \ Bot    / ???
1424 // | MergeMem 1425 // | / 1426 // MemBarVolatile 1427 // 1428 // where 1429 // || and \\ represent Ctl and Mem feeds via Proj nodes 1430 // | \ and / indicate further routing of the Ctl and Mem feeds 1431 // 1432 // Note that the memory feed from the CPUOrder membar to the 1433 // MergeMem node is an AliasIdxBot slice while the feed from the 1434 // StoreX is for a slice determined by the type of value being 1435 // written. 1436 // 1437 // the diagram above shows the graph we see for non-object stores. 1438 // for a volatile Object store (StoreN/P) we may see other nodes 1439 // below the leading membar because of the need for a GC pre- or 1440 // post-write barrier. 1441 // 1442 // with most GC configurations we with see this simple variant which 1443 // includes a post-write barrier card mark. 1444 // 1445 // MemBarRelease______________________________ 1446 // || \\ Ctl \ \\ 1447 // || StoreN/P[mo_release] CastP2X StoreB/CM 1448 // | \ Bot / oop . . . / 1449 // | MergeMem 1450 // | / 1451 // || / 1452 // MemBarVolatile 1453 // 1454 // i.e. the leading membar feeds Ctl to a CastP2X (which converts 1455 // the object address to an int used to compute the card offset) and 1456 // Ctl+Mem to a StoreB node (which does the actual card mark). 1457 // 1458 // n.b. a StoreCM node is only ever used when CMS (with or without 1459 // CondCardMark) or G1 is configured. This abstract instruction 1460 // differs from a normal card mark write (StoreB) because it implies 1461 // a requirement to order visibility of the card mark (StoreCM) 1462 // after that of the object put (StoreP/N) using a StoreStore memory 1463 // barrier. Note that this is /not/ a requirement to order the 1464 // instructions in the generated code (that is already guaranteed by 1465 // the order of memory dependencies). Rather it is a requirement to 1466 // ensure visibility order which only applies on architectures like 1467 // AArch64 which do not implement TSO. 
This ordering is required for 1468 // both non-volatile and volatile puts. 1469 // 1470 // That implies that we need to translate a StoreCM using the 1471 // sequence 1472 // 1473 // dmb ishst 1474 // stlrb 1475 // 1476 // This dmb cannot be omitted even when the associated StoreX or 1477 // CompareAndSwapX is implemented using stlr. However, as described 1478 // below there are circumstances where a specific GC configuration 1479 // requires a stronger barrier in which case it can be omitted. 1480 // 1481 // With the Serial or Parallel GC using +CondCardMark the card mark 1482 // is performed conditionally on it currently being unmarked in 1483 // which case the volatile put graph looks slightly different 1484 // 1485 // MemBarRelease____________________________________________ 1486 // || \\ Ctl \ Ctl \ \\ Mem \ 1487 // || StoreN/P[mo_release] CastP2X If LoadB | 1488 // | \ Bot / oop \ | 1489 // | MergeMem . . . StoreB 1490 // | / / 1491 // || / 1492 // MemBarVolatile 1493 // 1494 // It is worth noting at this stage that all the above 1495 // configurations can be uniquely identified by checking that the 1496 // memory flow includes the following subgraph: 1497 // 1498 // MemBarRelease 1499 // {MemBarCPUOrder} 1500 // | \ . . . 1501 // | StoreX[mo_release] . . . 1502 // Bot | / oop 1503 // MergeMem 1504 // | 1505 // MemBarVolatile 1506 // 1507 // This is referred to as a *normal* volatile store subgraph. It can 1508 // easily be detected starting from any candidate MemBarRelease, 1509 // StoreX[mo_release] or MemBarVolatile node. 1510 // 1511 // A small variation on this normal case occurs for an unsafe CAS 1512 // operation. The basic memory flow subgraph for a non-object CAS is 1513 // as follows 1514 // 1515 // MemBarRelease 1516 // || 1517 // MemBarCPUOrder 1518 // | \\ . . . 
1519 // | CompareAndSwapX 1520 // | | 1521 // Bot | SCMemProj 1522 // \ / Bot 1523 // MergeMem 1524 // / 1525 // MemBarCPUOrder 1526 // || 1527 // MemBarAcquire 1528 // 1529 // The same basic variations on this arrangement (mutatis mutandis) 1530 // occur when a card mark is introduced. i.e. the CPUOrder MemBar 1531 // feeds the extra CastP2X, LoadB etc nodes but the above memory 1532 // flow subgraph is still present. 1533 // 1534 // This is referred to as a *normal* CAS subgraph. It can easily be 1535 // detected starting from any candidate MemBarRelease, 1536 // StoreX[mo_release] or MemBarAcquire node. 1537 // 1538 // The code below uses two helper predicates, leading_to_trailing 1539 // and trailing_to_leading to identify these normal graphs, one 1540 // validating the layout starting from the top membar and searching 1541 // down and the other validating the layout starting from the lower 1542 // membar and searching up. 1543 // 1544 // There are two special case GC configurations when the simple 1545 // normal graphs above may not be generated: when using G1 (which 1546 // always employs a conditional card mark); and when using CMS with 1547 // conditional card marking (+CondCardMark) configured. These GCs 1548 // are both concurrent rather than stop-the world GCs. So they 1549 // introduce extra Ctl+Mem flow into the graph between the leading 1550 // and trailing membar nodes, in particular enforcing stronger 1551 // memory serialisation beween the object put and the corresponding 1552 // conditional card mark. CMS employs a post-write GC barrier while 1553 // G1 employs both a pre- and post-write GC barrier. 1554 // 1555 // The post-write barrier subgraph for these configurations includes 1556 // a MemBarVolatile node -- referred to as a card mark membar -- 1557 // which is needed to order the card write (StoreCM) operation in 1558 // the barrier, the preceding StoreX (or CompareAndSwapX) and Store 1559 // operations performed by GC threads i.e. 
a card mark membar 1560 // constitutes a StoreLoad barrier hence must be translated to a dmb 1561 // ish (whether or not it sits inside a volatile store sequence). 1562 // 1563 // Of course, the use of the dmb ish for the card mark membar also 1564 // implies theat the StoreCM which follows can omit the dmb ishst 1565 // instruction. The necessary visibility ordering will already be 1566 // guaranteed by the dmb ish. In sum, the dmb ishst instruction only 1567 // needs to be generated for as part of the StoreCM sequence with GC 1568 // configuration +CMS -CondCardMark. 1569 // 1570 // Of course all these extra barrier nodes may well be absent -- 1571 // they are only inserted for object puts. Their potential presence 1572 // significantly complicates the task of identifying whether a 1573 // MemBarRelease, StoreX[mo_release], MemBarVolatile or 1574 // MemBarAcquire forms part of a volatile put or CAS when using 1575 // these GC configurations (see below) and also complicates the 1576 // decision as to how to translate a MemBarVolatile and StoreCM. 1577 // 1578 // So, thjis means that a card mark MemBarVolatile occurring in the 1579 // post-barrier graph it needs to be distinguished from a normal 1580 // trailing MemBarVolatile. Resolving this is straightforward: a 1581 // card mark MemBarVolatile always projects a Mem feed to a StoreCM 1582 // node and that is a unique marker 1583 // 1584 // MemBarVolatile (card mark) 1585 // C | \ . . . 1586 // | StoreCM . . . 1587 // . . . 1588 // 1589 // Returning to the task of translating the object put and the 1590 // leading/trailing membar nodes: what do the node graphs look like 1591 // for these 2 special cases? and how can we determine the status of 1592 // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both 1593 // normal and non-normal cases? 
// A CMS GC post-barrier wraps its card write (StoreCM) inside an If
// which selects conditional execution based on the value loaded
// (LoadB) from the card. Ctl and Mem are fed to the If via an
// intervening StoreLoad barrier (MemBarVolatile).
1644 // 1645 // The graph for a CAS also varies with CMS + CondCardMark, in 1646 // particular employing a control feed from the CompareAndSwapX node 1647 // through a CmpI and If to the card mark membar and StoreCM which 1648 // updates the associated card. This avoids executing the card mark 1649 // if the CAS fails. However, it can be seen from the diagram below 1650 // that the presence of the barrier does not alter the normal CAS 1651 // memory subgraph where the leading membar feeds a CompareAndSwapX, 1652 // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and 1653 // MemBarAcquire pair. 1654 // 1655 // MemBarRelease 1656 // MemBarCPUOrder__(leading)_______________________ 1657 // C / M | \\ C \ 1658 // . . . | Bot CompareAndSwapN/P CastP2X 1659 // | C / M | 1660 // | CmpI | 1661 // | / | 1662 // | . . . | 1663 // | IfTrue | 1664 // | / | 1665 // MemBarVolatile (card mark) | 1666 // C | || M | | 1667 // | LoadB | Bot ______/| 1668 // | | | / | 1669 // | Cmp | / SCMemProj 1670 // | / | / | 1671 // If | / / 1672 // | \ | / / Bot 1673 // IfFalse IfTrue | / / 1674 // | / \ / / prec / 1675 // . . . | / StoreCM / 1676 // \ | / | raw / 1677 // Region . . . / 1678 // | \ / 1679 // | . . . \ / Bot 1680 // | MergeMem 1681 // | / 1682 // MemBarCPUOrder 1683 // MemBarAcquire (trailing) 1684 // 1685 // This has a slightly different memory subgraph to the one seen 1686 // previously but the core of it has a similar memory flow to the 1687 // CAS normal subgraph: 1688 // 1689 // MemBarRelease 1690 // MemBarCPUOrder____ 1691 // | \ . . . 1692 // | CompareAndSwapX . . . 1693 // | C / M | 1694 // | CmpI | 1695 // | / | 1696 // | . . / 1697 // Bot | IfTrue / 1698 // | / / 1699 // MemBarVolatile / 1700 // | ... / 1701 // StoreCM ... / 1702 // | / 1703 // . . . SCMemProj 1704 // Raw \ / Bot 1705 // MergeMem 1706 // | 1707 // MemBarCPUOrder 1708 // MemBarAcquire 1709 // 1710 // The G1 graph for a volatile object put is a lot more complicated. 
1711 // Nodes inserted on behalf of G1 may comprise: a pre-write graph 1712 // which adds the old value to the SATB queue; the releasing store 1713 // itself; and, finally, a post-write graph which performs a card 1714 // mark. 1715 // 1716 // The pre-write graph may be omitted, but only when the put is 1717 // writing to a newly allocated (young gen) object and then only if 1718 // there is a direct memory chain to the Initialize node for the 1719 // object allocation. This will not happen for a volatile put since 1720 // any memory chain passes through the leading membar. 1721 // 1722 // The pre-write graph includes a series of 3 If tests. The outermost 1723 // If tests whether SATB is enabled (no else case). The next If tests 1724 // whether the old value is non-NULL (no else case). The third tests 1725 // whether the SATB queue index is > 0, if so updating the queue. The 1726 // else case for this third If calls out to the runtime to allocate a 1727 // new queue buffer. 1728 // 1729 // So with G1 the pre-write and releasing store subgraph looks like 1730 // this (the nested Ifs are omitted). 1731 // 1732 // MemBarRelease (leading)____________ 1733 // C | || M \ M \ M \ M \ . . . 1734 // | LoadB \ LoadL LoadN \ 1735 // | / \ \ 1736 // If |\ \ 1737 // | \ | \ \ 1738 // IfFalse IfTrue | \ \ 1739 // | | | \ | 1740 // | If | /\ | 1741 // | | \ | 1742 // | \ | 1743 // | . . . \ | 1744 // | / | / | | 1745 // Region Phi[M] | | 1746 // | \ | | | 1747 // | \_____ | ___ | | 1748 // C | C \ | C \ M | | 1749 // | CastP2X | StoreN/P[mo_release] | 1750 // | | | | 1751 // C | M | M | M | 1752 // \ | Raw | oop / Bot 1753 // . . . 1754 // (post write subtree elided) 1755 // . . . 
1756 // C \ M / 1757 // MemBarVolatile (trailing) 1758 // 1759 // Note that the three memory feeds into the post-write tree are an 1760 // AliasRawIdx slice associated with the writes in the pre-write 1761 // tree, an oop type slice from the StoreX specific to the type of 1762 // the volatile field and the AliasBotIdx slice emanating from the 1763 // leading membar. 1764 // 1765 // n.b. the LoadB in this subgraph is not the card read -- it's a 1766 // read of the SATB queue active flag. 1767 // 1768 // The CAS graph is once again a variant of the above with a 1769 // CompareAndSwapX node and SCMemProj in place of the StoreX. The 1770 // value from the CompareAndSwapX node is fed into the post-write 1771 // graph aling with the AliasIdxRaw feed from the pre-barrier and 1772 // the AliasIdxBot feeds from the leading membar and the ScMemProj. 1773 // 1774 // MemBarRelease (leading)____________ 1775 // C | || M \ M \ M \ M \ . . . 1776 // | LoadB \ LoadL LoadN \ 1777 // | / \ \ 1778 // If |\ \ 1779 // | \ | \ \ 1780 // IfFalse IfTrue | \ \ 1781 // | | | \ \ 1782 // | If | \ | 1783 // | | \ | 1784 // | \ | 1785 // | . . . \ | 1786 // | / | / \ | 1787 // Region Phi[M] \ | 1788 // | \ | \ | 1789 // | \_____ | | | 1790 // C | C \ | | | 1791 // | CastP2X | CompareAndSwapX | 1792 // | | res | | | 1793 // C | M | | SCMemProj M | 1794 // \ | Raw | | Bot / Bot 1795 // . . . 1796 // (post write subtree elided) 1797 // . . . 1798 // C \ M / 1799 // MemBarVolatile (trailing) 1800 // 1801 // The G1 post-write subtree is also optional, this time when the 1802 // new value being written is either null or can be identified as a 1803 // newly allocated (young gen) object with no intervening control 1804 // flow. The latter cannot happen but the former may, in which case 1805 // the card mark membar is omitted and the memory feeds from the 1806 // leading membar and the SToreN/P are merged direct into the 1807 // trailing membar as per the normal subgraph. 
So, the only special 1808 // case which arises is when the post-write subgraph is generated. 1809 // 1810 // The kernel of the post-write G1 subgraph is the card mark itself 1811 // which includes a card mark memory barrier (MemBarVolatile), a 1812 // card test (LoadB), and a conditional update (If feeding a 1813 // StoreCM). These nodes are surrounded by a series of nested Ifs 1814 // which try to avoid doing the card mark. The top level If skips if 1815 // the object reference does not cross regions (i.e. it tests if 1816 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references 1817 // need not be recorded. The next If, which skips on a NULL value, 1818 // may be absent (it is not generated if the type of value is >= 1819 // OopPtr::NotNull). The 3rd If skips writes to young regions (by 1820 // checking if card_val != young). n.b. although this test requires 1821 // a pre-read of the card it can safely be done before the StoreLoad 1822 // barrier. However that does not bypass the need to reread the card 1823 // after the barrier. 1824 // 1825 // (pre-write subtree elided) 1826 // . . . . . . . . . . . . 1827 // C | M | M | M | 1828 // Region Phi[M] StoreN | 1829 // | Raw | oop | Bot | 1830 // / \_______ |\ |\ |\ 1831 // C / C \ . . . | \ | \ | \ 1832 // If CastP2X . . . | \ | \ | \ 1833 // / \ | \ | \ | \ 1834 // / \ | \ | \ | \ 1835 // IfFalse IfTrue | | | \ 1836 // | | \ | / | 1837 // | If \ | \ / \ | 1838 // | / \ \ | / \ | 1839 // | / \ \ | / \ | | 1840 // | IfFalse IfTrue MergeMem \ | | 1841 // | . . . / \ | \ | | 1842 // | / \ | | | | 1843 // | IfFalse IfTrue | | | | 1844 // | . . . | | | | | 1845 // | If / | | | 1846 // | / \ / | | | 1847 // | / \ / | | | 1848 // | IfFalse IfTrue / | | | 1849 // | . . . | / | | | 1850 // | \ / | | | 1851 // | \ / | | | 1852 // | MemBarVolatile__(card mark ) | | | 1853 // | || C | \ | | | 1854 // | LoadB If | / | | 1855 // | / \ Raw | / / / 1856 // | . . . 
| / / / 1857 // | \ | / / / 1858 // | StoreCM / / / 1859 // | | / / / 1860 // | . . . / / 1861 // | / / 1862 // | . . . / / 1863 // | | | / / / 1864 // | | Phi[M] / / / 1865 // | | | / / / 1866 // | | | / / / 1867 // | Region . . . Phi[M] / / 1868 // | | | / / 1869 // \ | | / / 1870 // \ | . . . | / / 1871 // \ | | / / 1872 // Region Phi[M] / / 1873 // | \ / / 1874 // \ MergeMem 1875 // \ / 1876 // MemBarVolatile 1877 // 1878 // As with CMS + CondCardMark the first MergeMem merges the 1879 // AliasIdxBot Mem slice from the leading membar and the oopptr Mem 1880 // slice from the Store into the card mark membar. However, in this 1881 // case it may also merge an AliasRawIdx mem slice from the pre 1882 // barrier write. 1883 // 1884 // The trailing MergeMem merges an AliasIdxBot Mem slice from the 1885 // leading membar with an oop slice from the StoreN and an 1886 // AliasRawIdx slice from the post barrier writes. In this case the 1887 // AliasIdxRaw Mem slice is merged through a series of Phi nodes 1888 // which combine feeds from the If regions in the post barrier 1889 // subgraph. 1890 // 1891 // So, for G1 the same characteristic subgraph arises as for CMS + 1892 // CondCardMark. There is a normal subgraph feeding the card mark 1893 // membar and a normal subgraph feeding the trailing membar. 1894 // 1895 // The CAS graph when using G1GC also includes an optional 1896 // post-write subgraph. It is very similar to the above graph except 1897 // for a few details. 1898 // 1899 // - The control flow is gated by an additonal If which tests the 1900 // result from the CompareAndSwapX node 1901 // 1902 // - The MergeMem which feeds the card mark membar only merges the 1903 // AliasIdxBot slice from the leading membar and the AliasIdxRaw 1904 // slice from the pre-barrier. It does not merge the SCMemProj 1905 // AliasIdxBot slice. So, this subgraph does not look like the 1906 // normal CAS subgraph. 
1907 // 1908 // - The MergeMem which feeds the trailing membar merges the 1909 // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice 1910 // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it 1911 // has two AliasIdxBot input slices. However, this subgraph does 1912 // still look like the normal CAS subgraph. 1913 // 1914 // So, the upshot is: 1915 // 1916 // In all cases a volatile put graph will include a *normal* 1917 // volatile store subgraph betwen the leading membar and the 1918 // trailing membar. It may also include a normal volatile store 1919 // subgraph betwen the leading membar and the card mark membar. 1920 // 1921 // In all cases a CAS graph will contain a unique normal CAS graph 1922 // feeding the trailing membar. 1923 // 1924 // In all cases where there is a card mark membar (either as part of 1925 // a volatile object put or CAS) it will be fed by a MergeMem whose 1926 // AliasIdxBot slice feed will be a leading membar. 1927 // 1928 // The predicates controlling generation of instructions for store 1929 // and barrier nodes employ a few simple helper functions (described 1930 // below) which identify the presence or absence of all these 1931 // subgraph configurations and provide a means of traversing from 1932 // one node in the subgraph to another. 1933 1934 // is_CAS(int opcode) 1935 // 1936 // return true if opcode is one of the possible CompareAndSwapX 1937 // values otherwise false. 1938 1939 bool is_CAS(int opcode) 1940 { 1941 return (opcode == Op_CompareAndSwapI || 1942 opcode == Op_CompareAndSwapL || 1943 opcode == Op_CompareAndSwapN || 1944 opcode == Op_CompareAndSwapP); 1945 } 1946 1947 // leading_to_trailing 1948 // 1949 //graph traversal helper which detects the normal case Mem feed from 1950 // a release membar (or, optionally, its cpuorder child) to a 1951 // dependent volatile membar i.e. it ensures that one or other of 1952 // the following Mem flow subgraph is present. 
1953 // 1954 // MemBarRelease {leading} 1955 // {MemBarCPUOrder} {optional} 1956 // Bot | \ . . . 1957 // | StoreN/P[mo_release] . . . 1958 // | / 1959 // MergeMem 1960 // | 1961 // MemBarVolatile {not card mark} 1962 // 1963 // MemBarRelease {leading} 1964 // {MemBarCPUOrder} {optional} 1965 // | \ . . . 1966 // | CompareAndSwapX . . . 1967 // | 1968 // . . . SCMemProj 1969 // \ | 1970 // | MergeMem 1971 // | / 1972 // MemBarCPUOrder 1973 // MemBarAcquire {trailing} 1974 // 1975 // the predicate needs to be capable of distinguishing the following 1976 // volatile put graph which may arises when a GC post barrier 1977 // inserts a card mark membar 1978 // 1979 // MemBarRelease {leading} 1980 // {MemBarCPUOrder}__ 1981 // Bot | \ \ 1982 // | StoreN/P \ 1983 // | / \ | 1984 // MergeMem \ | 1985 // | \ | 1986 // MemBarVolatile \ | 1987 // {card mark} \ | 1988 // MergeMem 1989 // | 1990 // {not card mark} MemBarVolatile 1991 // 1992 // if the correct configuration is present returns the trailing 1993 // membar otherwise NULL. 1994 // 1995 // the input membar is expected to be either a cpuorder membar or a 1996 // release membar. in the latter case it should not have a cpu membar 1997 // child. 
1998 // 1999 // the returned value may be a card mark or trailing membar 2000 // 2001 2002 MemBarNode *leading_to_trailing(MemBarNode *leading) 2003 { 2004 assert((leading->Opcode() == Op_MemBarRelease || 2005 leading->Opcode() == Op_MemBarCPUOrder), 2006 "expecting a volatile or cpuroder membar!"); 2007 2008 // check the mem flow 2009 ProjNode *mem = leading->proj_out(TypeFunc::Memory); 2010 2011 if (!mem) { 2012 return NULL; 2013 } 2014 2015 Node *x = NULL; 2016 StoreNode * st = NULL; 2017 LoadStoreNode *cas = NULL; 2018 MergeMemNode *mm = NULL; 2019 MergeMemNode *mm2 = NULL; 2020 2021 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { 2022 x = mem->fast_out(i); 2023 if (x->is_MergeMem()) { 2024 if (mm != NULL) { 2025 if (mm2 != NULL) { 2026 // should not see more than 2 merge mems 2027 return NULL; 2028 } else { 2029 mm2 = x->as_MergeMem(); 2030 } 2031 } else { 2032 mm = x->as_MergeMem(); 2033 } 2034 } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { 2035 // two releasing stores/CAS nodes is one too many 2036 if (st != NULL || cas != NULL) { 2037 return NULL; 2038 } 2039 st = x->as_Store(); 2040 } else if (is_CAS(x->Opcode())) { 2041 if (st != NULL || cas != NULL) { 2042 return NULL; 2043 } 2044 cas = x->as_LoadStore(); 2045 } 2046 } 2047 2048 // must have a store or a cas 2049 if (!st && !cas) { 2050 return NULL; 2051 } 2052 2053 // must have at least one merge if we also have st 2054 if (st && !mm) { 2055 return NULL; 2056 } 2057 2058 if (cas) { 2059 Node *y = NULL; 2060 // look for an SCMemProj 2061 for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) { 2062 x = cas->fast_out(i); 2063 if (x->is_Proj()) { 2064 y = x; 2065 break; 2066 } 2067 } 2068 if (y == NULL) { 2069 return NULL; 2070 } 2071 // the proj must feed a MergeMem 2072 for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) { 2073 x = y->fast_out(i); 2074 if (x->is_MergeMem()) { 2075 mm = x->as_MergeMem(); 2076 break; 
2077 } 2078 } 2079 if (mm == NULL) { 2080 return NULL; 2081 } 2082 MemBarNode *mbar = NULL; 2083 // ensure the merge feeds a trailing membar cpuorder + acquire pair 2084 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { 2085 x = mm->fast_out(i); 2086 if (x->is_MemBar()) { 2087 int opcode = x->Opcode(); 2088 if (opcode == Op_MemBarCPUOrder) { 2089 MemBarNode *z = x->as_MemBar(); 2090 z = child_membar(z); 2091 if (z != NULL && z->Opcode() == Op_MemBarAcquire) { 2092 mbar = z; 2093 } 2094 } 2095 break; 2096 } 2097 } 2098 return mbar; 2099 } else { 2100 Node *y = NULL; 2101 // ensure the store feeds the first mergemem; 2102 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { 2103 if (st->fast_out(i) == mm) { 2104 y = st; 2105 break; 2106 } 2107 } 2108 if (y == NULL) { 2109 return NULL; 2110 } 2111 if (mm2 != NULL) { 2112 // ensure the store feeds the second mergemem; 2113 y = NULL; 2114 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { 2115 if (st->fast_out(i) == mm2) { 2116 y = st; 2117 } 2118 } 2119 if (y == NULL) { 2120 return NULL; 2121 } 2122 } 2123 2124 MemBarNode *mbar = NULL; 2125 // ensure the first mergemem feeds a volatile membar 2126 for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { 2127 x = mm->fast_out(i); 2128 if (x->is_MemBar()) { 2129 int opcode = x->Opcode(); 2130 if (opcode == Op_MemBarVolatile) { 2131 mbar = x->as_MemBar(); 2132 } 2133 break; 2134 } 2135 } 2136 if (mm2 == NULL) { 2137 // this is our only option for a trailing membar 2138 return mbar; 2139 } 2140 // ensure the second mergemem feeds a volatile membar 2141 MemBarNode *mbar2 = NULL; 2142 for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) { 2143 x = mm2->fast_out(i); 2144 if (x->is_MemBar()) { 2145 int opcode = x->Opcode(); 2146 if (opcode == Op_MemBarVolatile) { 2147 mbar2 = x->as_MemBar(); 2148 } 2149 break; 2150 } 2151 } 2152 // if we have two merge mems we must have two volatile membars 2153 if 
(mbar == NULL || mbar2 == NULL) { 2154 return NULL; 2155 } 2156 // return the trailing membar 2157 if (is_card_mark_membar(mbar2)) { 2158 return mbar; 2159 } else { 2160 if (is_card_mark_membar(mbar)) { 2161 return mbar2; 2162 } else { 2163 return NULL; 2164 } 2165 } 2166 } 2167 } 2168 2169 // trailing_to_leading 2170 // 2171 // graph traversal helper which detects the normal case Mem feed 2172 // from a trailing membar to a preceding release membar (optionally 2173 // its cpuorder child) i.e. it ensures that one or other of the 2174 // following Mem flow subgraphs is present. 2175 // 2176 // MemBarRelease {leading} 2177 // MemBarCPUOrder {optional} 2178 // | Bot | \ . . . 2179 // | | StoreN/P[mo_release] . . . 2180 // | | / 2181 // | MergeMem 2182 // | | 2183 // MemBarVolatile {not card mark} 2184 // 2185 // MemBarRelease {leading} 2186 // MemBarCPUOrder {optional} 2187 // | \ . . . 2188 // | CompareAndSwapX . . . 2189 // | 2190 // . . . SCMemProj 2191 // \ | 2192 // | MergeMem 2193 // | | 2194 // MemBarCPUOrder 2195 // MemBarAcquire {trailing} 2196 // 2197 // this predicate checks for the same flow as the previous predicate 2198 // but starting from the bottom rather than the top. 2199 // 2200 // if the configuration is present returns the cpuorder member for 2201 // preference or when absent the release membar otherwise NULL. 2202 // 2203 // n.b. the input membar is expected to be a MemBarVolatile or 2204 // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card 2205 // mark membar. 
2206 2207 MemBarNode *trailing_to_leading(const MemBarNode *barrier) 2208 { 2209 // input must be a volatile membar 2210 assert((barrier->Opcode() == Op_MemBarVolatile || 2211 barrier->Opcode() == Op_MemBarAcquire), 2212 "expecting a volatile or an acquire membar"); 2213 2214 assert((barrier->Opcode() != Op_MemBarVolatile) || 2215 !is_card_mark_membar(barrier), 2216 "not expecting a card mark membar"); 2217 Node *x; 2218 bool is_cas = barrier->Opcode() == Op_MemBarAcquire; 2219 2220 // if we have an acquire membar then it must be fed via a CPUOrder 2221 // membar 2222 2223 if (is_cas) { 2224 // skip to parent barrier which must be a cpuorder 2225 x = parent_membar(barrier); 2226 if (x->Opcode() != Op_MemBarCPUOrder) 2227 return NULL; 2228 } else { 2229 // start from the supplied barrier 2230 x = (Node *)barrier; 2231 } 2232 2233 // the Mem feed to the membar should be a merge 2234 x = x ->in(TypeFunc::Memory); 2235 if (!x->is_MergeMem()) 2236 return NULL; 2237 2238 MergeMemNode *mm = x->as_MergeMem(); 2239 2240 if (is_cas) { 2241 // the merge should be fed from the CAS via an SCMemProj node 2242 x = NULL; 2243 for (uint idx = 1; idx < mm->req(); idx++) { 2244 if (mm->in(idx)->Opcode() == Op_SCMemProj) { 2245 x = mm->in(idx); 2246 break; 2247 } 2248 } 2249 if (x == NULL) { 2250 return NULL; 2251 } 2252 // check for a CAS feeding this proj 2253 x = x->in(0); 2254 int opcode = x->Opcode(); 2255 if (!is_CAS(opcode)) { 2256 return NULL; 2257 } 2258 // the CAS should get its mem feed from the leading membar 2259 x = x->in(MemNode::Memory); 2260 } else { 2261 // the merge should get its Bottom mem feed from the leading membar 2262 x = mm->in(Compile::AliasIdxBot); 2263 } 2264 2265 // ensure this is a non control projection 2266 if (!x->is_Proj() || x->is_CFG()) { 2267 return NULL; 2268 } 2269 // if it is fed by a membar that's the one we want 2270 x = x->in(0); 2271 2272 if (!x->is_MemBar()) { 2273 return NULL; 2274 } 2275 2276 MemBarNode *leading = x->as_MemBar(); 2277 
// reject invalid candidates 2278 if (!leading_membar(leading)) { 2279 return NULL; 2280 } 2281 2282 // ok, we have a leading membar, now for the sanity clauses 2283 2284 // the leading membar must feed Mem to a releasing store or CAS 2285 ProjNode *mem = leading->proj_out(TypeFunc::Memory); 2286 StoreNode *st = NULL; 2287 LoadStoreNode *cas = NULL; 2288 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { 2289 x = mem->fast_out(i); 2290 if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { 2291 // two stores or CASes is one too many 2292 if (st != NULL || cas != NULL) { 2293 return NULL; 2294 } 2295 st = x->as_Store(); 2296 } else if (is_CAS(x->Opcode())) { 2297 if (st != NULL || cas != NULL) { 2298 return NULL; 2299 } 2300 cas = x->as_LoadStore(); 2301 } 2302 } 2303 2304 // we should not have both a store and a cas 2305 if (st == NULL & cas == NULL) { 2306 return NULL; 2307 } 2308 2309 if (st == NULL) { 2310 // nothing more to check 2311 return leading; 2312 } else { 2313 // we should not have a store if we started from an acquire 2314 if (is_cas) { 2315 return NULL; 2316 } 2317 2318 // the store should feed the merge we used to get here 2319 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { 2320 if (st->fast_out(i) == mm) { 2321 return leading; 2322 } 2323 } 2324 } 2325 2326 return NULL; 2327 } 2328 2329 // card_mark_to_leading 2330 // 2331 // graph traversal helper which traverses from a card mark volatile 2332 // membar to a leading membar i.e. it ensures that the following Mem 2333 // flow subgraph is present. 2334 // 2335 // MemBarRelease {leading} 2336 // {MemBarCPUOrder} {optional} 2337 // | . . . 2338 // Bot | / 2339 // MergeMem 2340 // | 2341 // MemBarVolatile (card mark) 2342 // | \ 2343 // . . . StoreCM 2344 // 2345 // if the configuration is present returns the cpuorder member for 2346 // preference or when absent the release membar otherwise NULL. 2347 // 2348 // n.b. 
the input membar is expected to be a MemBarVolatile amd must 2349 // be a card mark membar. 2350 2351 MemBarNode *card_mark_to_leading(const MemBarNode *barrier) 2352 { 2353 // input must be a card mark volatile membar 2354 assert(is_card_mark_membar(barrier), "expecting a card mark membar"); 2355 2356 // the Mem feed to the membar should be a merge 2357 Node *x = barrier->in(TypeFunc::Memory); 2358 if (!x->is_MergeMem()) { 2359 return NULL; 2360 } 2361 2362 MergeMemNode *mm = x->as_MergeMem(); 2363 2364 x = mm->in(Compile::AliasIdxBot); 2365 2366 if (!x->is_MemBar()) { 2367 return NULL; 2368 } 2369 2370 MemBarNode *leading = x->as_MemBar(); 2371 2372 if (leading_membar(leading)) { 2373 return leading; 2374 } 2375 2376 return NULL; 2377 } 2378 2379 bool unnecessary_acquire(const Node *barrier) 2380 { 2381 assert(barrier->is_MemBar(), "expecting a membar"); 2382 2383 if (UseBarriersForVolatile) { 2384 // we need to plant a dmb 2385 return false; 2386 } 2387 2388 // a volatile read derived from bytecode (or also from an inlined 2389 // SHA field read via LibraryCallKit::load_field_from_object) 2390 // manifests as a LoadX[mo_acquire] followed by an acquire membar 2391 // with a bogus read dependency on it's preceding load. so in those 2392 // cases we will find the load node at the PARMS offset of the 2393 // acquire membar. n.b. there may be an intervening DecodeN node. 2394 // 2395 // a volatile load derived from an inlined unsafe field access 2396 // manifests as a cpuorder membar with Ctl and Mem projections 2397 // feeding both an acquire membar and a LoadX[mo_acquire]. The 2398 // acquire then feeds another cpuorder membar via Ctl and Mem 2399 // projections. The load has no output dependency on these trailing 2400 // membars because subsequent nodes inserted into the graph take 2401 // their control feed from the final membar cpuorder meaning they 2402 // are all ordered after the load. 
2403 2404 Node *x = barrier->lookup(TypeFunc::Parms); 2405 if (x) { 2406 // we are starting from an acquire and it has a fake dependency 2407 // 2408 // need to check for 2409 // 2410 // LoadX[mo_acquire] 2411 // { |1 } 2412 // {DecodeN} 2413 // |Parms 2414 // MemBarAcquire* 2415 // 2416 // where * tags node we were passed 2417 // and |k means input k 2418 if (x->is_DecodeNarrowPtr()) { 2419 x = x->in(1); 2420 } 2421 2422 return (x->is_Load() && x->as_Load()->is_acquire()); 2423 } 2424 2425 // now check for an unsafe volatile get 2426 2427 // need to check for 2428 // 2429 // MemBarCPUOrder 2430 // || \\ 2431 // MemBarAcquire* LoadX[mo_acquire] 2432 // || 2433 // MemBarCPUOrder 2434 // 2435 // where * tags node we were passed 2436 // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes 2437 2438 // check for a parent MemBarCPUOrder 2439 ProjNode *ctl; 2440 ProjNode *mem; 2441 MemBarNode *parent = parent_membar(barrier); 2442 if (!parent || parent->Opcode() != Op_MemBarCPUOrder) 2443 return false; 2444 ctl = parent->proj_out(TypeFunc::Control); 2445 mem = parent->proj_out(TypeFunc::Memory); 2446 if (!ctl || !mem) { 2447 return false; 2448 } 2449 // ensure the proj nodes both feed a LoadX[mo_acquire] 2450 LoadNode *ld = NULL; 2451 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { 2452 x = ctl->fast_out(i); 2453 // if we see a load we keep hold of it and stop searching 2454 if (x->is_Load()) { 2455 ld = x->as_Load(); 2456 break; 2457 } 2458 } 2459 // it must be an acquiring load 2460 if (ld && ld->is_acquire()) { 2461 2462 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { 2463 x = mem->fast_out(i); 2464 // if we see the same load we drop it and stop searching 2465 if (x == ld) { 2466 ld = NULL; 2467 break; 2468 } 2469 } 2470 // we must have dropped the load 2471 if (ld == NULL) { 2472 // check for a child cpuorder membar 2473 MemBarNode *child = child_membar(barrier->as_MemBar()); 2474 if (child && child->Opcode() == 
Op_MemBarCPUOrder) 2475 return true; 2476 } 2477 } 2478 2479 // final option for unnecessary mebar is that it is a trailing node 2480 // belonging to a CAS 2481 2482 MemBarNode *leading = trailing_to_leading(barrier->as_MemBar()); 2483 2484 return leading != NULL; 2485 } 2486 2487 bool needs_acquiring_load(const Node *n) 2488 { 2489 assert(n->is_Load(), "expecting a load"); 2490 if (UseBarriersForVolatile) { 2491 // we use a normal load and a dmb 2492 return false; 2493 } 2494 2495 LoadNode *ld = n->as_Load(); 2496 2497 if (!ld->is_acquire()) { 2498 return false; 2499 } 2500 2501 // check if this load is feeding an acquire membar 2502 // 2503 // LoadX[mo_acquire] 2504 // { |1 } 2505 // {DecodeN} 2506 // |Parms 2507 // MemBarAcquire* 2508 // 2509 // where * tags node we were passed 2510 // and |k means input k 2511 2512 Node *start = ld; 2513 Node *mbacq = NULL; 2514 2515 // if we hit a DecodeNarrowPtr we reset the start node and restart 2516 // the search through the outputs 2517 restart: 2518 2519 for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) { 2520 Node *x = start->fast_out(i); 2521 if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) { 2522 mbacq = x; 2523 } else if (!mbacq && 2524 (x->is_DecodeNarrowPtr() || 2525 (x->is_Mach() && x->Opcode() == Op_DecodeN))) { 2526 start = x; 2527 goto restart; 2528 } 2529 } 2530 2531 if (mbacq) { 2532 return true; 2533 } 2534 2535 // now check for an unsafe volatile get 2536 2537 // check if Ctl and Proj feed comes from a MemBarCPUOrder 2538 // 2539 // MemBarCPUOrder 2540 // || \\ 2541 // MemBarAcquire* LoadX[mo_acquire] 2542 // || 2543 // MemBarCPUOrder 2544 2545 MemBarNode *membar; 2546 2547 membar = parent_membar(ld); 2548 2549 if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { 2550 return false; 2551 } 2552 2553 // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain 2554 2555 membar = child_membar(membar); 2556 2557 if (!membar || !membar->Opcode() == Op_MemBarAcquire) { 2558 
return false; 2559 } 2560 2561 membar = child_membar(membar); 2562 2563 if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { 2564 return false; 2565 } 2566 2567 return true; 2568 } 2569 2570 bool unnecessary_release(const Node *n) 2571 { 2572 assert((n->is_MemBar() && 2573 n->Opcode() == Op_MemBarRelease), 2574 "expecting a release membar"); 2575 2576 if (UseBarriersForVolatile) { 2577 // we need to plant a dmb 2578 return false; 2579 } 2580 2581 // if there is a dependent CPUOrder barrier then use that as the 2582 // leading 2583 2584 MemBarNode *barrier = n->as_MemBar(); 2585 // check for an intervening cpuorder membar 2586 MemBarNode *b = child_membar(barrier); 2587 if (b && b->Opcode() == Op_MemBarCPUOrder) { 2588 // ok, so start the check from the dependent cpuorder barrier 2589 barrier = b; 2590 } 2591 2592 // must start with a normal feed 2593 MemBarNode *trailing = leading_to_trailing(barrier); 2594 2595 return (trailing != NULL); 2596 } 2597 2598 bool unnecessary_volatile(const Node *n) 2599 { 2600 // assert n->is_MemBar(); 2601 if (UseBarriersForVolatile) { 2602 // we need to plant a dmb 2603 return false; 2604 } 2605 2606 MemBarNode *mbvol = n->as_MemBar(); 2607 2608 // first we check if this is part of a card mark. if so then we have 2609 // to generate a StoreLoad barrier 2610 2611 if (is_card_mark_membar(mbvol)) { 2612 return false; 2613 } 2614 2615 // ok, if it's not a card mark then we still need to check if it is 2616 // a trailing membar of a volatile put graph. 
2617 2618 return (trailing_to_leading(mbvol) != NULL); 2619 } 2620 2621 // predicates controlling emit of str<x>/stlr<x> and associated dmbs 2622 2623 bool needs_releasing_store(const Node *n) 2624 { 2625 // assert n->is_Store(); 2626 if (UseBarriersForVolatile) { 2627 // we use a normal store and dmb combination 2628 return false; 2629 } 2630 2631 StoreNode *st = n->as_Store(); 2632 2633 // the store must be marked as releasing 2634 if (!st->is_release()) { 2635 return false; 2636 } 2637 2638 // the store must be fed by a membar 2639 2640 Node *x = st->lookup(StoreNode::Memory); 2641 2642 if (! x || !x->is_Proj()) { 2643 return false; 2644 } 2645 2646 ProjNode *proj = x->as_Proj(); 2647 2648 x = proj->lookup(0); 2649 2650 if (!x || !x->is_MemBar()) { 2651 return false; 2652 } 2653 2654 MemBarNode *barrier = x->as_MemBar(); 2655 2656 // if the barrier is a release membar or a cpuorder mmebar fed by a 2657 // release membar then we need to check whether that forms part of a 2658 // volatile put graph. 2659 2660 // reject invalid candidates 2661 if (!leading_membar(barrier)) { 2662 return false; 2663 } 2664 2665 // does this lead a normal subgraph? 
2666 MemBarNode *trailing = leading_to_trailing(barrier); 2667 2668 return (trailing != NULL); 2669 } 2670 2671 // predicate controlling translation of CAS 2672 // 2673 // returns true if CAS needs to use an acquiring load otherwise false 2674 2675 bool needs_acquiring_load_exclusive(const Node *n) 2676 { 2677 assert(is_CAS(n->Opcode()), "expecting a compare and swap"); 2678 if (UseBarriersForVolatile) { 2679 return false; 2680 } 2681 2682 // CAS nodes only ought to turn up in inlined unsafe CAS operations 2683 #ifdef ASSERT 2684 LoadStoreNode *st = n->as_LoadStore(); 2685 2686 // the store must be fed by a membar 2687 2688 Node *x = st->lookup(StoreNode::Memory); 2689 2690 assert (x && x->is_Proj(), "CAS not fed by memory proj!"); 2691 2692 ProjNode *proj = x->as_Proj(); 2693 2694 x = proj->lookup(0); 2695 2696 assert (x && x->is_MemBar(), "CAS not fed by membar!"); 2697 2698 MemBarNode *barrier = x->as_MemBar(); 2699 2700 // the barrier must be a cpuorder mmebar fed by a release membar 2701 2702 assert(barrier->Opcode() == Op_MemBarCPUOrder, 2703 "CAS not fed by cpuorder membar!"); 2704 2705 MemBarNode *b = parent_membar(barrier); 2706 assert ((b != NULL && b->Opcode() == Op_MemBarRelease), 2707 "CAS not fed by cpuorder+release membar pair!"); 2708 2709 // does this lead a normal subgraph? 
2710 MemBarNode *mbar = leading_to_trailing(barrier); 2711 2712 assert(mbar != NULL, "CAS not embedded in normal graph!"); 2713 2714 assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire"); 2715 #endif // ASSERT 2716 // so we can just return true here 2717 return true; 2718 } 2719 2720 // predicate controlling translation of StoreCM 2721 // 2722 // returns true if a StoreStore must precede the card write otherwise 2723 // false 2724 2725 bool unnecessary_storestore(const Node *storecm) 2726 { 2727 assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); 2728 2729 // we only ever need to generate a dmb ishst between an object put 2730 // and the associated card mark when we are using CMS without 2731 // conditional card marking. Any other occurence will happen when 2732 // performing a card mark using CMS with conditional card marking or 2733 // G1. In those cases the preceding MamBarVolatile will be 2734 // translated to a dmb ish which guarantes visibility of the 2735 // preceding StoreN/P before this StoreCM 2736 2737 if (!UseConcMarkSweepGC || UseCondCardMark) { 2738 return true; 2739 } 2740 2741 // if we are implementing volatile puts using barriers then we must 2742 // insert the dmb ishst 2743 2744 if (UseBarriersForVolatile) { 2745 return false; 2746 } 2747 2748 // we must be using CMS with conditional card marking so we ahve to 2749 // generate the StoreStore 2750 2751 return false; 2752 } 2753 2754 2755 #define __ _masm. 2756 2757 // advance declarations for helper functions to convert register 2758 // indices to register objects 2759 2760 // the ad file has to provide implementations of certain methods 2761 // expected by the generic code 2762 // 2763 // REQUIRED FUNCTIONALITY 2764 2765 //============================================================================= 2766 2767 // !!!!! 
Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.

// Return-address offset for a static Java call: the call is a single
// bl instruction, so the return address is 4 bytes past the call.
int MachCallStaticJavaNode::ret_addr_offset()
{
  // call should be a simple bl
  int off = 4;
  return off;
}

// Return-address offset for a dynamic Java call: emitted as a
// 4-instruction sequence (movz, movk, movk, bl) = 16 bytes.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}

// Return-address offset for a runtime call: short form when the
// target is a blob in the code cache, otherwise the 6-instruction
// java-to-runtime sequence.
int MachCallRuntimeNode::ret_addr_offset() {
  // for generated stubs the call will be
  //   far_call(addr)
  // for real runtime callouts it will be six instructions
  // see aarch64_enc_java_to_runtime
  //   adr(rscratch2, retaddr)
  //   lea(rscratch1, RuntimeAddress(addr)
  //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
  //   blrt rscratch1
  CodeBlob *cb = CodeCache::find_blob(_entry_point);
  if (cb) {
    return MacroAssembler::far_branch_size();
  } else {
    return 6 * NativeInstruction::instruction_size;
  }
}

// Indicate if the safepoint node needs the polling page as an input

// the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
// instruction itself. so we cannot plant a mov of the safepoint poll
// address followed by a load. setting this to true means the mov is
// scheduled as a prior instruction. that's better for scheduling
// anyway.
2808 2809 bool SafePointNode::needs_polling_address_input() 2810 { 2811 return true; 2812 } 2813 2814 //============================================================================= 2815 2816 #ifndef PRODUCT 2817 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 2818 st->print("BREAKPOINT"); 2819 } 2820 #endif 2821 2822 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 2823 MacroAssembler _masm(&cbuf); 2824 __ brk(0); 2825 } 2826 2827 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { 2828 return MachNode::size(ra_); 2829 } 2830 2831 //============================================================================= 2832 2833 #ifndef PRODUCT 2834 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2835 st->print("nop \t# %d bytes pad for loops and calls", _count); 2836 } 2837 #endif 2838 2839 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2840 MacroAssembler _masm(&cbuf); 2841 for (int i = 0; i < _count; i++) { 2842 __ nop(); 2843 } 2844 } 2845 2846 uint MachNopNode::size(PhaseRegAlloc*) const { 2847 return _count * NativeInstruction::instruction_size; 2848 } 2849 2850 //============================================================================= 2851 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 2852 2853 int Compile::ConstantTable::calculate_table_base_offset() const { 2854 return 0; // absolute addressing, no offset 2855 } 2856 2857 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } 2858 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 2859 ShouldNotReachHere(); 2860 } 2861 2862 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 2863 // Empty encoding 2864 } 2865 2866 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 2867 return 0; 2868 } 2869 2870 #ifndef PRODUCT 2871 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) 
const { 2872 st->print("-- \t// MachConstantBaseNode (empty encoding)"); 2873 } 2874 #endif 2875 2876 #ifndef PRODUCT 2877 void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 2878 Compile* C = ra_->C; 2879 2880 int framesize = C->frame_slots() << LogBytesPerInt; 2881 2882 if (C->need_stack_bang(framesize)) 2883 st->print("# stack bang size=%d\n\t", framesize); 2884 2885 if (framesize < ((1 << 9) + 2 * wordSize)) { 2886 st->print("sub sp, sp, #%d\n\t", framesize); 2887 st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize); 2888 if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize); 2889 } else { 2890 st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize)); 2891 if (PreserveFramePointer) st->print("mov rfp, sp\n\t"); 2892 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize); 2893 st->print("sub sp, sp, rscratch1"); 2894 } 2895 } 2896 #endif 2897 2898 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 2899 Compile* C = ra_->C; 2900 MacroAssembler _masm(&cbuf); 2901 2902 // n.b. frame size includes space for return pc and rfp 2903 const long framesize = C->frame_size_in_bytes(); 2904 assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment"); 2905 2906 // insert a nop at the start of the prolog so we can patch in a 2907 // branch if we need to invalidate the method later 2908 __ nop(); 2909 2910 int bangsize = C->bang_size_in_bytes(); 2911 if (C->need_stack_bang(bangsize) && UseStackBanging) 2912 __ generate_stack_overflow_check(bangsize); 2913 2914 __ build_frame(framesize); 2915 2916 if (NotifySimulator) { 2917 __ notify(Assembler::method_entry); 2918 } 2919 2920 if (VerifyStackAtCalls) { 2921 Unimplemented(); 2922 } 2923 2924 C->set_frame_complete(cbuf.insts_size()); 2925 2926 if (C->has_mach_constant_base_node()) { 2927 // NOTE: We set the table base offset here because users might be 2928 // emitted before MachConstantBaseNode. 
2929 Compile::ConstantTable& constant_table = C->constant_table(); 2930 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); 2931 } 2932 } 2933 2934 uint MachPrologNode::size(PhaseRegAlloc* ra_) const 2935 { 2936 return MachNode::size(ra_); // too many variables; just compute it 2937 // the hard way 2938 } 2939 2940 int MachPrologNode::reloc() const 2941 { 2942 return 0; 2943 } 2944 2945 //============================================================================= 2946 2947 #ifndef PRODUCT 2948 void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 2949 Compile* C = ra_->C; 2950 int framesize = C->frame_slots() << LogBytesPerInt; 2951 2952 st->print("# pop frame %d\n\t",framesize); 2953 2954 if (framesize == 0) { 2955 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize)); 2956 } else if (framesize < ((1 << 9) + 2 * wordSize)) { 2957 st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize); 2958 st->print("add sp, sp, #%d\n\t", framesize); 2959 } else { 2960 st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize); 2961 st->print("add sp, sp, rscratch1\n\t"); 2962 st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize)); 2963 } 2964 2965 if (do_polling() && C->is_method_compilation()) { 2966 st->print("# touch polling page\n\t"); 2967 st->print("mov rscratch1, #0x%lx\n\t", p2i(os::get_polling_page())); 2968 st->print("ldr zr, [rscratch1]"); 2969 } 2970 } 2971 #endif 2972 2973 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 2974 Compile* C = ra_->C; 2975 MacroAssembler _masm(&cbuf); 2976 int framesize = C->frame_slots() << LogBytesPerInt; 2977 2978 __ remove_frame(framesize); 2979 2980 if (NotifySimulator) { 2981 __ notify(Assembler::method_reentry); 2982 } 2983 2984 if (do_polling() && C->is_method_compilation()) { 2985 __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type); 2986 } 2987 } 2988 2989 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const 
{ 2990 // Variable size. Determine dynamically. 2991 return MachNode::size(ra_); 2992 } 2993 2994 int MachEpilogNode::reloc() const { 2995 // Return number of relocatable values contained in this instruction. 2996 return 1; // 1 for polling page. 2997 } 2998 2999 const Pipeline * MachEpilogNode::pipeline() const { 3000 return MachNode::pipeline_class(); 3001 } 3002 3003 // This method seems to be obsolete. It is declared in machnode.hpp 3004 // and defined in all *.ad files, but it is never called. Should we 3005 // get rid of it? 3006 int MachEpilogNode::safepoint_offset() const { 3007 assert(do_polling(), "no return for this epilog node"); 3008 return 4; 3009 } 3010 3011 //============================================================================= 3012 3013 // Figure out which register class each belongs in: rc_int, rc_float or 3014 // rc_stack. 3015 enum RC { rc_bad, rc_int, rc_float, rc_stack }; 3016 3017 static enum RC rc_class(OptoReg::Name reg) { 3018 3019 if (reg == OptoReg::Bad) { 3020 return rc_bad; 3021 } 3022 3023 // we have 30 int registers * 2 halves 3024 // (rscratch1 and rscratch2 are omitted) 3025 3026 if (reg < 60) { 3027 return rc_int; 3028 } 3029 3030 // we have 32 float register * 2 halves 3031 if (reg < 60 + 128) { 3032 return rc_float; 3033 } 3034 3035 // Between float regs & stack is the flags regs. 3036 assert(OptoReg::is_stack(reg), "blow up if spilling flags"); 3037 3038 return rc_stack; 3039 } 3040 3041 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { 3042 Compile* C = ra_->C; 3043 3044 // Get registers to move. 
3045 OptoReg::Name src_hi = ra_->get_reg_second(in(1)); 3046 OptoReg::Name src_lo = ra_->get_reg_first(in(1)); 3047 OptoReg::Name dst_hi = ra_->get_reg_second(this); 3048 OptoReg::Name dst_lo = ra_->get_reg_first(this); 3049 3050 enum RC src_hi_rc = rc_class(src_hi); 3051 enum RC src_lo_rc = rc_class(src_lo); 3052 enum RC dst_hi_rc = rc_class(dst_hi); 3053 enum RC dst_lo_rc = rc_class(dst_lo); 3054 3055 assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); 3056 3057 if (src_hi != OptoReg::Bad) { 3058 assert((src_lo&1)==0 && src_lo+1==src_hi && 3059 (dst_lo&1)==0 && dst_lo+1==dst_hi, 3060 "expected aligned-adjacent pairs"); 3061 } 3062 3063 if (src_lo == dst_lo && src_hi == dst_hi) { 3064 return 0; // Self copy, no move. 3065 } 3066 3067 bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && 3068 (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; 3069 int src_offset = ra_->reg2offset(src_lo); 3070 int dst_offset = ra_->reg2offset(dst_lo); 3071 3072 if (bottom_type()->isa_vect() != NULL) { 3073 uint ireg = ideal_reg(); 3074 assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); 3075 if (cbuf) { 3076 MacroAssembler _masm(cbuf); 3077 assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); 3078 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { 3079 // stack->stack 3080 assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset"); 3081 if (ireg == Op_VecD) { 3082 __ unspill(rscratch1, true, src_offset); 3083 __ spill(rscratch1, true, dst_offset); 3084 } else { 3085 __ spill_copy128(src_offset, dst_offset); 3086 } 3087 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { 3088 __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3089 ireg == Op_VecD ? __ T8B : __ T16B, 3090 as_FloatRegister(Matcher::_regEncode[src_lo])); 3091 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { 3092 __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), 3093 ireg == Op_VecD ? 
__ D : __ Q, 3094 ra_->reg2offset(dst_lo)); 3095 } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { 3096 __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3097 ireg == Op_VecD ? __ D : __ Q, 3098 ra_->reg2offset(src_lo)); 3099 } else { 3100 ShouldNotReachHere(); 3101 } 3102 } 3103 } else if (cbuf) { 3104 MacroAssembler _masm(cbuf); 3105 switch (src_lo_rc) { 3106 case rc_int: 3107 if (dst_lo_rc == rc_int) { // gpr --> gpr copy 3108 if (is64) { 3109 __ mov(as_Register(Matcher::_regEncode[dst_lo]), 3110 as_Register(Matcher::_regEncode[src_lo])); 3111 } else { 3112 MacroAssembler _masm(cbuf); 3113 __ movw(as_Register(Matcher::_regEncode[dst_lo]), 3114 as_Register(Matcher::_regEncode[src_lo])); 3115 } 3116 } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy 3117 if (is64) { 3118 __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3119 as_Register(Matcher::_regEncode[src_lo])); 3120 } else { 3121 __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3122 as_Register(Matcher::_regEncode[src_lo])); 3123 } 3124 } else { // gpr --> stack spill 3125 assert(dst_lo_rc == rc_stack, "spill to bad register class"); 3126 __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); 3127 } 3128 break; 3129 case rc_float: 3130 if (dst_lo_rc == rc_int) { // fpr --> gpr copy 3131 if (is64) { 3132 __ fmovd(as_Register(Matcher::_regEncode[dst_lo]), 3133 as_FloatRegister(Matcher::_regEncode[src_lo])); 3134 } else { 3135 __ fmovs(as_Register(Matcher::_regEncode[dst_lo]), 3136 as_FloatRegister(Matcher::_regEncode[src_lo])); 3137 } 3138 } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy 3139 if (cbuf) { 3140 __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3141 as_FloatRegister(Matcher::_regEncode[src_lo])); 3142 } else { 3143 __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3144 as_FloatRegister(Matcher::_regEncode[src_lo])); 3145 } 3146 } else { // fpr --> stack spill 3147 assert(dst_lo_rc == rc_stack, "spill to bad register class"); 
3148 __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), 3149 is64 ? __ D : __ S, dst_offset); 3150 } 3151 break; 3152 case rc_stack: 3153 if (dst_lo_rc == rc_int) { // stack --> gpr load 3154 __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); 3155 } else if (dst_lo_rc == rc_float) { // stack --> fpr load 3156 __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), 3157 is64 ? __ D : __ S, src_offset); 3158 } else { // stack --> stack copy 3159 assert(dst_lo_rc == rc_stack, "spill to bad register class"); 3160 __ unspill(rscratch1, is64, src_offset); 3161 __ spill(rscratch1, is64, dst_offset); 3162 } 3163 break; 3164 default: 3165 assert(false, "bad rc_class for spill"); 3166 ShouldNotReachHere(); 3167 } 3168 } 3169 3170 if (st) { 3171 st->print("spill "); 3172 if (src_lo_rc == rc_stack) { 3173 st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo)); 3174 } else { 3175 st->print("%s -> ", Matcher::regName[src_lo]); 3176 } 3177 if (dst_lo_rc == rc_stack) { 3178 st->print("[sp, #%d]", ra_->reg2offset(dst_lo)); 3179 } else { 3180 st->print("%s", Matcher::regName[dst_lo]); 3181 } 3182 if (bottom_type()->isa_vect() != NULL) { 3183 st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128); 3184 } else { 3185 st->print("\t# spill size = %d", is64 ? 
64:32); 3186 } 3187 } 3188 3189 return 0; 3190 3191 } 3192 3193 #ifndef PRODUCT 3194 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 3195 if (!ra_) 3196 st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); 3197 else 3198 implementation(NULL, ra_, false, st); 3199 } 3200 #endif 3201 3202 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 3203 implementation(&cbuf, ra_, false, NULL); 3204 } 3205 3206 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 3207 return MachNode::size(ra_); 3208 } 3209 3210 //============================================================================= 3211 3212 #ifndef PRODUCT 3213 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { 3214 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 3215 int reg = ra_->get_reg_first(this); 3216 st->print("add %s, rsp, #%d]\t# box lock", 3217 Matcher::regName[reg], offset); 3218 } 3219 #endif 3220 3221 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 3222 MacroAssembler _masm(&cbuf); 3223 3224 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 3225 int reg = ra_->get_encode(this); 3226 3227 if (Assembler::operand_valid_for_add_sub_immediate(offset)) { 3228 __ add(as_Register(reg), sp, offset); 3229 } else { 3230 ShouldNotReachHere(); 3231 } 3232 } 3233 3234 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 3235 // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). 
  // A single 'add reg, sp, #offset' instruction.
  return 4;
}

//=============================================================================

#ifndef PRODUCT
// Pretty-print the unverified entry point (UEP) inline-cache check.
// NOTE(review): these debug strings appear to be missing the opening '['
// of the address operand and 'bne,' carries a stray comma — format-only
// output, but worth confirming against the emitted code below.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif

// Emit the UEP: compare the receiver's klass (in j_rarg0) against the
// inline-cache expectation and branch to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}

// REQUIRED EMIT CODE

//=============================================================================

// Emit exception handler code.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    // Allocation of the stub area failed; report and bail out.
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize the return address in lr, then jump to the unpack blob.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// REQUIRED MATCHER CODE

//=============================================================================

// Answer whether C2 has a match rule for this ideal opcode.
const bool Matcher::match_rule_supported(int opcode) {

  // TODO
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_StrEquals and other intrinsics
  if (!has_match_rule(opcode)) {
    return false;
  }

  return true;  // Per default match rules are supported.
}

// As above, but for vector nodes of the given element count.
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {

  // TODO
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  // Add rules here.

  return ret_value;  // Per default match rules are supported.
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

int Matcher::regnum_to_fpu_offset(int regnum)
{
  // Not used on AArch64 (no x87-style FPU stack).
  Unimplemented();
  return 0;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.

  return (-32768 <= offset && offset < 32768);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  int size = MIN2(16,(int)MaxVectorSize);
  // Minimum 2 values in vector
  if (size < 2*type2aelembytes(bt)) size = 0;
  // But never < 4
  if (size < 4) size = 0;
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  //  For the moment limit the vector size to 8 bytes
  int size = 8 / type2aelembytes(bt);
  if (size < 2) size = 2;
  return size;
}

// Vector ideal reg.
// Ideal register class for a vector of the given byte length
// (8 bytes -> VecD, 16 bytes -> VecX; anything else is a bug).
const int Matcher::vector_ideal_reg(int len) {
  switch(len) {
    case  8: return Op_VecD;
    case 16: return Op_VecX;
  }
  ShouldNotReachHere();
  return 0;
}

// Shift counts for vector shifts always live in a 128-bit register.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are OK unless -XX:+AlignVector.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64 — calling it is an error here.  (The original
// comment, "No-op on amd64", was inherited from the x86 port, where
// this hook really is a no-op.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;

// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // Java args use r0-r7 and v0-v7 (both 32-bit halves of each).
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
// Not used on AArch64 (no combined divmod node).
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
3556 RegMask Matcher::modL_proj_mask() { 3557 ShouldNotReachHere(); 3558 return RegMask(); 3559 } 3560 3561 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 3562 return FP_REG_mask(); 3563 } 3564 3565 // helper for encoding java_to_runtime calls on sim 3566 // 3567 // this is needed to compute the extra arguments required when 3568 // planting a call to the simulator blrt instruction. the TypeFunc 3569 // can be queried to identify the counts for integral, and floating 3570 // arguments and the return type 3571 3572 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype) 3573 { 3574 int gps = 0; 3575 int fps = 0; 3576 const TypeTuple *domain = tf->domain(); 3577 int max = domain->cnt(); 3578 for (int i = TypeFunc::Parms; i < max; i++) { 3579 const Type *t = domain->field_at(i); 3580 switch(t->basic_type()) { 3581 case T_FLOAT: 3582 case T_DOUBLE: 3583 fps++; 3584 default: 3585 gps++; 3586 } 3587 } 3588 gpcnt = gps; 3589 fpcnt = fps; 3590 BasicType rt = tf->return_type(); 3591 switch (rt) { 3592 case T_VOID: 3593 rtype = MacroAssembler::ret_type_void; 3594 break; 3595 default: 3596 rtype = MacroAssembler::ret_type_integral; 3597 break; 3598 case T_FLOAT: 3599 rtype = MacroAssembler::ret_type_float; 3600 break; 3601 case T_DOUBLE: 3602 rtype = MacroAssembler::ret_type_double; 3603 break; 3604 } 3605 } 3606 3607 #define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN) \ 3608 MacroAssembler _masm(&cbuf); \ 3609 { \ 3610 guarantee(INDEX == -1, "mode not permitted for volatile"); \ 3611 guarantee(DISP == 0, "mode not permitted for volatile"); \ 3612 guarantee(SCALE == 0, "mode not permitted for volatile"); \ 3613 __ INSN(REG, as_Register(BASE)); \ 3614 } 3615 3616 typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); 3617 typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); 3618 typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, 3619 
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);

// Used for all non-volatile memory accesses.  The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
                      Register reg, int opcode,
                      Register base, int index, int size, int disp)
{
  Address::extend scale;

  // Hooboy, this is fugly.  We need a way to communicate to the
  // encoder that the index needs to be sign extended, so we have to
  // enumerate all the cases.
  switch (opcode) {
  case INDINDEXSCALEDOFFSETI2L:
  case INDINDEXSCALEDI2L:
  case INDINDEXSCALEDOFFSETI2LN:
  case INDINDEXSCALEDI2LN:
  case INDINDEXOFFSETI2L:
  case INDINDEXOFFSETI2LN:
    scale = Address::sxtw(size);
    break;
  default:
    scale = Address::lsl(size);
  }

  if (index == -1) {
    // No index register: simple base + displacement.
    (masm.*insn)(reg, Address(base, disp));
  } else {
    if (disp == 0) {
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    } else {
      // Base + index + displacement: fold base+disp into rscratch1 first.
      masm.lea(rscratch1, Address(base, disp));
      (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
    }
  }
}

// Same as above, for float/double register accesses.
static void loadStore(MacroAssembler masm, mem_float_insn insn,
                      FloatRegister reg, int opcode,
                      Register base, int index, int size, int disp)
{
  Address::extend scale;

  switch (opcode) {
  case INDINDEXSCALEDOFFSETI2L:
  case INDINDEXSCALEDI2L:
  case INDINDEXSCALEDOFFSETI2LN:
  case INDINDEXSCALEDI2LN:
    scale = Address::sxtw(size);
    break;
  default:
    scale = Address::lsl(size);
  }

  if (index == -1) {
    (masm.*insn)(reg, Address(base, disp));
  } else {
    if (disp == 0) {
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    } else {
      masm.lea(rscratch1, Address(base, disp));
      (masm.*insn)(reg, Address(rscratch1, as_Register(index), scale));
    }
  }
}

// Same again, for SIMD/vector register accesses (only base+disp or
// base+scaled-index addressing is supported).
static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                      FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                      int opcode, Register base, int index, int size, int disp)
{
  if (index == -1) {
    (masm.*insn)(reg, T, Address(base, disp));
  } else {
    assert(disp == 0, "unsupported address mode");
    (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
  }
}

%}



//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to
// output byte streams.  Encoding classes are parameterized macros
// used by Machine Instruction Nodes in order to generate the bit
// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently
// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
// COND_INTER.  REG_INTER causes an operand to generate a function
// which returns its register number when queried.  CONST_INTER causes
// an operand to generate a function which returns the value of the
// constant when queried.  MEMORY_INTER causes an operand to generate
// four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried.
// COND_INTER causes an operand to generate six functions which return
// the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional
// instruction.
//
// Instructions specify two basic values for encoding.  Again, a
// function is available to check if the constant displacement is an
// oop. They use the ins_encode keyword to specify their encoding
// classes (which must be a sequence of enc_class names, and their
// parameters, specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular
// instruction needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the
  // intel encoding scheme (opcode, rm, sib, immediate), and call them
  // from C++ code in the enc_class source block.  Emit functions will
  // live in the main source block for now.  In future, we can
  // generalize this by adding a syntax that specifies the sizes of
  // fields in an order, so that the adlc can build the emit functions
  // automagically

  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}

  // BEGIN Non-volatile memory access

  // Load byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended (int result).
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load byte, zero-extended (long result).
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg
= as_Register($dst$$reg); 3766 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(), 3767 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3768 %} 3769 3770 enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{ 3771 Register dst_reg = as_Register($dst$$reg); 3772 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(), 3773 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3774 %} 3775 3776 enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{ 3777 Register dst_reg = as_Register($dst$$reg); 3778 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(), 3779 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3780 %} 3781 3782 enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{ 3783 Register dst_reg = as_Register($dst$$reg); 3784 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(), 3785 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3786 %} 3787 3788 enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{ 3789 Register dst_reg = as_Register($dst$$reg); 3790 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(), 3791 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3792 %} 3793 3794 enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{ 3795 Register dst_reg = as_Register($dst$$reg); 3796 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(), 3797 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3798 %} 3799 3800 enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{ 3801 Register dst_reg = as_Register($dst$$reg); 3802 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(), 3803 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3804 %} 3805 3806 enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{ 3807 Register dst_reg = as_Register($dst$$reg); 3808 loadStore(MacroAssembler(&cbuf), 
&MacroAssembler::ldrsw, dst_reg, $mem->opcode(), 3809 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3810 %} 3811 3812 enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{ 3813 Register dst_reg = as_Register($dst$$reg); 3814 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(), 3815 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3816 %} 3817 3818 enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{ 3819 FloatRegister dst_reg = as_FloatRegister($dst$$reg); 3820 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(), 3821 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3822 %} 3823 3824 enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{ 3825 FloatRegister dst_reg = as_FloatRegister($dst$$reg); 3826 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(), 3827 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3828 %} 3829 3830 enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ 3831 FloatRegister dst_reg = as_FloatRegister($dst$$reg); 3832 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, 3833 $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3834 %} 3835 3836 enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{ 3837 FloatRegister dst_reg = as_FloatRegister($dst$$reg); 3838 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, 3839 $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3840 %} 3841 3842 enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{ 3843 FloatRegister dst_reg = as_FloatRegister($dst$$reg); 3844 loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q, 3845 $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3846 %} 3847 3848 enc_class aarch64_enc_strb(iRegI src, memory mem) %{ 3849 Register src_reg = as_Register($src$$reg); 3850 
loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(), 3851 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3852 %} 3853 3854 enc_class aarch64_enc_strb0(memory mem) %{ 3855 MacroAssembler _masm(&cbuf); 3856 loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), 3857 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3858 %} 3859 3860 enc_class aarch64_enc_strb0_ordered(memory mem) %{ 3861 MacroAssembler _masm(&cbuf); 3862 __ membar(Assembler::StoreStore); 3863 loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(), 3864 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3865 %} 3866 3867 enc_class aarch64_enc_strh(iRegI src, memory mem) %{ 3868 Register src_reg = as_Register($src$$reg); 3869 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(), 3870 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3871 %} 3872 3873 enc_class aarch64_enc_strh0(memory mem) %{ 3874 MacroAssembler _masm(&cbuf); 3875 loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(), 3876 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3877 %} 3878 3879 enc_class aarch64_enc_strw(iRegI src, memory mem) %{ 3880 Register src_reg = as_Register($src$$reg); 3881 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(), 3882 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3883 %} 3884 3885 enc_class aarch64_enc_strw0(memory mem) %{ 3886 MacroAssembler _masm(&cbuf); 3887 loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(), 3888 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3889 %} 3890 3891 enc_class aarch64_enc_str(iRegL src, memory mem) %{ 3892 Register src_reg = as_Register($src$$reg); 3893 // we sometimes get asked to store the stack pointer into the 3894 // current thread -- we cannot do that directly on AArch64 3895 if (src_reg == r31_sp) { 3896 MacroAssembler _masm(&cbuf); 3897 
assert(as_Register($mem$$base) == rthread, "unexpected store for sp"); 3898 __ mov(rscratch2, sp); 3899 src_reg = rscratch2; 3900 } 3901 loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(), 3902 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3903 %} 3904 3905 enc_class aarch64_enc_str0(memory mem) %{ 3906 MacroAssembler _masm(&cbuf); 3907 loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(), 3908 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3909 %} 3910 3911 enc_class aarch64_enc_strs(vRegF src, memory mem) %{ 3912 FloatRegister src_reg = as_FloatRegister($src$$reg); 3913 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(), 3914 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3915 %} 3916 3917 enc_class aarch64_enc_strd(vRegD src, memory mem) %{ 3918 FloatRegister src_reg = as_FloatRegister($src$$reg); 3919 loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(), 3920 as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3921 %} 3922 3923 enc_class aarch64_enc_strvS(vecD src, memory mem) %{ 3924 FloatRegister src_reg = as_FloatRegister($src$$reg); 3925 loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, 3926 $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3927 %} 3928 3929 enc_class aarch64_enc_strvD(vecD src, memory mem) %{ 3930 FloatRegister src_reg = as_FloatRegister($src$$reg); 3931 loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, 3932 $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3933 %} 3934 3935 enc_class aarch64_enc_strvQ(vecX src, memory mem) %{ 3936 FloatRegister src_reg = as_FloatRegister($src$$reg); 3937 loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q, 3938 $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); 3939 %} 3940 
// END Non-volatile memory access

// volatile loads and stores

// Volatile stores: MOV_VOLATILE resolves the memory operand (using
// rscratch1 as an address scratch when needed) and emits a
// store-release (stlrX) instruction.
enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
  MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, stlrb);
%}

enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
  MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, stlrh);
%}

enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
  MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, stlrw);
%}


// Volatile signed-byte load into a 32-bit view: load-acquire the byte,
// then sign-extend in place (ldarb zero-extends).
enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
  Register dst_reg = as_Register($dst$$reg);
  MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarb);
  __ sxtbw(dst_reg, dst_reg);
%}

enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
  Register dst_reg = as_Register($dst$$reg);
  MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarb);
  __ sxtb(dst_reg, dst_reg);
%}

enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarb);
%}

enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarb);
%}

enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
  Register dst_reg = as_Register($dst$$reg);
  MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarh);
  __ sxthw(dst_reg, dst_reg);
%}

enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
  Register dst_reg = as_Register($dst$$reg);
  MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarh);
  __ sxth(dst_reg, dst_reg);
%}

enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarh);
%}

enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarh);
%}

enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarw);
%}

// NOTE(review): this enc_class reuses the name aarch64_enc_ldarw above but
// with an iRegL operand -- presumably intentional (ADLC substitutes the
// body textually at the instruct site); confirm the duplicate name is
// resolved as intended.
enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarw);
%}

enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
  MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldar);
%}

// Volatile float loads: load-acquire into the integer scratch register,
// then fmov to the FP register (ldar targets general registers only).
enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
  MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldarw);
  __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
%}

enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
  MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, ldar);
  __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
%}

enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
  Register src_reg = as_Register($src$$reg);
  // we sometimes get asked to store the stack pointer into the
  // current thread -- we cannot do that directly on AArch64
  if (src_reg == r31_sp) {
    MacroAssembler _masm(&cbuf);
    assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
    __ mov(rscratch2, sp);
    src_reg = rscratch2;
  }
  MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, stlr);
%}
// Volatile float stores: fmov the value into rscratch2 first, because
// stlr only accepts general-purpose source registers.
enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
  {
    MacroAssembler _masm(&cbuf);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fmovs(rscratch2, src_reg);
  }
  MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, stlrw);
%}

enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
  {
    MacroAssembler _masm(&cbuf);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fmovd(rscratch2, src_reg);
  }
  MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
               rscratch1, stlr);
%}

// synchronized read/update encodings

// Load-acquire-exclusive. ldaxr requires a plain base register, so any
// index/displacement is folded into rscratch1 with lea first.
enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register base = as_Register($mem$$base);
  int index = $mem$$index;
  int scale = $mem$$scale;
  int disp = $mem$$disp;
  if (index == -1) {
    if (disp != 0) {
      __ lea(rscratch1, Address(base, disp));
      __ ldaxr(dst_reg, rscratch1);
    } else {
      // TODO
      // should we ever get anything other than this case?
      __ ldaxr(dst_reg, base);
    }
  } else {
    Register index_reg = as_Register(index);
    if (disp == 0) {
      __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
      __ ldaxr(dst_reg, rscratch1);
    } else {
      __ lea(rscratch1, Address(base, disp));
      __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
      __ ldaxr(dst_reg, rscratch1);
    }
  }
%}

// Store-release-exclusive; status lands in rscratch1 (0 on success).
enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
  MacroAssembler _masm(&cbuf);
  Register src_reg = as_Register($src$$reg);
  Register base = as_Register($mem$$base);
  int index = $mem$$index;
  int scale = $mem$$scale;
  int disp = $mem$$disp;
  if (index == -1) {
    if (disp != 0) {
      __ lea(rscratch2, Address(base, disp));
      __ stlxr(rscratch1, src_reg, rscratch2);
    } else {
      // TODO
      // should we ever get anything other than this case?
      __ stlxr(rscratch1, src_reg, base);
    }
  } else {
    Register index_reg = as_Register(index);
    if (disp == 0) {
      __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
      __ stlxr(rscratch1, src_reg, rscratch2);
    } else {
      __ lea(rscratch2, Address(base, disp));
      __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
      __ stlxr(rscratch1, src_reg, rscratch2);
    }
  }
  // set flags from the status word so users can branch on EQ = success
  __ cmpw(rscratch1, zr);
%}

// Compare-and-swap; only a plain base-register address is supported.
enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
  MacroAssembler _masm(&cbuf);
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
  __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
             &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}

enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
  MacroAssembler _masm(&cbuf);
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
  __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
             &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}


// The only difference between aarch64_enc_cmpxchg and
// aarch64_enc_cmpxchg_acq is that we use load-acquire in the
// CompareAndSwap sequence to serve as a barrier on acquiring a
// lock.
enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
  MacroAssembler _masm(&cbuf);
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
  __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
             &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}

enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
  MacroAssembler _masm(&cbuf);
  guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
  __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
             &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}


// auxiliary used for CompareAndSwapX to set result register
enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
  MacroAssembler _masm(&cbuf);
  Register res_reg = as_Register($res$$reg);
  __ cset(res_reg, Assembler::EQ);
%}

// prefetch encodings

enc_class aarch64_enc_prefetchw(memory mem) %{
  MacroAssembler _masm(&cbuf);
  Register base = as_Register($mem$$base);
  int index = $mem$$index;
  int scale = $mem$$scale;
  int disp = $mem$$disp;
  if (index == -1) {
    __ prfm(Address(base, disp), PSTL1KEEP);
  } else {
    Register index_reg = as_Register(index);
    if (disp == 0) {
      __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
    } else {
      __ lea(rscratch1, Address(base, disp));
      __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
    }
  }
%}

enc_class
aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
  MacroAssembler _masm(&cbuf);
  Register cnt_reg = as_Register($cnt$$reg);
  Register base_reg = as_Register($base$$reg);
  // base is word aligned
  // cnt is count of words

  Label loop;
  Label entry;

  // Algorithm (Duff's-device style unrolled zeroing):
  //
  //    scratch1 = cnt & 7;
  //    cnt -= scratch1;
  //    p += scratch1;
  //    switch (scratch1) {
  //      do {
  //        cnt -= 8;
  //          p[-8] = 0;
  //        case 7:
  //          p[-7] = 0;
  //        case 6:
  //          p[-6] = 0;
  //          // ...
  //        case 1:
  //          p[-1] = 0;
  //        case 0:
  //          p += 8;
  //      } while (cnt);
  //    }

  const int unroll = 8; // Number of str(zr) instructions we'll unroll

  __ andr(rscratch1, cnt_reg, unroll - 1);  // tmp1 = cnt % unroll
  __ sub(cnt_reg, cnt_reg, rscratch1);      // cnt -= tmp1; cnt is now a multiple of unroll
  // base_reg always points to the end of the region we're about to zero
  __ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
  // computed jump: branch tmp1 instructions (4 bytes each) before 'entry'
  // so exactly tmp1 words are zeroed on the first partial pass
  __ adr(rscratch2, entry);
  __ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
  __ br(rscratch2);
  __ bind(loop);
  __ sub(cnt_reg, cnt_reg, unroll);
  for (int i = -unroll; i < 0; i++)
    __ str(zr, Address(base_reg, i * wordSize));
  __ bind(entry);
  __ add(base_reg, base_reg, unroll * wordSize);
  __ cbnz(cnt_reg, loop);
%}

// mov encodings

enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
  MacroAssembler _masm(&cbuf);
  u_int32_t con = (u_int32_t)$src$$constant;
  Register dst_reg = as_Register($dst$$reg);
  if (con == 0) {
    __ movw(dst_reg, zr);
  } else {
    __ movw(dst_reg, con);
  }
%}

enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  u_int64_t con = (u_int64_t)$src$$constant;
  if (con == 0) {
    __ mov(dst_reg, zr);
  } else {
    __ mov(dst_reg, con);
  }
%}

// Pointer-constant move: dispatch on the relocation type of the constant.
enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  address con = (address)$src$$constant;
  if (con == NULL || con == (address)1) {
    ShouldNotReachHere();
  } else {
    relocInfo::relocType rtype = $src->constant_reloc();
    if (rtype == relocInfo::oop_type) {
      __ movoop(dst_reg, (jobject)con, /*immediate*/true);
    } else if (rtype == relocInfo::metadata_type) {
      __ mov_metadata(dst_reg, (Metadata*)con);
    } else {
      assert(rtype == relocInfo::none, "unexpected reloc type");
      // addresses inside the first page are materialised directly;
      // anything else goes via adrp + add for a PC-relative reach
      if (con < (address)(uintptr_t)os::vm_page_size()) {
        __ mov(dst_reg, con);
      } else {
        unsigned long offset;
        __ adrp(dst_reg, con, offset);
        __ add(dst_reg, dst_reg, offset);
      }
    }
  }
%}

enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  __ mov(dst_reg, zr);
%}

enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  __ mov(dst_reg, (u_int64_t)1);
%}

enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
  MacroAssembler _masm(&cbuf);
  address page = (address)$src$$constant;
  Register dst_reg = as_Register($dst$$reg);
  unsigned long off;
  __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
  assert(off == 0, "assumed offset == 0");
%}

enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
  MacroAssembler _masm(&cbuf);
  __ load_byte_map_base($dst$$Register);
%}

enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  address con = (address)$src$$constant;
  if (con == NULL) {
ShouldNotReachHere();
  } else {
    relocInfo::relocType rtype = $src->constant_reloc();
    assert(rtype == relocInfo::oop_type, "unexpected reloc type");
    __ set_narrow_oop(dst_reg, (jobject)con);
  }
%}

enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  __ mov(dst_reg, zr);
%}

enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  address con = (address)$src$$constant;
  if (con == NULL) {
    ShouldNotReachHere();
  } else {
    relocInfo::relocType rtype = $src->constant_reloc();
    assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
    __ set_narrow_klass(dst_reg, (Klass *)con);
  }
%}

// arithmetic encodings

enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register src_reg = as_Register($src1$$reg);
  int32_t con = (int32_t)$src2$$constant;
  // add has primary == 0, subtract has primary == 1
  if ($primary) { con = -con; }
  // negative immediates are encoded as the opposite operation
  if (con < 0) {
    __ subw(dst_reg, src_reg, -con);
  } else {
    __ addw(dst_reg, src_reg, con);
  }
%}

enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register src_reg = as_Register($src1$$reg);
  int32_t con = (int32_t)$src2$$constant;
  // add has primary == 0, subtract has primary == 1
  if ($primary) { con = -con; }
  if (con < 0) {
    __ sub(dst_reg, src_reg, -con);
  } else {
    __ add(dst_reg, src_reg, con);
  }
%}

// Division/remainder via MacroAssembler helpers that apply the Java
// corrections (e.g. MIN_VALUE / -1).
enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register src1_reg = as_Register($src1$$reg);
  Register src2_reg = as_Register($src2$$reg);
  __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
%}

// NOTE(review): formals are declared iRegI although the 64-bit idivq is
// emitted -- matches the surrounding declarations; confirm the operand
// types are not load-bearing for ADLC here.
enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register src1_reg = as_Register($src1$$reg);
  Register src2_reg = as_Register($src2$$reg);
  __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
%}

enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register src1_reg = as_Register($src1$$reg);
  Register src2_reg = as_Register($src2$$reg);
  __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
%}

enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
  MacroAssembler _masm(&cbuf);
  Register dst_reg = as_Register($dst$$reg);
  Register src1_reg = as_Register($src1$$reg);
  Register src2_reg = as_Register($src2$$reg);
  __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
%}

// compare instruction encodings

enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg1 = as_Register($src1$$reg);
  Register reg2 = as_Register($src2$$reg);
  __ cmpw(reg1, reg2);
%}

enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg = as_Register($src1$$reg);
  int32_t val = $src2$$constant;
  // compare == subtract discarding the result; negative immediates
  // are folded into an adds of the negated value
  if (val >= 0) {
    __ subsw(zr, reg, val);
  } else {
    __ addsw(zr, reg, -val);
  }
%}

// General 32-bit immediate compare: materialise into rscratch1 first.
enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg1 = as_Register($src1$$reg);
  u_int32_t val = (u_int32_t)$src2$$constant;
  __ movw(rscratch1, val);
  __ cmpw(reg1, rscratch1);
%}

enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg1 = as_Register($src1$$reg);
  Register reg2 = as_Register($src2$$reg);
  __ cmp(reg1, reg2);
%}

enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg = as_Register($src1$$reg);
  int64_t val = $src2$$constant;
  if (val >= 0) {
    __ subs(zr, reg, val);
  } else if (val != -val) {
    __ adds(zr, reg, -val);
  } else {
    // aargh, Long.MIN_VALUE is a special case
    __ orr(rscratch1, zr, (u_int64_t)val);
    __ subs(zr, reg, rscratch1);
  }
%}

enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg1 = as_Register($src1$$reg);
  u_int64_t val = (u_int64_t)$src2$$constant;
  __ mov(rscratch1, val);
  __ cmp(reg1, rscratch1);
%}

enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg1 = as_Register($src1$$reg);
  Register reg2 = as_Register($src2$$reg);
  __ cmp(reg1, reg2);
%}

enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
  MacroAssembler _masm(&cbuf);
  Register reg1 = as_Register($src1$$reg);
  Register reg2 = as_Register($src2$$reg);
  __ cmpw(reg1, reg2);
%}

// Null tests: compare against zr.
enc_class aarch64_enc_testp(iRegP src) %{
  MacroAssembler _masm(&cbuf);
  Register reg = as_Register($src$$reg);
  __ cmp(reg, zr);
%}

enc_class aarch64_enc_testn(iRegN src) %{
  MacroAssembler _masm(&cbuf);
  Register reg = as_Register($src$$reg);
  __ cmpw(reg, zr);
%}

enc_class aarch64_enc_b(label lbl) %{
  MacroAssembler _masm(&cbuf);
  Label *L = $lbl$$label;
  __ b(*L);
%}

enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
  MacroAssembler _masm(&cbuf);
Label *L = $lbl$$label;
  __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
%}

enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
  MacroAssembler _masm(&cbuf);
  Label *L = $lbl$$label;
  __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
%}

// Slow-path subtype check; clobbers condition codes ($primary selects
// whether the result register is zeroed on success).
enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
%{
  Register sub_reg = as_Register($sub$$reg);
  Register super_reg = as_Register($super$$reg);
  Register temp_reg = as_Register($temp$$reg);
  Register result_reg = as_Register($result$$reg);

  Label miss;
  MacroAssembler _masm(&cbuf);
  __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                   NULL, &miss,
                                   /*set_cond_codes:*/ true);
  if ($primary) {
    __ mov(result_reg, zr);
  }
  __ bind(miss);
%}

// Static Java call. Emits a trampoline call and (for real methods) the
// to-interpreter stub; bails out with a CodeCache-full failure if either
// cannot be emitted.
enc_class aarch64_enc_java_static_call(method meth) %{
  MacroAssembler _masm(&cbuf);

  address addr = (address)$meth$$method;
  address call;
  if (!_method) {
    // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
    call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
  } else {
    int method_index = resolved_method_index(cbuf);
    RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                : static_call_Relocation::spec(method_index);
    call = __ trampoline_call(Address(addr, rspec), &cbuf);

    // Emit stub for static call
    address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
    if (stub == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  }
  if (call == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return;
  }
%}

enc_class aarch64_enc_java_dynamic_call(method meth) %{
  MacroAssembler _masm(&cbuf);
  int method_index = resolved_method_index(cbuf);
  address call = __ ic_call((address)$meth$$method, method_index);
  if (call == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return;
  }
%}

enc_class aarch64_enc_call_epilog() %{
  MacroAssembler _masm(&cbuf);
  if (VerifyStackAtCalls) {
    // Check that stack depth is unchanged: find majik cookie on stack
    __ call_Unimplemented();
  }
%}

enc_class aarch64_enc_java_to_runtime(method meth) %{
  MacroAssembler _masm(&cbuf);

  // some calls to generated routines (arraycopy code) are scheduled
  // by C2 as runtime calls. if so we can call them using a br (they
  // will be in a reachable segment) otherwise we have to use a blrt
  // which loads the absolute address into a register.
  address entry = (address)$meth$$method;
  CodeBlob *cb = CodeCache::find_blob(entry);
  if (cb) {
    address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  } else {
    int gpcnt;
    int fpcnt;
    int rtype;
    getCallInfo(tf(), gpcnt, fpcnt, rtype);
    Label retaddr;
    __ adr(rscratch2, retaddr);
    __ lea(rscratch1, RuntimeAddress(entry));
    // Leave a breadcrumb for JavaThread::pd_last_frame().
    __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
    __ blrt(rscratch1, gpcnt, fpcnt, rtype);
    __ bind(retaddr);
    __ add(sp, sp, 2 * wordSize);
  }
%}

enc_class aarch64_enc_rethrow() %{
  MacroAssembler _masm(&cbuf);
  __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
%}

enc_class aarch64_enc_ret() %{
  MacroAssembler _masm(&cbuf);
  __ ret(lr);
%}

enc_class aarch64_enc_tail_call(iRegP jump_target) %{
  MacroAssembler _masm(&cbuf);
  Register target_reg = as_Register($jump_target$$reg);
  __ br(target_reg);
%}

enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
  MacroAssembler _masm(&cbuf);
  Register target_reg = as_Register($jump_target$$reg);
  // exception oop should be in r0
  // ret addr has been popped into lr
  // callee expects it in r3
  __ mov(r3, lr);
  __ br(target_reg);
%}

// Fast-path monitor enter; sets flags (EQ = success) for the caller.
enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
  MacroAssembler _masm(&cbuf);
  Register oop = as_Register($object$$reg);
  Register box = as_Register($box$$reg);
  Register disp_hdr = as_Register($tmp$$reg);
  Register tmp = as_Register($tmp2$$reg);
  Label cont;
  Label object_has_monitor;
  Label cas_failed;

  assert_different_registers(oop, box, tmp, disp_hdr);

  // Load markOop from object into displaced_header.
// (continues the "load markOop into displaced_header" step above)
  __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

  // Always do locking in runtime.
  if (EmitSync & 0x01) {
    __ cmp(oop, zr);
    return;
  }

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
  }

  // Handle existing monitor
  if ((EmitSync & 0x02) == 0) {
    // we can use AArch64's bit test and branch here but
    // markoopDesc does not define a bit index just the bit value
    // so assert in case the bit pos changes
#   define __monitor_value_log2 1
    assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
    __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#   undef __monitor_value_log2
  }

  // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
  __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

  // Load Compare Value application register.

  // Initialize the box. (Must happen before we update the object mark!)
  __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

  // Compare object markOop with mark and if equal exchange scratch1
  // with object markOop.
  {
    Label retry_load;
    __ bind(retry_load);
    __ ldaxr(tmp, oop);
    __ cmp(tmp, disp_hdr);
    __ br(Assembler::NE, cas_failed);
    // use stlxr to ensure update is immediately visible
    __ stlxr(tmp, box, oop);
    __ cbzw(tmp, cont);      // status 0 => CAS succeeded
    __ b(retry_load);
  }

  // Formerly:
  // __ cmpxchgptr(/*oldv=*/disp_hdr,
  //               /*newv=*/box,
  //               /*addr=*/oop,
  //               /*tmp=*/tmp,
  //               cont,
  //               /*fail*/NULL);

  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  // If the compare-and-exchange succeeded, then we found an unlocked
  // object, will have now locked it will continue at label cont

  __ bind(cas_failed);
  // We did not see an unlocked object so try the fast recursive case.

  // Check if the owner is self by comparing the value in the
  // markOop of object (disp_hdr) with the stack pointer.
  __ mov(rscratch1, sp);
  __ sub(disp_hdr, disp_hdr, rscratch1);
  __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
  // If condition is true we are cont and hence we can store 0 as the
  // displaced header in the box, which indicates that it is a recursive lock.
  __ ands(tmp/*==0?*/, disp_hdr, tmp);
  __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    __ b(cont);

    __ bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
    __ mov(disp_hdr, zr);

    {
      Label retry_load, fail;
      __ bind(retry_load);
      __ ldaxr(rscratch1, tmp);
      __ cmp(disp_hdr, rscratch1);
      __ br(Assembler::NE, fail);
      // use stlxr to ensure update is immediately visible
      __ stlxr(rscratch1, rthread, tmp);
      __ cbnzw(rscratch1, retry_load);
      __ bind(fail);
    }

    // Label next;
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/rthread,
    //               /*addr=*/tmp,
    //               /*tmp=*/rscratch1,
    //               /*succeed*/next,
    //               /*fail*/NULL);
    // __ bind(next);

    // store a non-null value into the box.
    __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // PPC port checks the following invariants
    // #ifdef ASSERT
    // bne(flag, cont);
    // We have acquired the monitor, check some invariants.
    // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
    // Invariant 1: _recursions should be 0.
    // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
    // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
    //                     "monitor->_recursions should be 0", -1);
    // Invariant 2: OwnerIsThread shouldn't be 0.
// assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
    //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
    //                       "monitor->OwnerIsThread shouldn't be 0", -1);
    // #endif
  }

  __ bind(cont);
  // flag == EQ indicates success
  // flag == NE indicates failure

%}

// TODO
// reimplement this with custom cmpxchgptr code
// which avoids some of the unnecessary branching
// Fast-path monitor exit; sets flags (EQ = success) for the caller.
enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
  MacroAssembler _masm(&cbuf);
  Register oop = as_Register($object$$reg);
  Register box = as_Register($box$$reg);
  Register disp_hdr = as_Register($tmp$$reg);
  Register tmp = as_Register($tmp2$$reg);
  Label cont;
  Label object_has_monitor;
  Label cas_failed;

  assert_different_registers(oop, box, tmp, disp_hdr);

  // Always do locking in runtime.
  if (EmitSync & 0x01) {
    __ cmp(oop, zr); // Oop can't be 0 here => always false.
    return;
  }

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    __ biased_locking_exit(oop, tmp, cont);
  }

  // Find the lock address and load the displaced header from the stack.
  __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

  // If the displaced header is 0, we have a recursive unlock.
  __ cmp(disp_hdr, zr);
  __ br(Assembler::EQ, cont);


  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
    __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
  }

  // Check if it is still a light weight lock, this is true if we
  // see the stack address of the basicLock in the markOop of the
  // object.

  {
    Label retry_load;
    __ bind(retry_load);
    __ ldxr(tmp, oop);
    __ cmp(box, tmp);
    __ br(Assembler::NE, cas_failed);
    // use stlxr to ensure update is immediately visible
    __ stlxr(tmp, disp_hdr, oop);
    __ cbzw(tmp, cont);      // status 0 => CAS succeeded
    __ b(retry_load);
  }

  // __ cmpxchgptr(/*compare_value=*/box,
  //               /*exchange_value=*/disp_hdr,
  //               /*where=*/oop,
  //               /*result=*/tmp,
  //               cont,
  //               /*cas_failed*/NULL);
  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

  __ bind(cas_failed);

  // Handle existing monitor.
  if ((EmitSync & 0x02) == 0) {
    __ b(cont);

    __ bind(object_has_monitor);
    __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
    __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
    __ cmp(rscratch1, zr);
    __ br(Assembler::NE, cont);

    __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
    __ cmp(rscratch1, zr);
    __ cbnz(rscratch1, cont);
    // need a release store here
    __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ stlr(rscratch1, tmp); // rscratch1 is zero
  }

  __ bind(cont);
  // flag == EQ indicates success
  // flag == NE indicates failure
%}

%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
4872 // 4873 // S T A C K L A Y O U T Allocators stack-slot number 4874 // | (to get allocators register number 4875 // G Owned by | | v add OptoReg::stack0()) 4876 // r CALLER | | 4877 // o | +--------+ pad to even-align allocators stack-slot 4878 // w V | pad0 | numbers; owned by CALLER 4879 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 4880 // h ^ | in | 5 4881 // | | args | 4 Holes in incoming args owned by SELF 4882 // | | | | 3 4883 // | | +--------+ 4884 // V | | old out| Empty on Intel, window on Sparc 4885 // | old |preserve| Must be even aligned. 4886 // | SP-+--------+----> Matcher::_old_SP, even aligned 4887 // | | in | 3 area for Intel ret address 4888 // Owned by |preserve| Empty on Sparc. 4889 // SELF +--------+ 4890 // | | pad2 | 2 pad to align old SP 4891 // | +--------+ 1 4892 // | | locks | 0 4893 // | +--------+----> OptoReg::stack0(), even aligned 4894 // | | pad1 | 11 pad to align new SP 4895 // | +--------+ 4896 // | | | 10 4897 // | | spills | 9 spills 4898 // V | | 8 (pad0 slot for callee) 4899 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 4900 // ^ | out | 7 4901 // | | args | 6 Holes in outgoing args owned by CALLEE 4902 // Owned by +--------+ 4903 // CALLEE | new out| 6 Empty on Intel, window on Sparc 4904 // | new |preserve| Must be even-aligned. 4905 // | SP-+--------+----> Matcher::_new_SP, even aligned 4906 // | | | 4907 // 4908 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 4909 // known from SELF's arguments and the Java calling convention. 4910 // Region 6-7 is determined per call site. 4911 // Note 2: If the calling convention leaves holes in the incoming argument 4912 // area, those holes are owned by SELF. Holes in the outgoing area 4913 // are owned by the CALLEE. Holes should not be nessecary in the 4914 // incoming area, as the Java calling convention is completely under 4915 // the control of the AD file. 
// Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         (the latter is true on Intel but is it false on AArch64?)
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.
// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
//         alignment.  Region 11, pad1, may be dynamically extended so that
//         SP meets the minimum alignment.

// Frame layout description used by the matcher and register allocator
// to locate arguments, spills and the return address.
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register, indexed by ideal register type.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                 // Op_Node
      0,                 // Op_Set
      R0_num,            // Op_RegN
      R0_num,            // Op_RegI
      R0_num,            // Op_RegP
      V0_num,            // Op_RegF
      V0_num,            // Op_RegD
      R0_num             // Op_RegL
    };

    // High half; OptoReg::Bad for 32-bit-only values.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                 // Op_Node
      0,                 // Op_Set
      OptoReg::Bad,      // Op_RegN
      OptoReg::Bad,      // Op_RegI
      R0_H_num,          // Op_RegP
      OptoReg::Bad,      // Op_RegF
      V0_H_num,          // Op_RegD
      R0_H_num           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);           // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
5054 5055 //----------Simple Operands---------------------------------------------------- 5056 5057 // Integer operands 32 bit 5058 // 32 bit immediate 5059 operand immI() 5060 %{ 5061 match(ConI); 5062 5063 op_cost(0); 5064 format %{ %} 5065 interface(CONST_INTER); 5066 %} 5067 5068 // 32 bit zero 5069 operand immI0() 5070 %{ 5071 predicate(n->get_int() == 0); 5072 match(ConI); 5073 5074 op_cost(0); 5075 format %{ %} 5076 interface(CONST_INTER); 5077 %} 5078 5079 // 32 bit unit increment 5080 operand immI_1() 5081 %{ 5082 predicate(n->get_int() == 1); 5083 match(ConI); 5084 5085 op_cost(0); 5086 format %{ %} 5087 interface(CONST_INTER); 5088 %} 5089 5090 // 32 bit unit decrement 5091 operand immI_M1() 5092 %{ 5093 predicate(n->get_int() == -1); 5094 match(ConI); 5095 5096 op_cost(0); 5097 format %{ %} 5098 interface(CONST_INTER); 5099 %} 5100 5101 operand immI_le_4() 5102 %{ 5103 predicate(n->get_int() <= 4); 5104 match(ConI); 5105 5106 op_cost(0); 5107 format %{ %} 5108 interface(CONST_INTER); 5109 %} 5110 5111 operand immI_31() 5112 %{ 5113 predicate(n->get_int() == 31); 5114 match(ConI); 5115 5116 op_cost(0); 5117 format %{ %} 5118 interface(CONST_INTER); 5119 %} 5120 5121 operand immI_8() 5122 %{ 5123 predicate(n->get_int() == 8); 5124 match(ConI); 5125 5126 op_cost(0); 5127 format %{ %} 5128 interface(CONST_INTER); 5129 %} 5130 5131 operand immI_16() 5132 %{ 5133 predicate(n->get_int() == 16); 5134 match(ConI); 5135 5136 op_cost(0); 5137 format %{ %} 5138 interface(CONST_INTER); 5139 %} 5140 5141 operand immI_24() 5142 %{ 5143 predicate(n->get_int() == 24); 5144 match(ConI); 5145 5146 op_cost(0); 5147 format %{ %} 5148 interface(CONST_INTER); 5149 %} 5150 5151 operand immI_32() 5152 %{ 5153 predicate(n->get_int() == 32); 5154 match(ConI); 5155 5156 op_cost(0); 5157 format %{ %} 5158 interface(CONST_INTER); 5159 %} 5160 5161 operand immI_48() 5162 %{ 5163 predicate(n->get_int() == 48); 5164 match(ConI); 5165 5166 op_cost(0); 5167 format %{ %} 5168 
  interface(CONST_INTER);
%}

// 32 bit constant 56 (shift amount)
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64 (shift amount)
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (low byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (low halfword mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 63 used as a long shift count.
// NOTE(review): despite the immL name this matches ConI with get_int() —
// presumably because long shift counts are int constants in the ideal
// graph; confirm against the rules that use it.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 used in long contexts.
// NOTE(review): matches ConI, same caveat as immL_63 above.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (low halfword mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 0xFFFFFFFF (low word mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
predicate(0 <= n->get_int() && (n->get_int() <= 3)); 5277 match(ConI); 5278 5279 op_cost(0); 5280 format %{ %} 5281 interface(CONST_INTER); 5282 %} 5283 5284 // 26 bit signed offset -- for pc-relative branches 5285 operand immI26() 5286 %{ 5287 predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25))); 5288 match(ConI); 5289 5290 op_cost(0); 5291 format %{ %} 5292 interface(CONST_INTER); 5293 %} 5294 5295 // 19 bit signed offset -- for pc-relative loads 5296 operand immI19() 5297 %{ 5298 predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18))); 5299 match(ConI); 5300 5301 op_cost(0); 5302 format %{ %} 5303 interface(CONST_INTER); 5304 %} 5305 5306 // 12 bit unsigned offset -- for base plus immediate loads 5307 operand immIU12() 5308 %{ 5309 predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12))); 5310 match(ConI); 5311 5312 op_cost(0); 5313 format %{ %} 5314 interface(CONST_INTER); 5315 %} 5316 5317 operand immLU12() 5318 %{ 5319 predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12))); 5320 match(ConL); 5321 5322 op_cost(0); 5323 format %{ %} 5324 interface(CONST_INTER); 5325 %} 5326 5327 // Offset for scaled or unscaled immediate loads and stores 5328 operand immIOffset() 5329 %{ 5330 predicate(Address::offset_ok_for_immed(n->get_int())); 5331 match(ConI); 5332 5333 op_cost(0); 5334 format %{ %} 5335 interface(CONST_INTER); 5336 %} 5337 5338 operand immLoffset() 5339 %{ 5340 predicate(Address::offset_ok_for_immed(n->get_long())); 5341 match(ConL); 5342 5343 op_cost(0); 5344 format %{ %} 5345 interface(CONST_INTER); 5346 %} 5347 5348 // 32 bit integer valid for add sub immediate 5349 operand immIAddSub() 5350 %{ 5351 predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int())); 5352 match(ConI); 5353 op_cost(0); 5354 format %{ %} 5355 interface(CONST_INTER); 5356 %} 5357 5358 // 32 bit unsigned integer valid for logical immediate 5359 // TODO -- check this is right when e.g the mask is 0x80000000 5360 
operand immILog() 5361 %{ 5362 predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int())); 5363 match(ConI); 5364 5365 op_cost(0); 5366 format %{ %} 5367 interface(CONST_INTER); 5368 %} 5369 5370 // Integer operands 64 bit 5371 // 64 bit immediate 5372 operand immL() 5373 %{ 5374 match(ConL); 5375 5376 op_cost(0); 5377 format %{ %} 5378 interface(CONST_INTER); 5379 %} 5380 5381 // 64 bit zero 5382 operand immL0() 5383 %{ 5384 predicate(n->get_long() == 0); 5385 match(ConL); 5386 5387 op_cost(0); 5388 format %{ %} 5389 interface(CONST_INTER); 5390 %} 5391 5392 // 64 bit unit increment 5393 operand immL_1() 5394 %{ 5395 predicate(n->get_long() == 1); 5396 match(ConL); 5397 5398 op_cost(0); 5399 format %{ %} 5400 interface(CONST_INTER); 5401 %} 5402 5403 // 64 bit unit decrement 5404 operand immL_M1() 5405 %{ 5406 predicate(n->get_long() == -1); 5407 match(ConL); 5408 5409 op_cost(0); 5410 format %{ %} 5411 interface(CONST_INTER); 5412 %} 5413 5414 // 32 bit offset of pc in thread anchor 5415 5416 operand immL_pc_off() 5417 %{ 5418 predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + 5419 in_bytes(JavaFrameAnchor::last_Java_pc_offset())); 5420 match(ConL); 5421 5422 op_cost(0); 5423 format %{ %} 5424 interface(CONST_INTER); 5425 %} 5426 5427 // 64 bit integer valid for add sub immediate 5428 operand immLAddSub() 5429 %{ 5430 predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long())); 5431 match(ConL); 5432 op_cost(0); 5433 format %{ %} 5434 interface(CONST_INTER); 5435 %} 5436 5437 // 64 bit integer valid for logical immediate 5438 operand immLLog() 5439 %{ 5440 predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long())); 5441 match(ConL); 5442 op_cost(0); 5443 format %{ %} 5444 interface(CONST_INTER); 5445 %} 5446 5447 // Long Immediate: low 32-bit mask 5448 operand immL_32bits() 5449 %{ 5450 predicate(n->get_long() == 0xFFFFFFFFL); 5451 match(ConL); 
5452 op_cost(0); 5453 format %{ %} 5454 interface(CONST_INTER); 5455 %} 5456 5457 // Pointer operands 5458 // Pointer Immediate 5459 operand immP() 5460 %{ 5461 match(ConP); 5462 5463 op_cost(0); 5464 format %{ %} 5465 interface(CONST_INTER); 5466 %} 5467 5468 // NULL Pointer Immediate 5469 operand immP0() 5470 %{ 5471 predicate(n->get_ptr() == 0); 5472 match(ConP); 5473 5474 op_cost(0); 5475 format %{ %} 5476 interface(CONST_INTER); 5477 %} 5478 5479 // Pointer Immediate One 5480 // this is used in object initialization (initial object header) 5481 operand immP_1() 5482 %{ 5483 predicate(n->get_ptr() == 1); 5484 match(ConP); 5485 5486 op_cost(0); 5487 format %{ %} 5488 interface(CONST_INTER); 5489 %} 5490 5491 // Polling Page Pointer Immediate 5492 operand immPollPage() 5493 %{ 5494 predicate((address)n->get_ptr() == os::get_polling_page()); 5495 match(ConP); 5496 5497 op_cost(0); 5498 format %{ %} 5499 interface(CONST_INTER); 5500 %} 5501 5502 // Card Table Byte Map Base 5503 operand immByteMapBase() 5504 %{ 5505 // Get base of card map 5506 predicate((jbyte*)n->get_ptr() == 5507 ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base); 5508 match(ConP); 5509 5510 op_cost(0); 5511 format %{ %} 5512 interface(CONST_INTER); 5513 %} 5514 5515 // Pointer Immediate Minus One 5516 // this is used when we want to write the current PC to the thread anchor 5517 operand immP_M1() 5518 %{ 5519 predicate(n->get_ptr() == -1); 5520 match(ConP); 5521 5522 op_cost(0); 5523 format %{ %} 5524 interface(CONST_INTER); 5525 %} 5526 5527 // Pointer Immediate Minus Two 5528 // this is used when we want to write the current PC to the thread anchor 5529 operand immP_M2() 5530 %{ 5531 predicate(n->get_ptr() == -2); 5532 match(ConP); 5533 5534 op_cost(0); 5535 format %{ %} 5536 interface(CONST_INTER); 5537 %} 5538 5539 // Float and Double operands 5540 // Double Immediate 5541 operand immD() 5542 %{ 5543 match(ConD); 5544 op_cost(0); 5545 format %{ %} 5546 
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: value encodable as an FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: value encodable as an FP immediate
// (widened to double before the encodability check)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
5648 op_cost(0); 5649 format %{ %} 5650 interface(REG_INTER); 5651 %} 5652 5653 // Integer 64 bit Register Operands 5654 // Integer 64 bit Register (includes SP) 5655 operand iRegL() 5656 %{ 5657 constraint(ALLOC_IN_RC(any_reg)); 5658 match(RegL); 5659 match(iRegLNoSp); 5660 op_cost(0); 5661 format %{ %} 5662 interface(REG_INTER); 5663 %} 5664 5665 // Integer 64 bit Register not Special 5666 operand iRegLNoSp() 5667 %{ 5668 constraint(ALLOC_IN_RC(no_special_reg)); 5669 match(RegL); 5670 format %{ %} 5671 interface(REG_INTER); 5672 %} 5673 5674 // Pointer Register Operands 5675 // Pointer Register 5676 operand iRegP() 5677 %{ 5678 constraint(ALLOC_IN_RC(ptr_reg)); 5679 match(RegP); 5680 match(iRegPNoSp); 5681 match(iRegP_R0); 5682 //match(iRegP_R2); 5683 //match(iRegP_R4); 5684 //match(iRegP_R5); 5685 match(thread_RegP); 5686 op_cost(0); 5687 format %{ %} 5688 interface(REG_INTER); 5689 %} 5690 5691 // Pointer 64 bit Register not Special 5692 operand iRegPNoSp() 5693 %{ 5694 constraint(ALLOC_IN_RC(no_special_ptr_reg)); 5695 match(RegP); 5696 // match(iRegP); 5697 // match(iRegP_R0); 5698 // match(iRegP_R2); 5699 // match(iRegP_R4); 5700 // match(iRegP_R5); 5701 // match(thread_RegP); 5702 op_cost(0); 5703 format %{ %} 5704 interface(REG_INTER); 5705 %} 5706 5707 // Pointer 64 bit Register R0 only 5708 operand iRegP_R0() 5709 %{ 5710 constraint(ALLOC_IN_RC(r0_reg)); 5711 match(RegP); 5712 // match(iRegP); 5713 match(iRegPNoSp); 5714 op_cost(0); 5715 format %{ %} 5716 interface(REG_INTER); 5717 %} 5718 5719 // Pointer 64 bit Register R1 only 5720 operand iRegP_R1() 5721 %{ 5722 constraint(ALLOC_IN_RC(r1_reg)); 5723 match(RegP); 5724 // match(iRegP); 5725 match(iRegPNoSp); 5726 op_cost(0); 5727 format %{ %} 5728 interface(REG_INTER); 5729 %} 5730 5731 // Pointer 64 bit Register R2 only 5732 operand iRegP_R2() 5733 %{ 5734 constraint(ALLOC_IN_RC(r2_reg)); 5735 match(RegP); 5736 // match(iRegP); 5737 match(iRegPNoSp); 5738 op_cost(0); 5739 format %{ %} 5740 
interface(REG_INTER); 5741 %} 5742 5743 // Pointer 64 bit Register R3 only 5744 operand iRegP_R3() 5745 %{ 5746 constraint(ALLOC_IN_RC(r3_reg)); 5747 match(RegP); 5748 // match(iRegP); 5749 match(iRegPNoSp); 5750 op_cost(0); 5751 format %{ %} 5752 interface(REG_INTER); 5753 %} 5754 5755 // Pointer 64 bit Register R4 only 5756 operand iRegP_R4() 5757 %{ 5758 constraint(ALLOC_IN_RC(r4_reg)); 5759 match(RegP); 5760 // match(iRegP); 5761 match(iRegPNoSp); 5762 op_cost(0); 5763 format %{ %} 5764 interface(REG_INTER); 5765 %} 5766 5767 // Pointer 64 bit Register R5 only 5768 operand iRegP_R5() 5769 %{ 5770 constraint(ALLOC_IN_RC(r5_reg)); 5771 match(RegP); 5772 // match(iRegP); 5773 match(iRegPNoSp); 5774 op_cost(0); 5775 format %{ %} 5776 interface(REG_INTER); 5777 %} 5778 5779 // Pointer 64 bit Register R10 only 5780 operand iRegP_R10() 5781 %{ 5782 constraint(ALLOC_IN_RC(r10_reg)); 5783 match(RegP); 5784 // match(iRegP); 5785 match(iRegPNoSp); 5786 op_cost(0); 5787 format %{ %} 5788 interface(REG_INTER); 5789 %} 5790 5791 // Long 64 bit Register R11 only 5792 operand iRegL_R11() 5793 %{ 5794 constraint(ALLOC_IN_RC(r11_reg)); 5795 match(RegL); 5796 match(iRegLNoSp); 5797 op_cost(0); 5798 format %{ %} 5799 interface(REG_INTER); 5800 %} 5801 5802 // Pointer 64 bit Register FP only 5803 operand iRegP_FP() 5804 %{ 5805 constraint(ALLOC_IN_RC(fp_reg)); 5806 match(RegP); 5807 // match(iRegP); 5808 op_cost(0); 5809 format %{ %} 5810 interface(REG_INTER); 5811 %} 5812 5813 // Register R0 only 5814 operand iRegI_R0() 5815 %{ 5816 constraint(ALLOC_IN_RC(int_r0_reg)); 5817 match(RegI); 5818 match(iRegINoSp); 5819 op_cost(0); 5820 format %{ %} 5821 interface(REG_INTER); 5822 %} 5823 5824 // Register R2 only 5825 operand iRegI_R2() 5826 %{ 5827 constraint(ALLOC_IN_RC(int_r2_reg)); 5828 match(RegI); 5829 match(iRegINoSp); 5830 op_cost(0); 5831 format %{ %} 5832 interface(REG_INTER); 5833 %} 5834 5835 // Register R3 only 5836 operand iRegI_R3() 5837 %{ 5838 
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Narrow Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64 bit vector register
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128 bit vector register
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double register V0 only
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
interface(REG_INTER); 5943 %} 5944 5945 operand vRegD_V1() 5946 %{ 5947 constraint(ALLOC_IN_RC(v1_reg)); 5948 match(RegD); 5949 op_cost(0); 5950 format %{ %} 5951 interface(REG_INTER); 5952 %} 5953 5954 operand vRegD_V2() 5955 %{ 5956 constraint(ALLOC_IN_RC(v2_reg)); 5957 match(RegD); 5958 op_cost(0); 5959 format %{ %} 5960 interface(REG_INTER); 5961 %} 5962 5963 operand vRegD_V3() 5964 %{ 5965 constraint(ALLOC_IN_RC(v3_reg)); 5966 match(RegD); 5967 op_cost(0); 5968 format %{ %} 5969 interface(REG_INTER); 5970 %} 5971 5972 // Flags register, used as output of signed compare instructions 5973 5974 // note that on AArch64 we also use this register as the output for 5975 // for floating point compare instructions (CmpF CmpD). this ensures 5976 // that ordered inequality tests use GT, GE, LT or LE none of which 5977 // pass through cases where the result is unordered i.e. one or both 5978 // inputs to the compare is a NaN. this means that the ideal code can 5979 // replace e.g. a GT with an LE and not end up capturing the NaN case 5980 // (where the comparison should always fail). EQ and NE tests are 5981 // always generated in ideal code so that unordered folds into the NE 5982 // case, matching the behaviour of AArch64 NE. 5983 // 5984 // This differs from x86 where the outputs of FP compares use a 5985 // special FP flags registers and where compares based on this 5986 // register are distinguished into ordered inequalities (cmpOpUCF) and 5987 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests 5988 // to explicitly handle the unordered case in branches. x86 also has 5989 // to include extra CMoveX rules to accept a cmpOpUCF input. 
5990 5991 operand rFlagsReg() 5992 %{ 5993 constraint(ALLOC_IN_RC(int_flags)); 5994 match(RegFlags); 5995 5996 op_cost(0); 5997 format %{ "RFLAGS" %} 5998 interface(REG_INTER); 5999 %} 6000 6001 // Flags register, used as output of unsigned compare instructions 6002 operand rFlagsRegU() 6003 %{ 6004 constraint(ALLOC_IN_RC(int_flags)); 6005 match(RegFlags); 6006 6007 op_cost(0); 6008 format %{ "RFLAGSU" %} 6009 interface(REG_INTER); 6010 %} 6011 6012 // Special Registers 6013 6014 // Method Register 6015 operand inline_cache_RegP(iRegP reg) 6016 %{ 6017 constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg 6018 match(reg); 6019 match(iRegPNoSp); 6020 op_cost(0); 6021 format %{ %} 6022 interface(REG_INTER); 6023 %} 6024 6025 operand interpreter_method_oop_RegP(iRegP reg) 6026 %{ 6027 constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg 6028 match(reg); 6029 match(iRegPNoSp); 6030 op_cost(0); 6031 format %{ %} 6032 interface(REG_INTER); 6033 %} 6034 6035 // Thread Register 6036 operand thread_RegP(iRegP reg) 6037 %{ 6038 constraint(ALLOC_IN_RC(thread_reg)); // link_reg 6039 match(reg); 6040 op_cost(0); 6041 format %{ %} 6042 interface(REG_INTER); 6043 %} 6044 6045 operand lr_RegP(iRegP reg) 6046 %{ 6047 constraint(ALLOC_IN_RC(lr_reg)); // link_reg 6048 match(reg); 6049 op_cost(0); 6050 format %{ %} 6051 interface(REG_INTER); 6052 %} 6053 6054 //----------Memory Operands---------------------------------------------------- 6055 6056 operand indirect(iRegP reg) 6057 %{ 6058 constraint(ALLOC_IN_RC(ptr_reg)); 6059 match(reg); 6060 op_cost(0); 6061 format %{ "[$reg]" %} 6062 interface(MEMORY_INTER) %{ 6063 base($reg); 6064 index(0xffffffff); 6065 scale(0x0); 6066 disp(0x0); 6067 %} 6068 %} 6069 6070 operand indIndexScaledOffsetI(iRegP reg, iRegL lreg, immIScale scale, immIU12 off) 6071 %{ 6072 constraint(ALLOC_IN_RC(ptr_reg)); 6073 match(AddP (AddP reg (LShiftL lreg scale)) off); 6074 op_cost(INSN_COST); 6075 format %{ "$reg, $lreg lsl($scale), $off" %} 
6076 interface(MEMORY_INTER) %{ 6077 base($reg); 6078 index($lreg); 6079 scale($scale); 6080 disp($off); 6081 %} 6082 %} 6083 6084 operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 off) 6085 %{ 6086 constraint(ALLOC_IN_RC(ptr_reg)); 6087 match(AddP (AddP reg (LShiftL lreg scale)) off); 6088 op_cost(INSN_COST); 6089 format %{ "$reg, $lreg lsl($scale), $off" %} 6090 interface(MEMORY_INTER) %{ 6091 base($reg); 6092 index($lreg); 6093 scale($scale); 6094 disp($off); 6095 %} 6096 %} 6097 6098 operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off) 6099 %{ 6100 constraint(ALLOC_IN_RC(ptr_reg)); 6101 match(AddP (AddP reg (ConvI2L ireg)) off); 6102 op_cost(INSN_COST); 6103 format %{ "$reg, $ireg, $off I2L" %} 6104 interface(MEMORY_INTER) %{ 6105 base($reg); 6106 index($ireg); 6107 scale(0x0); 6108 disp($off); 6109 %} 6110 %} 6111 6112 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off) 6113 %{ 6114 constraint(ALLOC_IN_RC(ptr_reg)); 6115 match(AddP (AddP reg (LShiftL (ConvI2L ireg) scale)) off); 6116 op_cost(INSN_COST); 6117 format %{ "$reg, $ireg sxtw($scale), $off I2L" %} 6118 interface(MEMORY_INTER) %{ 6119 base($reg); 6120 index($ireg); 6121 scale($scale); 6122 disp($off); 6123 %} 6124 %} 6125 6126 operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale) 6127 %{ 6128 constraint(ALLOC_IN_RC(ptr_reg)); 6129 match(AddP reg (LShiftL (ConvI2L ireg) scale)); 6130 op_cost(0); 6131 format %{ "$reg, $ireg sxtw($scale), 0, I2L" %} 6132 interface(MEMORY_INTER) %{ 6133 base($reg); 6134 index($ireg); 6135 scale($scale); 6136 disp(0x0); 6137 %} 6138 %} 6139 6140 operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale) 6141 %{ 6142 constraint(ALLOC_IN_RC(ptr_reg)); 6143 match(AddP reg (LShiftL lreg scale)); 6144 op_cost(0); 6145 format %{ "$reg, $lreg lsl($scale)" %} 6146 interface(MEMORY_INTER) %{ 6147 base($reg); 6148 index($lreg); 6149 scale($scale); 6150 disp(0x0); 6151 %} 6152 %} 6153 6154 operand 
indIndex(iRegP reg, iRegL lreg) 6155 %{ 6156 constraint(ALLOC_IN_RC(ptr_reg)); 6157 match(AddP reg lreg); 6158 op_cost(0); 6159 format %{ "$reg, $lreg" %} 6160 interface(MEMORY_INTER) %{ 6161 base($reg); 6162 index($lreg); 6163 scale(0x0); 6164 disp(0x0); 6165 %} 6166 %} 6167 6168 operand indOffI(iRegP reg, immIOffset off) 6169 %{ 6170 constraint(ALLOC_IN_RC(ptr_reg)); 6171 match(AddP reg off); 6172 op_cost(0); 6173 format %{ "[$reg, $off]" %} 6174 interface(MEMORY_INTER) %{ 6175 base($reg); 6176 index(0xffffffff); 6177 scale(0x0); 6178 disp($off); 6179 %} 6180 %} 6181 6182 operand indOffL(iRegP reg, immLoffset off) 6183 %{ 6184 constraint(ALLOC_IN_RC(ptr_reg)); 6185 match(AddP reg off); 6186 op_cost(0); 6187 format %{ "[$reg, $off]" %} 6188 interface(MEMORY_INTER) %{ 6189 base($reg); 6190 index(0xffffffff); 6191 scale(0x0); 6192 disp($off); 6193 %} 6194 %} 6195 6196 6197 operand indirectN(iRegN reg) 6198 %{ 6199 predicate(Universe::narrow_oop_shift() == 0); 6200 constraint(ALLOC_IN_RC(ptr_reg)); 6201 match(DecodeN reg); 6202 op_cost(0); 6203 format %{ "[$reg]\t# narrow" %} 6204 interface(MEMORY_INTER) %{ 6205 base($reg); 6206 index(0xffffffff); 6207 scale(0x0); 6208 disp(0x0); 6209 %} 6210 %} 6211 6212 operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIU12 off) 6213 %{ 6214 predicate(Universe::narrow_oop_shift() == 0); 6215 constraint(ALLOC_IN_RC(ptr_reg)); 6216 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); 6217 op_cost(0); 6218 format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %} 6219 interface(MEMORY_INTER) %{ 6220 base($reg); 6221 index($lreg); 6222 scale($scale); 6223 disp($off); 6224 %} 6225 %} 6226 6227 operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 off) 6228 %{ 6229 predicate(Universe::narrow_oop_shift() == 0); 6230 constraint(ALLOC_IN_RC(ptr_reg)); 6231 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); 6232 op_cost(INSN_COST); 6233 format %{ "$reg, $lreg lsl($scale), $off\t# 
narrow" %} 6234 interface(MEMORY_INTER) %{ 6235 base($reg); 6236 index($lreg); 6237 scale($scale); 6238 disp($off); 6239 %} 6240 %} 6241 6242 operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off) 6243 %{ 6244 predicate(Universe::narrow_oop_shift() == 0); 6245 constraint(ALLOC_IN_RC(ptr_reg)); 6246 match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off); 6247 op_cost(INSN_COST); 6248 format %{ "$reg, $ireg, $off I2L\t# narrow" %} 6249 interface(MEMORY_INTER) %{ 6250 base($reg); 6251 index($ireg); 6252 scale(0x0); 6253 disp($off); 6254 %} 6255 %} 6256 6257 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off) 6258 %{ 6259 predicate(Universe::narrow_oop_shift() == 0); 6260 constraint(ALLOC_IN_RC(ptr_reg)); 6261 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off); 6262 op_cost(INSN_COST); 6263 format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %} 6264 interface(MEMORY_INTER) %{ 6265 base($reg); 6266 index($ireg); 6267 scale($scale); 6268 disp($off); 6269 %} 6270 %} 6271 6272 operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale) 6273 %{ 6274 predicate(Universe::narrow_oop_shift() == 0); 6275 constraint(ALLOC_IN_RC(ptr_reg)); 6276 match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)); 6277 op_cost(0); 6278 format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %} 6279 interface(MEMORY_INTER) %{ 6280 base($reg); 6281 index($ireg); 6282 scale($scale); 6283 disp(0x0); 6284 %} 6285 %} 6286 6287 operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale) 6288 %{ 6289 predicate(Universe::narrow_oop_shift() == 0); 6290 constraint(ALLOC_IN_RC(ptr_reg)); 6291 match(AddP (DecodeN reg) (LShiftL lreg scale)); 6292 op_cost(0); 6293 format %{ "$reg, $lreg lsl($scale)\t# narrow" %} 6294 interface(MEMORY_INTER) %{ 6295 base($reg); 6296 index($lreg); 6297 scale($scale); 6298 disp(0x0); 6299 %} 6300 %} 6301 6302 operand indIndexN(iRegN reg, iRegL lreg) 6303 %{ 6304 predicate(Universe::narrow_oop_shift() == 0); 
6305 constraint(ALLOC_IN_RC(ptr_reg)); 6306 match(AddP (DecodeN reg) lreg); 6307 op_cost(0); 6308 format %{ "$reg, $lreg\t# narrow" %} 6309 interface(MEMORY_INTER) %{ 6310 base($reg); 6311 index($lreg); 6312 scale(0x0); 6313 disp(0x0); 6314 %} 6315 %} 6316 6317 operand indOffIN(iRegN reg, immIOffset off) 6318 %{ 6319 predicate(Universe::narrow_oop_shift() == 0); 6320 constraint(ALLOC_IN_RC(ptr_reg)); 6321 match(AddP (DecodeN reg) off); 6322 op_cost(0); 6323 format %{ "[$reg, $off]\t# narrow" %} 6324 interface(MEMORY_INTER) %{ 6325 base($reg); 6326 index(0xffffffff); 6327 scale(0x0); 6328 disp($off); 6329 %} 6330 %} 6331 6332 operand indOffLN(iRegN reg, immLoffset off) 6333 %{ 6334 predicate(Universe::narrow_oop_shift() == 0); 6335 constraint(ALLOC_IN_RC(ptr_reg)); 6336 match(AddP (DecodeN reg) off); 6337 op_cost(0); 6338 format %{ "[$reg, $off]\t# narrow" %} 6339 interface(MEMORY_INTER) %{ 6340 base($reg); 6341 index(0xffffffff); 6342 scale(0x0); 6343 disp($off); 6344 %} 6345 %} 6346 6347 6348 6349 // AArch64 opto stubs need to write to the pc slot in the thread anchor 6350 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off) 6351 %{ 6352 constraint(ALLOC_IN_RC(ptr_reg)); 6353 match(AddP reg off); 6354 op_cost(0); 6355 format %{ "[$reg, $off]" %} 6356 interface(MEMORY_INTER) %{ 6357 base($reg); 6358 index(0xffffffff); 6359 scale(0x0); 6360 disp($off); 6361 %} 6362 %} 6363 6364 //----------Special Memory Operands-------------------------------------------- 6365 // Stack Slot Operand - This operand is used for loading and storing temporary 6366 // values on the stack where a match requires a value to 6367 // flow through memory. 
// Stack-slot operands: spill/fill locations addressed off RSP (encoding
// 0x1e); the "displacement" is the slot offset supplied by the allocator.

operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
// the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// Special operand allowing long args to int ops to be truncated for free

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}

// memory operand class accepted by the vector load/store instructions
opclass vmem(indirect, indIndex, indOffI, indOffL);

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);


// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
6541 6542 // For specific pipelines, eg A53, define the stages of that pipeline 6543 //pipe_desc(ISS, EX1, EX2, WR); 6544 #define ISS S0 6545 #define EX1 S1 6546 #define EX2 S2 6547 #define WR S3 6548 6549 // Integer ALU reg operation 6550 pipeline %{ 6551 6552 attributes %{ 6553 // ARM instructions are of fixed length 6554 fixed_size_instructions; // Fixed size instructions TODO does 6555 max_instructions_per_bundle = 2; // A53 = 2, A57 = 4 6556 // ARM instructions come in 32-bit word units 6557 instruction_unit_size = 4; // An instruction is 4 bytes long 6558 instruction_fetch_unit_size = 64; // The processor fetches one line 6559 instruction_fetch_units = 1; // of 64 bytes 6560 6561 // List of nop instructions 6562 nops( MachNop ); 6563 %} 6564 6565 // We don't use an actual pipeline model so don't care about resources 6566 // or description. we do use pipeline classes to introduce fixed 6567 // latencies 6568 6569 //----------RESOURCES---------------------------------------------------------- 6570 // Resources are the functional units available to the machine 6571 6572 resources( INS0, INS1, INS01 = INS0 | INS1, 6573 ALU0, ALU1, ALU = ALU0 | ALU1, 6574 MAC, 6575 DIV, 6576 BRANCH, 6577 LDST, 6578 NEON_FP); 6579 6580 //----------PIPELINE DESCRIPTION----------------------------------------------- 6581 // Pipeline Description specifies the stages in the machine's pipeline 6582 6583 // Define the pipeline as a generic 6 stage pipeline 6584 pipe_desc(S0, S1, S2, S3, S4, S5); 6585 6586 //----------PIPELINE CLASSES--------------------------------------------------- 6587 // Pipeline Classes describe the stages in which input and output are 6588 // referenced by the hardware pipeline. 
// FP / NEON pipeline classes.  INS01 means the op can dual-issue in either
// slot; INS0 restricts it to slot 0.  Stage numbers give read/write timing
// used by the scheduler to derive latencies.

pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst  : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst  : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// FP divides can only issue in slot 0 (INS0)
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst  : S5(write);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst  : S5(write);
  INS0 : ISS;
  NEON_FP : S5;
%}

// conditional FP selects also read the flags register
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr   : S1(read);
  src1 : S1(read);
  src2 : S1(read);
  dst  : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr   : S1(read);
  src1 : S1(read);
  src2 : S1(read);
  dst  : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst : S4(write);
  INS01 : ISS;
  NEON_FP : S4;
%}

pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst : S4(write);
  INS01 : ISS;
  NEON_FP : S4;
%}

// Vector classes: 64-bit (vecD) forms can dual issue (INS01); 128-bit
// (vecX) forms are restricted to slot 0 (INS0).

pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// multiply-accumulate: dst is also read as the accumulator input
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  dst  : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  dst  : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst  : S4(write);
  src1 : S2(read);
  src2 : S2(read);
  INS01 : ISS;
  NEON_FP : S4;
%}

pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst  : S4(write);
  src1 : S2(read);
  src2 : S2(read);
  INS0 : ISS;
  NEON_FP : S4;
%}

pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst  : S3(write);
  src1 : S2(read);
  src2 : S2(read);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst  : S3(write);
  src1 : S2(read);
  src2 : S2(read);
  INS0 : ISS;
  NEON_FP : S3;
%}

pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst   : S3(write);
  src   : S1(read);
  shift : S1(read);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst   : S3(write);
  src   : S1(read);
  shift : S1(read);
  INS0 : ISS;
  NEON_FP : S3;
%}

pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  INS01 : ISS;
  NEON_FP : S3;
%}

pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  INS0 : ISS;
  NEON_FP : S3;
%}

pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst  : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst : S5(write);
  src : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst : S5(write);
  src : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst : S5(write);
  src : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  INS01
: ISS; 6985 NEON_FP : S3; 6986 %} 6987 6988 pipe_class vdup_reg_reg128(vecX dst, iRegI src) 6989 %{ 6990 single_instruction; 6991 dst : S3(write); 6992 src : S1(read); 6993 INS01 : ISS; 6994 NEON_FP : S3; 6995 %} 6996 6997 pipe_class vdup_reg_freg64(vecD dst, vRegF src) 6998 %{ 6999 single_instruction; 7000 dst : S3(write); 7001 src : S1(read); 7002 INS01 : ISS; 7003 NEON_FP : S3; 7004 %} 7005 7006 pipe_class vdup_reg_freg128(vecX dst, vRegF src) 7007 %{ 7008 single_instruction; 7009 dst : S3(write); 7010 src : S1(read); 7011 INS01 : ISS; 7012 NEON_FP : S3; 7013 %} 7014 7015 pipe_class vdup_reg_dreg128(vecX dst, vRegD src) 7016 %{ 7017 single_instruction; 7018 dst : S3(write); 7019 src : S1(read); 7020 INS01 : ISS; 7021 NEON_FP : S3; 7022 %} 7023 7024 pipe_class vmovi_reg_imm64(vecD dst) 7025 %{ 7026 single_instruction; 7027 dst : S3(write); 7028 INS01 : ISS; 7029 NEON_FP : S3; 7030 %} 7031 7032 pipe_class vmovi_reg_imm128(vecX dst) 7033 %{ 7034 single_instruction; 7035 dst : S3(write); 7036 INS0 : ISS; 7037 NEON_FP : S3; 7038 %} 7039 7040 pipe_class vload_reg_mem64(vecD dst, vmem mem) 7041 %{ 7042 single_instruction; 7043 dst : S5(write); 7044 mem : ISS(read); 7045 INS01 : ISS; 7046 NEON_FP : S3; 7047 %} 7048 7049 pipe_class vload_reg_mem128(vecX dst, vmem mem) 7050 %{ 7051 single_instruction; 7052 dst : S5(write); 7053 mem : ISS(read); 7054 INS01 : ISS; 7055 NEON_FP : S3; 7056 %} 7057 7058 pipe_class vstore_reg_mem64(vecD src, vmem mem) 7059 %{ 7060 single_instruction; 7061 mem : ISS(read); 7062 src : S2(read); 7063 INS01 : ISS; 7064 NEON_FP : S3; 7065 %} 7066 7067 pipe_class vstore_reg_mem128(vecD src, vmem mem) 7068 %{ 7069 single_instruction; 7070 mem : ISS(read); 7071 src : S2(read); 7072 INS01 : ISS; 7073 NEON_FP : S3; 7074 %} 7075 7076 //------- Integer ALU operations -------------------------- 7077 7078 // Integer ALU reg-reg operation 7079 // Operands needed in EX1, result generated in EX2 7080 // Eg. 
ADD x0, x1, x2 7081 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) 7082 %{ 7083 single_instruction; 7084 dst : EX2(write); 7085 src1 : EX1(read); 7086 src2 : EX1(read); 7087 INS01 : ISS; // Dual issue as instruction 0 or 1 7088 ALU : EX2; 7089 %} 7090 7091 // Integer ALU reg-reg operation with constant shift 7092 // Shifted register must be available in LATE_ISS instead of EX1 7093 // Eg. ADD x0, x1, x2, LSL #2 7094 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift) 7095 %{ 7096 single_instruction; 7097 dst : EX2(write); 7098 src1 : EX1(read); 7099 src2 : ISS(read); 7100 INS01 : ISS; 7101 ALU : EX2; 7102 %} 7103 7104 // Integer ALU reg operation with constant shift 7105 // Eg. LSL x0, x1, #shift 7106 pipe_class ialu_reg_shift(iRegI dst, iRegI src1) 7107 %{ 7108 single_instruction; 7109 dst : EX2(write); 7110 src1 : ISS(read); 7111 INS01 : ISS; 7112 ALU : EX2; 7113 %} 7114 7115 // Integer ALU reg-reg operation with variable shift 7116 // Both operands must be available in LATE_ISS instead of EX1 7117 // Result is available in EX1 instead of EX2 7118 // Eg. LSLV x0, x1, x2 7119 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) 7120 %{ 7121 single_instruction; 7122 dst : EX1(write); 7123 src1 : ISS(read); 7124 src2 : ISS(read); 7125 INS01 : ISS; 7126 ALU : EX1; 7127 %} 7128 7129 // Integer ALU reg-reg operation with extract 7130 // As for _vshift above, but result generated in EX2 7131 // Eg. EXTR x0, x1, x2, #N 7132 pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2) 7133 %{ 7134 single_instruction; 7135 dst : EX2(write); 7136 src1 : ISS(read); 7137 src2 : ISS(read); 7138 INS1 : ISS; // Can only dual issue as Instruction 1 7139 ALU : EX1; 7140 %} 7141 7142 // Integer ALU reg operation 7143 // Eg. 
NEG x0, x1 7144 pipe_class ialu_reg(iRegI dst, iRegI src) 7145 %{ 7146 single_instruction; 7147 dst : EX2(write); 7148 src : EX1(read); 7149 INS01 : ISS; 7150 ALU : EX2; 7151 %} 7152 7153 // Integer ALU reg mmediate operation 7154 // Eg. ADD x0, x1, #N 7155 pipe_class ialu_reg_imm(iRegI dst, iRegI src1) 7156 %{ 7157 single_instruction; 7158 dst : EX2(write); 7159 src1 : EX1(read); 7160 INS01 : ISS; 7161 ALU : EX2; 7162 %} 7163 7164 // Integer ALU immediate operation (no source operands) 7165 // Eg. MOV x0, #N 7166 pipe_class ialu_imm(iRegI dst) 7167 %{ 7168 single_instruction; 7169 dst : EX1(write); 7170 INS01 : ISS; 7171 ALU : EX1; 7172 %} 7173 7174 //------- Compare operation ------------------------------- 7175 7176 // Compare reg-reg 7177 // Eg. CMP x0, x1 7178 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) 7179 %{ 7180 single_instruction; 7181 // fixed_latency(16); 7182 cr : EX2(write); 7183 op1 : EX1(read); 7184 op2 : EX1(read); 7185 INS01 : ISS; 7186 ALU : EX2; 7187 %} 7188 7189 // Compare reg-reg 7190 // Eg. CMP x0, #N 7191 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1) 7192 %{ 7193 single_instruction; 7194 // fixed_latency(16); 7195 cr : EX2(write); 7196 op1 : EX1(read); 7197 INS01 : ISS; 7198 ALU : EX2; 7199 %} 7200 7201 //------- Conditional instructions ------------------------ 7202 7203 // Conditional no operands 7204 // Eg. CSINC x0, zr, zr, <cond> 7205 pipe_class icond_none(iRegI dst, rFlagsReg cr) 7206 %{ 7207 single_instruction; 7208 cr : EX1(read); 7209 dst : EX2(write); 7210 INS01 : ISS; 7211 ALU : EX2; 7212 %} 7213 7214 // Conditional 2 operand 7215 // EG. CSEL X0, X1, X2, <cond> 7216 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr) 7217 %{ 7218 single_instruction; 7219 cr : EX1(read); 7220 src1 : EX1(read); 7221 src2 : EX1(read); 7222 dst : EX2(write); 7223 INS01 : ISS; 7224 ALU : EX2; 7225 %} 7226 7227 // Conditional 2 operand 7228 // EG. 
CSEL X0, X1, X2, <cond> 7229 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr) 7230 %{ 7231 single_instruction; 7232 cr : EX1(read); 7233 src : EX1(read); 7234 dst : EX2(write); 7235 INS01 : ISS; 7236 ALU : EX2; 7237 %} 7238 7239 //------- Multiply pipeline operations -------------------- 7240 7241 // Multiply reg-reg 7242 // Eg. MUL w0, w1, w2 7243 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) 7244 %{ 7245 single_instruction; 7246 dst : WR(write); 7247 src1 : ISS(read); 7248 src2 : ISS(read); 7249 INS01 : ISS; 7250 MAC : WR; 7251 %} 7252 7253 // Multiply accumulate 7254 // Eg. MADD w0, w1, w2, w3 7255 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) 7256 %{ 7257 single_instruction; 7258 dst : WR(write); 7259 src1 : ISS(read); 7260 src2 : ISS(read); 7261 src3 : ISS(read); 7262 INS01 : ISS; 7263 MAC : WR; 7264 %} 7265 7266 // Eg. MUL w0, w1, w2 7267 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) 7268 %{ 7269 single_instruction; 7270 fixed_latency(3); // Maximum latency for 64 bit mul 7271 dst : WR(write); 7272 src1 : ISS(read); 7273 src2 : ISS(read); 7274 INS01 : ISS; 7275 MAC : WR; 7276 %} 7277 7278 // Multiply accumulate 7279 // Eg. MADD w0, w1, w2, w3 7280 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) 7281 %{ 7282 single_instruction; 7283 fixed_latency(3); // Maximum latency for 64 bit mul 7284 dst : WR(write); 7285 src1 : ISS(read); 7286 src2 : ISS(read); 7287 src3 : ISS(read); 7288 INS01 : ISS; 7289 MAC : WR; 7290 %} 7291 7292 //------- Divide pipeline operations -------------------- 7293 7294 // Eg. SDIV w0, w1, w2 7295 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) 7296 %{ 7297 single_instruction; 7298 fixed_latency(8); // Maximum latency for 32 bit divide 7299 dst : WR(write); 7300 src1 : ISS(read); 7301 src2 : ISS(read); 7302 INS0 : ISS; // Can only dual issue as instruction 0 7303 DIV : WR; 7304 %} 7305 7306 // Eg. 
SDIV x0, x1, x2 7307 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) 7308 %{ 7309 single_instruction; 7310 fixed_latency(16); // Maximum latency for 64 bit divide 7311 dst : WR(write); 7312 src1 : ISS(read); 7313 src2 : ISS(read); 7314 INS0 : ISS; // Can only dual issue as instruction 0 7315 DIV : WR; 7316 %} 7317 7318 //------- Load pipeline operations ------------------------ 7319 7320 // Load - prefetch 7321 // Eg. PFRM <mem> 7322 pipe_class iload_prefetch(memory mem) 7323 %{ 7324 single_instruction; 7325 mem : ISS(read); 7326 INS01 : ISS; 7327 LDST : WR; 7328 %} 7329 7330 // Load - reg, mem 7331 // Eg. LDR x0, <mem> 7332 pipe_class iload_reg_mem(iRegI dst, memory mem) 7333 %{ 7334 single_instruction; 7335 dst : WR(write); 7336 mem : ISS(read); 7337 INS01 : ISS; 7338 LDST : WR; 7339 %} 7340 7341 // Load - reg, reg 7342 // Eg. LDR x0, [sp, x1] 7343 pipe_class iload_reg_reg(iRegI dst, iRegI src) 7344 %{ 7345 single_instruction; 7346 dst : WR(write); 7347 src : ISS(read); 7348 INS01 : ISS; 7349 LDST : WR; 7350 %} 7351 7352 //------- Store pipeline operations ----------------------- 7353 7354 // Store - zr, mem 7355 // Eg. STR zr, <mem> 7356 pipe_class istore_mem(memory mem) 7357 %{ 7358 single_instruction; 7359 mem : ISS(read); 7360 INS01 : ISS; 7361 LDST : WR; 7362 %} 7363 7364 // Store - reg, mem 7365 // Eg. STR x0, <mem> 7366 pipe_class istore_reg_mem(iRegI src, memory mem) 7367 %{ 7368 single_instruction; 7369 mem : ISS(read); 7370 src : EX2(read); 7371 INS01 : ISS; 7372 LDST : WR; 7373 %} 7374 7375 // Store - reg, reg 7376 // Eg. 
STR x0, [sp, x1] 7377 pipe_class istore_reg_reg(iRegI dst, iRegI src) 7378 %{ 7379 single_instruction; 7380 dst : ISS(read); 7381 src : EX2(read); 7382 INS01 : ISS; 7383 LDST : WR; 7384 %} 7385 7386 //------- Store pipeline operations ----------------------- 7387 7388 // Branch 7389 pipe_class pipe_branch() 7390 %{ 7391 single_instruction; 7392 INS01 : ISS; 7393 BRANCH : EX1; 7394 %} 7395 7396 // Conditional branch 7397 pipe_class pipe_branch_cond(rFlagsReg cr) 7398 %{ 7399 single_instruction; 7400 cr : EX1(read); 7401 INS01 : ISS; 7402 BRANCH : EX1; 7403 %} 7404 7405 // Compare & Branch 7406 // EG. CBZ/CBNZ 7407 pipe_class pipe_cmp_branch(iRegI op1) 7408 %{ 7409 single_instruction; 7410 op1 : EX1(read); 7411 INS01 : ISS; 7412 BRANCH : EX1; 7413 %} 7414 7415 //------- Synchronisation operations ---------------------- 7416 7417 // Any operation requiring serialization. 7418 // EG. DMB/Atomic Ops/Load Acquire/Str Release 7419 pipe_class pipe_serial() 7420 %{ 7421 single_instruction; 7422 force_serialization; 7423 fixed_latency(16); 7424 INS01 : ISS(2); // Cannot dual issue with any other instruction 7425 LDST : WR; 7426 %} 7427 7428 // Generic big/slow expanded idiom - also serialized 7429 pipe_class pipe_slow() 7430 %{ 7431 instruction_count(10); 7432 multiple_bundles; 7433 force_serialization; 7434 fixed_latency(16); 7435 INS01 : ISS(2); // Cannot dual issue with any other instruction 7436 LDST : WR; 7437 %} 7438 7439 // Empty pipeline class 7440 pipe_class pipe_class_empty() 7441 %{ 7442 single_instruction; 7443 fixed_latency(0); 7444 %} 7445 7446 // Default pipeline class. 7447 pipe_class pipe_class_default() 7448 %{ 7449 single_instruction; 7450 fixed_latency(2); 7451 %} 7452 7453 // Pipeline class for compares. 7454 pipe_class pipe_class_compare() 7455 %{ 7456 single_instruction; 7457 fixed_latency(16); 7458 %} 7459 7460 // Pipeline class for memory operations. 
7461 pipe_class pipe_class_memory() 7462 %{ 7463 single_instruction; 7464 fixed_latency(16); 7465 %} 7466 7467 // Pipeline class for call. 7468 pipe_class pipe_class_call() 7469 %{ 7470 single_instruction; 7471 fixed_latency(100); 7472 %} 7473 7474 // Define the class for the Nop node. 7475 define %{ 7476 MachNop = pipe_class_empty; 7477 %} 7478 7479 %} 7480 //----------INSTRUCTIONS------------------------------------------------------- 7481 // 7482 // match -- States which machine-independent subtree may be replaced 7483 // by this instruction. 7484 // ins_cost -- The estimated cost of this instruction is used by instruction 7485 // selection to identify a minimum cost tree of machine 7486 // instructions that matches a tree of machine-independent 7487 // instructions. 7488 // format -- A string providing the disassembly for this instruction. 7489 // The value of an instruction's operand may be inserted 7490 // by referring to it with a '$' prefix. 7491 // opcode -- Three instruction opcodes may be provided. These are referred 7492 // to within an encode class as $primary, $secondary, and $tertiary 7493 // rrspectively. The primary opcode is commonly used to 7494 // indicate the type of machine instruction, while secondary 7495 // and tertiary are often used for prefix options or addressing 7496 // modes. 7497 // ins_encode -- A list of encode classes with parameters. The encode class 7498 // name must have been defined in an 'enc_class' specification 7499 // in the encode section of the architecture description. 
7500 7501 // ============================================================================ 7502 // Memory (Load/Store) Instructions 7503 7504 // Load Instructions 7505 7506 // Load Byte (8 bit signed) 7507 instruct loadB(iRegINoSp dst, memory mem) 7508 %{ 7509 match(Set dst (LoadB mem)); 7510 predicate(!needs_acquiring_load(n)); 7511 7512 ins_cost(4 * INSN_COST); 7513 format %{ "ldrsbw $dst, $mem\t# byte" %} 7514 7515 ins_encode(aarch64_enc_ldrsbw(dst, mem)); 7516 7517 ins_pipe(iload_reg_mem); 7518 %} 7519 7520 // Load Byte (8 bit signed) into long 7521 instruct loadB2L(iRegLNoSp dst, memory mem) 7522 %{ 7523 match(Set dst (ConvI2L (LoadB mem))); 7524 predicate(!needs_acquiring_load(n->in(1))); 7525 7526 ins_cost(4 * INSN_COST); 7527 format %{ "ldrsb $dst, $mem\t# byte" %} 7528 7529 ins_encode(aarch64_enc_ldrsb(dst, mem)); 7530 7531 ins_pipe(iload_reg_mem); 7532 %} 7533 7534 // Load Byte (8 bit unsigned) 7535 instruct loadUB(iRegINoSp dst, memory mem) 7536 %{ 7537 match(Set dst (LoadUB mem)); 7538 predicate(!needs_acquiring_load(n)); 7539 7540 ins_cost(4 * INSN_COST); 7541 format %{ "ldrbw $dst, $mem\t# byte" %} 7542 7543 ins_encode(aarch64_enc_ldrb(dst, mem)); 7544 7545 ins_pipe(iload_reg_mem); 7546 %} 7547 7548 // Load Byte (8 bit unsigned) into long 7549 instruct loadUB2L(iRegLNoSp dst, memory mem) 7550 %{ 7551 match(Set dst (ConvI2L (LoadUB mem))); 7552 predicate(!needs_acquiring_load(n->in(1))); 7553 7554 ins_cost(4 * INSN_COST); 7555 format %{ "ldrb $dst, $mem\t# byte" %} 7556 7557 ins_encode(aarch64_enc_ldrb(dst, mem)); 7558 7559 ins_pipe(iload_reg_mem); 7560 %} 7561 7562 // Load Short (16 bit signed) 7563 instruct loadS(iRegINoSp dst, memory mem) 7564 %{ 7565 match(Set dst (LoadS mem)); 7566 predicate(!needs_acquiring_load(n)); 7567 7568 ins_cost(4 * INSN_COST); 7569 format %{ "ldrshw $dst, $mem\t# short" %} 7570 7571 ins_encode(aarch64_enc_ldrshw(dst, mem)); 7572 7573 ins_pipe(iload_reg_mem); 7574 %} 7575 7576 // Load Short (16 bit signed) into long 7577 
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// Matches the zero-extension idiom (long)i & 0xffffffffL
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Range
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}


// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

// NOTE(review): this definition continues past the end of this chunk.
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}
ins_encode(aarch64_enc_mov_p0(dst, con)); 7824 7825 ins_pipe(ialu_imm); 7826 %} 7827 7828 // Load Pointer Constant One 7829 7830 instruct loadConP1(iRegPNoSp dst, immP_1 con) 7831 %{ 7832 match(Set dst con); 7833 7834 ins_cost(INSN_COST); 7835 format %{ "mov $dst, $con\t# NULL ptr" %} 7836 7837 ins_encode(aarch64_enc_mov_p1(dst, con)); 7838 7839 ins_pipe(ialu_imm); 7840 %} 7841 7842 // Load Poll Page Constant 7843 7844 instruct loadConPollPage(iRegPNoSp dst, immPollPage con) 7845 %{ 7846 match(Set dst con); 7847 7848 ins_cost(INSN_COST); 7849 format %{ "adr $dst, $con\t# Poll Page Ptr" %} 7850 7851 ins_encode(aarch64_enc_mov_poll_page(dst, con)); 7852 7853 ins_pipe(ialu_imm); 7854 %} 7855 7856 // Load Byte Map Base Constant 7857 7858 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) 7859 %{ 7860 match(Set dst con); 7861 7862 ins_cost(INSN_COST); 7863 format %{ "adr $dst, $con\t# Byte Map Base" %} 7864 7865 ins_encode(aarch64_enc_mov_byte_map_base(dst, con)); 7866 7867 ins_pipe(ialu_imm); 7868 %} 7869 7870 // Load Narrow Pointer Constant 7871 7872 instruct loadConN(iRegNNoSp dst, immN con) 7873 %{ 7874 match(Set dst con); 7875 7876 ins_cost(INSN_COST * 4); 7877 format %{ "mov $dst, $con\t# compressed ptr" %} 7878 7879 ins_encode(aarch64_enc_mov_n(dst, con)); 7880 7881 ins_pipe(ialu_imm); 7882 %} 7883 7884 // Load Narrow Null Pointer Constant 7885 7886 instruct loadConN0(iRegNNoSp dst, immN0 con) 7887 %{ 7888 match(Set dst con); 7889 7890 ins_cost(INSN_COST); 7891 format %{ "mov $dst, $con\t# compressed NULL ptr" %} 7892 7893 ins_encode(aarch64_enc_mov_n0(dst, con)); 7894 7895 ins_pipe(ialu_imm); 7896 %} 7897 7898 // Load Narrow Klass Constant 7899 7900 instruct loadConNKlass(iRegNNoSp dst, immNKlass con) 7901 %{ 7902 match(Set dst con); 7903 7904 ins_cost(INSN_COST); 7905 format %{ "mov $dst, $con\t# compressed klass ptr" %} 7906 7907 ins_encode(aarch64_enc_mov_nk(dst, con)); 7908 7909 ins_pipe(ialu_imm); 7910 %} 7911 7912 // Load Packed Float Constant 

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}

// Load Double Constant

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // was "float=$con" -- copy-paste from loadConF; this is a double constant
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}

// Store Instructions

// Store CMS card-mark Immediate
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}


instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // was "strb rscractch2" (typo); the strb0 encoding stores zr, as in storeimmC0
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}

// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// When both the narrow oop and klass bases are zero, rheapbase holds zero
// and can be stored directly to write a compressed null.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}

// ---------------- volatile loads and stores ----------------

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // was "ldarh" -- the encoding emits the signed-extending ldarsh
  format %{ "ldarsh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// ---------------- end of volatile loads and stores ----------------

// ============================================================================
// BSWAP Instructions

instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    // byte-swap the halfword, then sign-extend bits 0..15 into the full word
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}

// ============================================================================
// Zero Count Instructions

instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw $dst, $src\n\t"
            "clzw $dst, $dst" %}
  ins_encode %{
    // no trailing-zero-count insn: bit-reverse then count leading zeros
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit $dst, $src\n\t"
            "clz $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

//---------- Population Count Instructions -------------------------------------
//

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw $src, $src\n\t"
            "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.

instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // move to FP register, count bits per byte, then sum the byte counts
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // load straight into the FP temp, then popcount as in popCountL
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// ============================================================================
// MemBar Instruction

instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}


instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
// Monitor-exit barrier: no code is emitted here (only a block
// comment), the required ordering is provided by the unlock sequence
// itself.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Volatile barrier elided when the adjacent access already provides
// the required ordering (see unnecessary_volatile predicate).
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full volatile barrier: StoreLoad is the expensive ordering on
// AArch64, hence the inflated cost.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}

// ============================================================================
// Cast/Convert Instructions

// Reinterpret a long as a pointer; register move only when src and
// dst differ.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Reinterpret a pointer as a long; register move only when src and
// dst differ.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format string: was "mov dst, $src" — missing '$' meant the
  // destination operand was never substituted in the printed
  // disassembly, and the encoder actually emits movw (cf. convP2I).
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}


// Convert oop pointer into compressed form
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Encode variant for oops statically known to be non-null; skips the
// null check so does not clobber flags.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a compressed oop that may be null.
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decode variant for compressed oops statically known to be non-null.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

  ins_pipe(ialu_reg);
%}

instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    // in-place decode uses a different MacroAssembler overload
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

  ins_pipe(ialu_reg);
%}

// Type-only node: emits no code, dst is reused in place.
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// Type-only node: emits no code, dst is reused in place.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// Type-only node: emits no code, dst is reused in place.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}

// ============================================================================
// Atomic operation instructions
//
// Intel and SPARC both implement Ideal Node LoadPLocked and
// Store{PIL}Conditional instructions using a normal load for the
// LoadPLocked and a CAS for the Store{PIL}Conditional.
//
// The ideal code appears only to use LoadPLocked/StorePLocked as a
// pair to lock object allocations from Eden space when not using
// TLABs.
//
// There does not appear to be a Load{IL}Locked Ideal Node and the
// Ideal code appears to use Store{IL}Conditional as an alias for CAS
// and to use StoreIConditional only for 32-bit and StoreLConditional
// only for 64-bit.
//
// We implement LoadPLocked and StorePLocked instructions using,
// respectively the AArch64 hw load-exclusive and store-conditional
// instructions. Whereas we must implement each of
// Store{IL}Conditional using a CAS which employs a pair of
// instructions comprising a load-exclusive followed by a
// store-conditional.


// Locked-load (linked load) of the current heap-top
// used when updating the eden heap top
// implemented using ldaxr on AArch64

instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flag (EQ) on success.
9120 // implemented using stlxr on AArch64. 9121 9122 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) 9123 %{ 9124 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 9125 9126 ins_cost(VOLATILE_REF_COST); 9127 9128 // TODO 9129 // do we need to do a store-conditional release or can we just use a 9130 // plain store-conditional? 9131 9132 format %{ 9133 "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release" 9134 "cmpw rscratch1, zr\t# EQ on successful write" 9135 %} 9136 9137 ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr)); 9138 9139 ins_pipe(pipe_serial); 9140 %} 9141 9142 9143 // storeLConditional is used by PhaseMacroExpand::expand_lock_node 9144 // when attempting to rebias a lock towards the current thread. We 9145 // must use the acquire form of cmpxchg in order to guarantee acquire 9146 // semantics in this case. 9147 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) 9148 %{ 9149 match(Set cr (StoreLConditional mem (Binary oldval newval))); 9150 9151 ins_cost(VOLATILE_REF_COST); 9152 9153 format %{ 9154 "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" 9155 "cmpw rscratch1, zr\t# EQ on successful write" 9156 %} 9157 9158 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval)); 9159 9160 ins_pipe(pipe_slow); 9161 %} 9162 9163 // storeIConditional also has acquire semantics, for no better reason 9164 // than matching storeLConditional. At the time of writing this 9165 // comment storeIConditional was not used anywhere by AArch64. 
// 32-bit conditional store; cr is set EQ on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// res <-- 1 on successful exchange, 0 otherwise (cset on EQ).
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// alternative CompareAndSwapX when we are eliding barriers
// (acquiring forms used, so costs are halved relative to the
// barrier-based rules above)

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}


// Atomic exchange: prev <-- old value at [mem], [mem] <-- newv.

instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}


// Atomic fetch-and-add; the _no_res variants match when the fetched
// value is unused (result discarded into noreg) and are slightly
// cheaper so they are preferred by the matcher.

instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Immediate-increment variants (add/sub-encodable constants).

instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// Manifest a CmpL result in an integer register.
// (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // csetw gives 0 for equal, 1 otherwise; cnegw negates to -1 when
    // src1 < src2, yielding the -1/0/1 three-way result.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}

instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // a negative immediate cannot be encoded in subs; compare by
    // adding its negation instead
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Conditional Move Instructions

// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).

instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

// n.b. this is selected in preference to the rule above because it
// avoids loading constant 0 into a source register

// TODO
// we ought only to be able to cull one of these variants as the ideal
// transforms ought always to order the zero consistently (to left/right?)

// zero on the left: dst <-- (cmp ? zero : src), emitted as
// cselw dst, src, zr with the condition unchanged.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// zero on the right: dst <-- (cmp ? src : zero).
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// special case for creating a boolean 0 or 1

// n.b. this is selected in preference to the rule above because it
// avoids loading constants 0 and 1 into a source register

instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    // negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    // negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// 64-bit conditional moves: same shapes as the int rules above but
// using csel on the full X registers.

instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Pointer conditional moves: same shapes again, on X registers.

instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Narrow-oop conditional moves (32-bit cselw).

instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // fixed annotation: this is the cmpOpU (unsigned) variant but the
  // format string previously claimed "signed" (cf. cmovUN_reg_zero)
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// special cases where one arg is zero

instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// Floating-point conditional moves via fcsel.
// n.b. src1/src2 are swapped in the fcsel relative to the ideal
// order, matching the integer csel rules above.

instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // fixed annotation: this is the CMoveD (double) rule — the format
  // string previously said "cmove float"
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}

instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // fixed annotation: this is the CMoveD (double) rule — the format
  // string previously said "cmove float"
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}

// ============================================================================
// Arithmetic Instructions
//

// Integer Addition

// TODO
// these currently employ operations which do not set CR and hence are
// not flagged as killing CR but we would like to isolate the cases
// where we want to set flags from those where we don't. need to work
// out how to do that.

instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
10100 as_Register($src2$$reg)); 10101 %} 10102 10103 ins_pipe(ialu_reg_reg); 10104 %} 10105 10106 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{ 10107 match(Set dst (AddP src1 (ConvI2L src2))); 10108 10109 ins_cost(1.9 * INSN_COST); 10110 format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %} 10111 10112 ins_encode %{ 10113 __ add(as_Register($dst$$reg), 10114 as_Register($src1$$reg), 10115 as_Register($src2$$reg), ext::sxtw); 10116 %} 10117 10118 ins_pipe(ialu_reg_reg); 10119 %} 10120 10121 instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{ 10122 match(Set dst (AddP src1 (LShiftL src2 scale))); 10123 10124 ins_cost(1.9 * INSN_COST); 10125 format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %} 10126 10127 ins_encode %{ 10128 __ lea(as_Register($dst$$reg), 10129 Address(as_Register($src1$$reg), as_Register($src2$$reg), 10130 Address::lsl($scale$$constant))); 10131 %} 10132 10133 ins_pipe(ialu_reg_reg_shift); 10134 %} 10135 10136 instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{ 10137 match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale))); 10138 10139 ins_cost(1.9 * INSN_COST); 10140 format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %} 10141 10142 ins_encode %{ 10143 __ lea(as_Register($dst$$reg), 10144 Address(as_Register($src1$$reg), as_Register($src2$$reg), 10145 Address::sxtw($scale$$constant))); 10146 %} 10147 10148 ins_pipe(ialu_reg_reg_shift); 10149 %} 10150 10151 instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{ 10152 match(Set dst (LShiftL (ConvI2L src) scale)); 10153 10154 ins_cost(INSN_COST); 10155 format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %} 10156 10157 ins_encode %{ 10158 __ sbfiz(as_Register($dst$$reg), 10159 as_Register($src$$reg), 10160 $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63)); 10161 %} 10162 10163 ins_pipe(ialu_reg_shift); 10164 %} 10165 10166 // Pointer Immediate Addition 
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// No constant pool entries required.
// Long Immediate Addition.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Integer Subtraction
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// No constant pool entries required.
// Long Immediate Subtraction.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed format typo: missing space between mnemonic and operand ("sub$dst").
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Integer Negation (special case for sub)

instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Integer Multiply

instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1
src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed format typo: stray comma before the annotation tab.
  format %{ "smulh $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// Combined Integer Multiply & Add/Sub

instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format now names the 32-bit form actually emitted (maddw, not madd).
  format %{ "maddw $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format now names the 32-bit form actually emitted (msubw, not msub).
  format %{ "msubw $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// Combined Long Multiply & Add/Sub

instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Integer Divide

instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
%} 10462 ins_pipe(ialu_reg_shift); 10463 %} 10464 10465 instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{ 10466 match(Set dst (AddI src (URShiftI (RShiftI src div1) div2))); 10467 ins_cost(INSN_COST); 10468 format %{ "addw $dst, $src, LSR $div1" %} 10469 10470 ins_encode %{ 10471 __ addw(as_Register($dst$$reg), 10472 as_Register($src$$reg), 10473 as_Register($src$$reg), 10474 Assembler::LSR, 31); 10475 %} 10476 ins_pipe(ialu_reg); 10477 %} 10478 10479 // Long Divide 10480 10481 instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ 10482 match(Set dst (DivL src1 src2)); 10483 10484 ins_cost(INSN_COST * 35); 10485 format %{ "sdiv $dst, $src1, $src2" %} 10486 10487 ins_encode(aarch64_enc_div(dst, src1, src2)); 10488 ins_pipe(ldiv_reg_reg); 10489 %} 10490 10491 instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{ 10492 match(Set dst (URShiftL (RShiftL src1 div1) div2)); 10493 ins_cost(INSN_COST); 10494 format %{ "lsr $dst, $src1, $div1" %} 10495 ins_encode %{ 10496 __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63); 10497 %} 10498 ins_pipe(ialu_reg_shift); 10499 %} 10500 10501 instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{ 10502 match(Set dst (AddL src (URShiftL (RShiftL src div1) div2))); 10503 ins_cost(INSN_COST); 10504 format %{ "add $dst, $src, $div1" %} 10505 10506 ins_encode %{ 10507 __ add(as_Register($dst$$reg), 10508 as_Register($src$$reg), 10509 as_Register($src$$reg), 10510 Assembler::LSR, 63); 10511 %} 10512 ins_pipe(ialu_reg); 10513 %} 10514 10515 // Integer Remainder 10516 10517 instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ 10518 match(Set dst (ModI src1 src2)); 10519 10520 ins_cost(INSN_COST * 22); 10521 format %{ "sdivw rscratch1, $src1, $src2\n\t" 10522 "msubw($dst, rscratch1, $src2, $src1" %} 10523 10524 ins_encode(aarch64_enc_modw(dst, src1, src2)); 10525 ins_pipe(idiv_reg_reg); 10526 %} 10527 10528 // Long Remainder 10529 10530 
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ 10531 match(Set dst (ModL src1 src2)); 10532 10533 ins_cost(INSN_COST * 38); 10534 format %{ "sdiv rscratch1, $src1, $src2\n" 10535 "msub($dst, rscratch1, $src2, $src1" %} 10536 10537 ins_encode(aarch64_enc_mod(dst, src1, src2)); 10538 ins_pipe(ldiv_reg_reg); 10539 %} 10540 10541 // Integer Shifts 10542 10543 // Shift Left Register 10544 instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ 10545 match(Set dst (LShiftI src1 src2)); 10546 10547 ins_cost(INSN_COST * 2); 10548 format %{ "lslvw $dst, $src1, $src2" %} 10549 10550 ins_encode %{ 10551 __ lslvw(as_Register($dst$$reg), 10552 as_Register($src1$$reg), 10553 as_Register($src2$$reg)); 10554 %} 10555 10556 ins_pipe(ialu_reg_reg_vshift); 10557 %} 10558 10559 // Shift Left Immediate 10560 instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ 10561 match(Set dst (LShiftI src1 src2)); 10562 10563 ins_cost(INSN_COST); 10564 format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %} 10565 10566 ins_encode %{ 10567 __ lslw(as_Register($dst$$reg), 10568 as_Register($src1$$reg), 10569 $src2$$constant & 0x1f); 10570 %} 10571 10572 ins_pipe(ialu_reg_shift); 10573 %} 10574 10575 // Shift Right Logical Register 10576 instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ 10577 match(Set dst (URShiftI src1 src2)); 10578 10579 ins_cost(INSN_COST * 2); 10580 format %{ "lsrvw $dst, $src1, $src2" %} 10581 10582 ins_encode %{ 10583 __ lsrvw(as_Register($dst$$reg), 10584 as_Register($src1$$reg), 10585 as_Register($src2$$reg)); 10586 %} 10587 10588 ins_pipe(ialu_reg_reg_vshift); 10589 %} 10590 10591 // Shift Right Logical Immediate 10592 instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ 10593 match(Set dst (URShiftI src1 src2)); 10594 10595 ins_cost(INSN_COST); 10596 format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %} 10597 10598 ins_encode %{ 10599 __ lsrw(as_Register($dst$$reg), 10600 
as_Register($src1$$reg), 10601 $src2$$constant & 0x1f); 10602 %} 10603 10604 ins_pipe(ialu_reg_shift); 10605 %} 10606 10607 // Shift Right Arithmetic Register 10608 instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ 10609 match(Set dst (RShiftI src1 src2)); 10610 10611 ins_cost(INSN_COST * 2); 10612 format %{ "asrvw $dst, $src1, $src2" %} 10613 10614 ins_encode %{ 10615 __ asrvw(as_Register($dst$$reg), 10616 as_Register($src1$$reg), 10617 as_Register($src2$$reg)); 10618 %} 10619 10620 ins_pipe(ialu_reg_reg_vshift); 10621 %} 10622 10623 // Shift Right Arithmetic Immediate 10624 instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ 10625 match(Set dst (RShiftI src1 src2)); 10626 10627 ins_cost(INSN_COST); 10628 format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %} 10629 10630 ins_encode %{ 10631 __ asrw(as_Register($dst$$reg), 10632 as_Register($src1$$reg), 10633 $src2$$constant & 0x1f); 10634 %} 10635 10636 ins_pipe(ialu_reg_shift); 10637 %} 10638 10639 // Combined Int Mask and Right Shift (using UBFM) 10640 // TODO 10641 10642 // Long Shifts 10643 10644 // Shift Left Register 10645 instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ 10646 match(Set dst (LShiftL src1 src2)); 10647 10648 ins_cost(INSN_COST * 2); 10649 format %{ "lslv $dst, $src1, $src2" %} 10650 10651 ins_encode %{ 10652 __ lslv(as_Register($dst$$reg), 10653 as_Register($src1$$reg), 10654 as_Register($src2$$reg)); 10655 %} 10656 10657 ins_pipe(ialu_reg_reg_vshift); 10658 %} 10659 10660 // Shift Left Immediate 10661 instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ 10662 match(Set dst (LShiftL src1 src2)); 10663 10664 ins_cost(INSN_COST); 10665 format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %} 10666 10667 ins_encode %{ 10668 __ lsl(as_Register($dst$$reg), 10669 as_Register($src1$$reg), 10670 $src2$$constant & 0x3f); 10671 %} 10672 10673 ins_pipe(ialu_reg_shift); 10674 %} 10675 10676 // Shift Right Logical Register 10677 instruct 
urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ 10678 match(Set dst (URShiftL src1 src2)); 10679 10680 ins_cost(INSN_COST * 2); 10681 format %{ "lsrv $dst, $src1, $src2" %} 10682 10683 ins_encode %{ 10684 __ lsrv(as_Register($dst$$reg), 10685 as_Register($src1$$reg), 10686 as_Register($src2$$reg)); 10687 %} 10688 10689 ins_pipe(ialu_reg_reg_vshift); 10690 %} 10691 10692 // Shift Right Logical Immediate 10693 instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ 10694 match(Set dst (URShiftL src1 src2)); 10695 10696 ins_cost(INSN_COST); 10697 format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %} 10698 10699 ins_encode %{ 10700 __ lsr(as_Register($dst$$reg), 10701 as_Register($src1$$reg), 10702 $src2$$constant & 0x3f); 10703 %} 10704 10705 ins_pipe(ialu_reg_shift); 10706 %} 10707 10708 // A special-case pattern for card table stores. 10709 instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ 10710 match(Set dst (URShiftL (CastP2X src1) src2)); 10711 10712 ins_cost(INSN_COST); 10713 format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %} 10714 10715 ins_encode %{ 10716 __ lsr(as_Register($dst$$reg), 10717 as_Register($src1$$reg), 10718 $src2$$constant & 0x3f); 10719 %} 10720 10721 ins_pipe(ialu_reg_shift); 10722 %} 10723 10724 // Shift Right Arithmetic Register 10725 instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ 10726 match(Set dst (RShiftL src1 src2)); 10727 10728 ins_cost(INSN_COST * 2); 10729 format %{ "asrv $dst, $src1, $src2" %} 10730 10731 ins_encode %{ 10732 __ asrv(as_Register($dst$$reg), 10733 as_Register($src1$$reg), 10734 as_Register($src2$$reg)); 10735 %} 10736 10737 ins_pipe(ialu_reg_reg_vshift); 10738 %} 10739 10740 // Shift Right Arithmetic Immediate 10741 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ 10742 match(Set dst (RShiftL src1 src2)); 10743 10744 ins_cost(INSN_COST); 10745 format %{ "asr $dst, $src1, ($src2 & 0x3f)" %} 10746 10747 ins_encode %{ 10748 __ 
asr(as_Register($dst$$reg), 10749 as_Register($src1$$reg), 10750 $src2$$constant & 0x3f); 10751 %} 10752 10753 ins_pipe(ialu_reg_shift); 10754 %} 10755 10756 // BEGIN This section of the file is automatically generated. Do not edit -------------- 10757 10758 instruct regL_not_reg(iRegLNoSp dst, 10759 iRegL src1, immL_M1 m1, 10760 rFlagsReg cr) %{ 10761 match(Set dst (XorL src1 m1)); 10762 ins_cost(INSN_COST); 10763 format %{ "eon $dst, $src1, zr" %} 10764 10765 ins_encode %{ 10766 __ eon(as_Register($dst$$reg), 10767 as_Register($src1$$reg), 10768 zr, 10769 Assembler::LSL, 0); 10770 %} 10771 10772 ins_pipe(ialu_reg); 10773 %} 10774 instruct regI_not_reg(iRegINoSp dst, 10775 iRegIorL2I src1, immI_M1 m1, 10776 rFlagsReg cr) %{ 10777 match(Set dst (XorI src1 m1)); 10778 ins_cost(INSN_COST); 10779 format %{ "eonw $dst, $src1, zr" %} 10780 10781 ins_encode %{ 10782 __ eonw(as_Register($dst$$reg), 10783 as_Register($src1$$reg), 10784 zr, 10785 Assembler::LSL, 0); 10786 %} 10787 10788 ins_pipe(ialu_reg); 10789 %} 10790 10791 instruct AndI_reg_not_reg(iRegINoSp dst, 10792 iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, 10793 rFlagsReg cr) %{ 10794 match(Set dst (AndI src1 (XorI src2 m1))); 10795 ins_cost(INSN_COST); 10796 format %{ "bicw $dst, $src1, $src2" %} 10797 10798 ins_encode %{ 10799 __ bicw(as_Register($dst$$reg), 10800 as_Register($src1$$reg), 10801 as_Register($src2$$reg), 10802 Assembler::LSL, 0); 10803 %} 10804 10805 ins_pipe(ialu_reg_reg); 10806 %} 10807 10808 instruct AndL_reg_not_reg(iRegLNoSp dst, 10809 iRegL src1, iRegL src2, immL_M1 m1, 10810 rFlagsReg cr) %{ 10811 match(Set dst (AndL src1 (XorL src2 m1))); 10812 ins_cost(INSN_COST); 10813 format %{ "bic $dst, $src1, $src2" %} 10814 10815 ins_encode %{ 10816 __ bic(as_Register($dst$$reg), 10817 as_Register($src1$$reg), 10818 as_Register($src2$$reg), 10819 Assembler::LSL, 0); 10820 %} 10821 10822 ins_pipe(ialu_reg_reg); 10823 %} 10824 10825 instruct OrI_reg_not_reg(iRegINoSp dst, 10826 iRegIorL2I src1, 
iRegIorL2I src2, immI_M1 m1, 10827 rFlagsReg cr) %{ 10828 match(Set dst (OrI src1 (XorI src2 m1))); 10829 ins_cost(INSN_COST); 10830 format %{ "ornw $dst, $src1, $src2" %} 10831 10832 ins_encode %{ 10833 __ ornw(as_Register($dst$$reg), 10834 as_Register($src1$$reg), 10835 as_Register($src2$$reg), 10836 Assembler::LSL, 0); 10837 %} 10838 10839 ins_pipe(ialu_reg_reg); 10840 %} 10841 10842 instruct OrL_reg_not_reg(iRegLNoSp dst, 10843 iRegL src1, iRegL src2, immL_M1 m1, 10844 rFlagsReg cr) %{ 10845 match(Set dst (OrL src1 (XorL src2 m1))); 10846 ins_cost(INSN_COST); 10847 format %{ "orn $dst, $src1, $src2" %} 10848 10849 ins_encode %{ 10850 __ orn(as_Register($dst$$reg), 10851 as_Register($src1$$reg), 10852 as_Register($src2$$reg), 10853 Assembler::LSL, 0); 10854 %} 10855 10856 ins_pipe(ialu_reg_reg); 10857 %} 10858 10859 instruct XorI_reg_not_reg(iRegINoSp dst, 10860 iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1, 10861 rFlagsReg cr) %{ 10862 match(Set dst (XorI m1 (XorI src2 src1))); 10863 ins_cost(INSN_COST); 10864 format %{ "eonw $dst, $src1, $src2" %} 10865 10866 ins_encode %{ 10867 __ eonw(as_Register($dst$$reg), 10868 as_Register($src1$$reg), 10869 as_Register($src2$$reg), 10870 Assembler::LSL, 0); 10871 %} 10872 10873 ins_pipe(ialu_reg_reg); 10874 %} 10875 10876 instruct XorL_reg_not_reg(iRegLNoSp dst, 10877 iRegL src1, iRegL src2, immL_M1 m1, 10878 rFlagsReg cr) %{ 10879 match(Set dst (XorL m1 (XorL src2 src1))); 10880 ins_cost(INSN_COST); 10881 format %{ "eon $dst, $src1, $src2" %} 10882 10883 ins_encode %{ 10884 __ eon(as_Register($dst$$reg), 10885 as_Register($src1$$reg), 10886 as_Register($src2$$reg), 10887 Assembler::LSL, 0); 10888 %} 10889 10890 ins_pipe(ialu_reg_reg); 10891 %} 10892 10893 instruct AndI_reg_URShift_not_reg(iRegINoSp dst, 10894 iRegIorL2I src1, iRegIorL2I src2, 10895 immI src3, immI_M1 src4, rFlagsReg cr) %{ 10896 match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4))); 10897 ins_cost(1.9 * INSN_COST); 10898 format %{ "bicw $dst, $src1, 
$src2, LSR $src3" %} 10899 10900 ins_encode %{ 10901 __ bicw(as_Register($dst$$reg), 10902 as_Register($src1$$reg), 10903 as_Register($src2$$reg), 10904 Assembler::LSR, 10905 $src3$$constant & 0x1f); 10906 %} 10907 10908 ins_pipe(ialu_reg_reg_shift); 10909 %} 10910 10911 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst, 10912 iRegL src1, iRegL src2, 10913 immI src3, immL_M1 src4, rFlagsReg cr) %{ 10914 match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4))); 10915 ins_cost(1.9 * INSN_COST); 10916 format %{ "bic $dst, $src1, $src2, LSR $src3" %} 10917 10918 ins_encode %{ 10919 __ bic(as_Register($dst$$reg), 10920 as_Register($src1$$reg), 10921 as_Register($src2$$reg), 10922 Assembler::LSR, 10923 $src3$$constant & 0x3f); 10924 %} 10925 10926 ins_pipe(ialu_reg_reg_shift); 10927 %} 10928 10929 instruct AndI_reg_RShift_not_reg(iRegINoSp dst, 10930 iRegIorL2I src1, iRegIorL2I src2, 10931 immI src3, immI_M1 src4, rFlagsReg cr) %{ 10932 match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4))); 10933 ins_cost(1.9 * INSN_COST); 10934 format %{ "bicw $dst, $src1, $src2, ASR $src3" %} 10935 10936 ins_encode %{ 10937 __ bicw(as_Register($dst$$reg), 10938 as_Register($src1$$reg), 10939 as_Register($src2$$reg), 10940 Assembler::ASR, 10941 $src3$$constant & 0x1f); 10942 %} 10943 10944 ins_pipe(ialu_reg_reg_shift); 10945 %} 10946 10947 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst, 10948 iRegL src1, iRegL src2, 10949 immI src3, immL_M1 src4, rFlagsReg cr) %{ 10950 match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4))); 10951 ins_cost(1.9 * INSN_COST); 10952 format %{ "bic $dst, $src1, $src2, ASR $src3" %} 10953 10954 ins_encode %{ 10955 __ bic(as_Register($dst$$reg), 10956 as_Register($src1$$reg), 10957 as_Register($src2$$reg), 10958 Assembler::ASR, 10959 $src3$$constant & 0x3f); 10960 %} 10961 10962 ins_pipe(ialu_reg_reg_shift); 10963 %} 10964 10965 instruct AndI_reg_LShift_not_reg(iRegINoSp dst, 10966 iRegIorL2I src1, iRegIorL2I src2, 10967 immI src3, immI_M1 src4, 
rFlagsReg cr) %{ 10968 match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4))); 10969 ins_cost(1.9 * INSN_COST); 10970 format %{ "bicw $dst, $src1, $src2, LSL $src3" %} 10971 10972 ins_encode %{ 10973 __ bicw(as_Register($dst$$reg), 10974 as_Register($src1$$reg), 10975 as_Register($src2$$reg), 10976 Assembler::LSL, 10977 $src3$$constant & 0x1f); 10978 %} 10979 10980 ins_pipe(ialu_reg_reg_shift); 10981 %} 10982 10983 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst, 10984 iRegL src1, iRegL src2, 10985 immI src3, immL_M1 src4, rFlagsReg cr) %{ 10986 match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4))); 10987 ins_cost(1.9 * INSN_COST); 10988 format %{ "bic $dst, $src1, $src2, LSL $src3" %} 10989 10990 ins_encode %{ 10991 __ bic(as_Register($dst$$reg), 10992 as_Register($src1$$reg), 10993 as_Register($src2$$reg), 10994 Assembler::LSL, 10995 $src3$$constant & 0x3f); 10996 %} 10997 10998 ins_pipe(ialu_reg_reg_shift); 10999 %} 11000 11001 instruct XorI_reg_URShift_not_reg(iRegINoSp dst, 11002 iRegIorL2I src1, iRegIorL2I src2, 11003 immI src3, immI_M1 src4, rFlagsReg cr) %{ 11004 match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1))); 11005 ins_cost(1.9 * INSN_COST); 11006 format %{ "eonw $dst, $src1, $src2, LSR $src3" %} 11007 11008 ins_encode %{ 11009 __ eonw(as_Register($dst$$reg), 11010 as_Register($src1$$reg), 11011 as_Register($src2$$reg), 11012 Assembler::LSR, 11013 $src3$$constant & 0x1f); 11014 %} 11015 11016 ins_pipe(ialu_reg_reg_shift); 11017 %} 11018 11019 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst, 11020 iRegL src1, iRegL src2, 11021 immI src3, immL_M1 src4, rFlagsReg cr) %{ 11022 match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1))); 11023 ins_cost(1.9 * INSN_COST); 11024 format %{ "eon $dst, $src1, $src2, LSR $src3" %} 11025 11026 ins_encode %{ 11027 __ eon(as_Register($dst$$reg), 11028 as_Register($src1$$reg), 11029 as_Register($src2$$reg), 11030 Assembler::LSR, 11031 $src3$$constant & 0x3f); 11032 %} 11033 11034 
// (tail of the preceding shifted-register rule)
  ins_pipe(ialu_reg_reg_shift);
%}

// EON rules: an XOR against -1 (src4) of (src1 XOR shifted src2) is the
// bitwise complement of the XOR, which maps onto a single eon/eonw with a
// shifted-register second operand.

instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::ASR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSL,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// ORN rules: OR of src1 with the complement (XOR -1) of a shifted src2
// maps onto a single orn/ornw with a shifted-register second operand.

instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::ASR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSL,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Logical ops with a shifted-register second operand: fold the shift of
// src2 into the and/andw instruction itself.

instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// XOR with a shifted-register second operand (eor/eorw).

instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::ASR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSL,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// OR with a shifted-register second operand (orr/orrw).

instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::ASR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSL,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Add with a shifted-register second operand (add/addw).

instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::ASR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSL,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Subtract with a shifted-register second operand (sub/subw).

instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::ASR,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::ASR,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            Assembler::LSL,
            $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg),
           Assembler::LSL,
           $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}



// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
             as_Register($src$$reg),
             r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
             as_Register($src$$reg),
             r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// (shift-then-mask becomes a single unsigned bitfield extract; the
// immI_bitmask/immL_bitmask operands guarantee mask+1 is a power of two)
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
             as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}

// Rotations

// (lshift + rshift must sum to the register width, so the OR/ADD of the
// two shifted values is a rotate and maps onto a single extr/extrw)

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
             $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
             $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}


// rol expander
// rotate-left == rotate-right by the negated count (rorv only rotates right)

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
             rscratch1);
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}

instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// FIX: these int rotate rules previously declared long operand classes
// (iRegLNoSp/iRegL) and expanded to the 64-bit rolL_rReg, so they could
// never match an int rotate correctly; use int operands and the 32-bit
// expander (rorvw) instead.

instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// ror expander

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
             as_Register($shift$$reg));
  %}
  ins_pipe(ialu_reg_reg_vshift);
%}

instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// FIX: same int-vs-long operand-class defect as the rolI variants above;
// use int operands and the 32-bit rorI_rReg expander.

instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegIorL2I src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

// Add/subtract (extended)

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, sxtw $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}
  ins_pipe(ialu_reg_reg);
%};

// (shift-left-then-shift-right sign/zero extension idioms map onto the
// extended-register forms of add)

instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxth);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxth $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxth);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtw $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, sxtb $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, uxtb $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (an And with an all-ones low mask is a zero-extension and maps onto the
// extended-register forms of add/sub)

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxth);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

  ins_encode %{
    __ add(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxtw);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxtb);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxth);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
           as_Register($src2$$reg), ext::uxtw);
  %}
  ins_pipe(ialu_reg_reg);
%}

// END This section of the file is automatically generated.
Do not edit -------------- 12380 12381 // ============================================================================ 12382 // Floating Point Arithmetic Instructions 12383 12384 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ 12385 match(Set dst (AddF src1 src2)); 12386 12387 ins_cost(INSN_COST * 5); 12388 format %{ "fadds $dst, $src1, $src2" %} 12389 12390 ins_encode %{ 12391 __ fadds(as_FloatRegister($dst$$reg), 12392 as_FloatRegister($src1$$reg), 12393 as_FloatRegister($src2$$reg)); 12394 %} 12395 12396 ins_pipe(fp_dop_reg_reg_s); 12397 %} 12398 12399 instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ 12400 match(Set dst (AddD src1 src2)); 12401 12402 ins_cost(INSN_COST * 5); 12403 format %{ "faddd $dst, $src1, $src2" %} 12404 12405 ins_encode %{ 12406 __ faddd(as_FloatRegister($dst$$reg), 12407 as_FloatRegister($src1$$reg), 12408 as_FloatRegister($src2$$reg)); 12409 %} 12410 12411 ins_pipe(fp_dop_reg_reg_d); 12412 %} 12413 12414 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ 12415 match(Set dst (SubF src1 src2)); 12416 12417 ins_cost(INSN_COST * 5); 12418 format %{ "fsubs $dst, $src1, $src2" %} 12419 12420 ins_encode %{ 12421 __ fsubs(as_FloatRegister($dst$$reg), 12422 as_FloatRegister($src1$$reg), 12423 as_FloatRegister($src2$$reg)); 12424 %} 12425 12426 ins_pipe(fp_dop_reg_reg_s); 12427 %} 12428 12429 instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ 12430 match(Set dst (SubD src1 src2)); 12431 12432 ins_cost(INSN_COST * 5); 12433 format %{ "fsubd $dst, $src1, $src2" %} 12434 12435 ins_encode %{ 12436 __ fsubd(as_FloatRegister($dst$$reg), 12437 as_FloatRegister($src1$$reg), 12438 as_FloatRegister($src2$$reg)); 12439 %} 12440 12441 ins_pipe(fp_dop_reg_reg_d); 12442 %} 12443 12444 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ 12445 match(Set dst (MulF src1 src2)); 12446 12447 ins_cost(INSN_COST * 6); 12448 format %{ "fmuls $dst, $src1, $src2" %} 12449 12450 ins_encode %{ 12451 __ 
fmuls(as_FloatRegister($dst$$reg), 12452 as_FloatRegister($src1$$reg), 12453 as_FloatRegister($src2$$reg)); 12454 %} 12455 12456 ins_pipe(fp_dop_reg_reg_s); 12457 %} 12458 12459 instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ 12460 match(Set dst (MulD src1 src2)); 12461 12462 ins_cost(INSN_COST * 6); 12463 format %{ "fmuld $dst, $src1, $src2" %} 12464 12465 ins_encode %{ 12466 __ fmuld(as_FloatRegister($dst$$reg), 12467 as_FloatRegister($src1$$reg), 12468 as_FloatRegister($src2$$reg)); 12469 %} 12470 12471 ins_pipe(fp_dop_reg_reg_d); 12472 %} 12473 12474 // We cannot use these fused mul w add/sub ops because they don't 12475 // produce the same result as the equivalent separated ops 12476 // (essentially they don't round the intermediate result). that's a 12477 // shame. leaving them here in case we can idenitfy cases where it is 12478 // legitimate to use them 12479 12480 12481 // instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ 12482 // match(Set dst (AddF (MulF src1 src2) src3)); 12483 12484 // format %{ "fmadds $dst, $src1, $src2, $src3" %} 12485 12486 // ins_encode %{ 12487 // __ fmadds(as_FloatRegister($dst$$reg), 12488 // as_FloatRegister($src1$$reg), 12489 // as_FloatRegister($src2$$reg), 12490 // as_FloatRegister($src3$$reg)); 12491 // %} 12492 12493 // ins_pipe(pipe_class_default); 12494 // %} 12495 12496 // instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ 12497 // match(Set dst (AddD (MulD src1 src2) src3)); 12498 12499 // format %{ "fmaddd $dst, $src1, $src2, $src3" %} 12500 12501 // ins_encode %{ 12502 // __ fmaddd(as_FloatRegister($dst$$reg), 12503 // as_FloatRegister($src1$$reg), 12504 // as_FloatRegister($src2$$reg), 12505 // as_FloatRegister($src3$$reg)); 12506 // %} 12507 12508 // ins_pipe(pipe_class_default); 12509 // %} 12510 12511 // instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ 12512 // match(Set dst (AddF (MulF (NegF src1) src2) src3)); 12513 // match(Set 
dst (AddF (NegF (MulF src1 src2)) src3)); 12514 12515 // format %{ "fmsubs $dst, $src1, $src2, $src3" %} 12516 12517 // ins_encode %{ 12518 // __ fmsubs(as_FloatRegister($dst$$reg), 12519 // as_FloatRegister($src1$$reg), 12520 // as_FloatRegister($src2$$reg), 12521 // as_FloatRegister($src3$$reg)); 12522 // %} 12523 12524 // ins_pipe(pipe_class_default); 12525 // %} 12526 12527 // instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ 12528 // match(Set dst (AddD (MulD (NegD src1) src2) src3)); 12529 // match(Set dst (AddD (NegD (MulD src1 src2)) src3)); 12530 12531 // format %{ "fmsubd $dst, $src1, $src2, $src3" %} 12532 12533 // ins_encode %{ 12534 // __ fmsubd(as_FloatRegister($dst$$reg), 12535 // as_FloatRegister($src1$$reg), 12536 // as_FloatRegister($src2$$reg), 12537 // as_FloatRegister($src3$$reg)); 12538 // %} 12539 12540 // ins_pipe(pipe_class_default); 12541 // %} 12542 12543 // instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ 12544 // match(Set dst (SubF (MulF (NegF src1) src2) src3)); 12545 // match(Set dst (SubF (NegF (MulF src1 src2)) src3)); 12546 12547 // format %{ "fnmadds $dst, $src1, $src2, $src3" %} 12548 12549 // ins_encode %{ 12550 // __ fnmadds(as_FloatRegister($dst$$reg), 12551 // as_FloatRegister($src1$$reg), 12552 // as_FloatRegister($src2$$reg), 12553 // as_FloatRegister($src3$$reg)); 12554 // %} 12555 12556 // ins_pipe(pipe_class_default); 12557 // %} 12558 12559 // instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{ 12560 // match(Set dst (SubD (MulD (NegD src1) src2) src3)); 12561 // match(Set dst (SubD (NegD (MulD src1 src2)) src3)); 12562 12563 // format %{ "fnmaddd $dst, $src1, $src2, $src3" %} 12564 12565 // ins_encode %{ 12566 // __ fnmaddd(as_FloatRegister($dst$$reg), 12567 // as_FloatRegister($src1$$reg), 12568 // as_FloatRegister($src2$$reg), 12569 // as_FloatRegister($src3$$reg)); 12570 // %} 12571 12572 // ins_pipe(pipe_class_default); 12573 // %} 12574 12575 
// instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{ 12576 // match(Set dst (SubF (MulF src1 src2) src3)); 12577 12578 // format %{ "fnmsubs $dst, $src1, $src2, $src3" %} 12579 12580 // ins_encode %{ 12581 // __ fnmsubs(as_FloatRegister($dst$$reg), 12582 // as_FloatRegister($src1$$reg), 12583 // as_FloatRegister($src2$$reg), 12584 // as_FloatRegister($src3$$reg)); 12585 // %} 12586 12587 // ins_pipe(pipe_class_default); 12588 // %} 12589 12590 // instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{ 12591 // match(Set dst (SubD (MulD src1 src2) src3)); 12592 12593 // format %{ "fnmsubd $dst, $src1, $src2, $src3" %} 12594 12595 // ins_encode %{ 12596 // // n.b. insn name should be fnmsubd 12597 // __ fnmsub(as_FloatRegister($dst$$reg), 12598 // as_FloatRegister($src1$$reg), 12599 // as_FloatRegister($src2$$reg), 12600 // as_FloatRegister($src3$$reg)); 12601 // %} 12602 12603 // ins_pipe(pipe_class_default); 12604 // %} 12605 12606 12607 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ 12608 match(Set dst (DivF src1 src2)); 12609 12610 ins_cost(INSN_COST * 18); 12611 format %{ "fdivs $dst, $src1, $src2" %} 12612 12613 ins_encode %{ 12614 __ fdivs(as_FloatRegister($dst$$reg), 12615 as_FloatRegister($src1$$reg), 12616 as_FloatRegister($src2$$reg)); 12617 %} 12618 12619 ins_pipe(fp_div_s); 12620 %} 12621 12622 instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ 12623 match(Set dst (DivD src1 src2)); 12624 12625 ins_cost(INSN_COST * 32); 12626 format %{ "fdivd $dst, $src1, $src2" %} 12627 12628 ins_encode %{ 12629 __ fdivd(as_FloatRegister($dst$$reg), 12630 as_FloatRegister($src1$$reg), 12631 as_FloatRegister($src2$$reg)); 12632 %} 12633 12634 ins_pipe(fp_div_d); 12635 %} 12636 12637 instruct negF_reg_reg(vRegF dst, vRegF src) %{ 12638 match(Set dst (NegF src)); 12639 12640 ins_cost(INSN_COST * 3); 12641 format %{ "fneg $dst, $src" %} 12642 12643 ins_encode %{ 12644 __ 
fnegs(as_FloatRegister($dst$$reg), 12645 as_FloatRegister($src$$reg)); 12646 %} 12647 12648 ins_pipe(fp_uop_s); 12649 %} 12650 12651 instruct negD_reg_reg(vRegD dst, vRegD src) %{ 12652 match(Set dst (NegD src)); 12653 12654 ins_cost(INSN_COST * 3); 12655 format %{ "fnegd $dst, $src" %} 12656 12657 ins_encode %{ 12658 __ fnegd(as_FloatRegister($dst$$reg), 12659 as_FloatRegister($src$$reg)); 12660 %} 12661 12662 ins_pipe(fp_uop_d); 12663 %} 12664 12665 instruct absF_reg(vRegF dst, vRegF src) %{ 12666 match(Set dst (AbsF src)); 12667 12668 ins_cost(INSN_COST * 3); 12669 format %{ "fabss $dst, $src" %} 12670 ins_encode %{ 12671 __ fabss(as_FloatRegister($dst$$reg), 12672 as_FloatRegister($src$$reg)); 12673 %} 12674 12675 ins_pipe(fp_uop_s); 12676 %} 12677 12678 instruct absD_reg(vRegD dst, vRegD src) %{ 12679 match(Set dst (AbsD src)); 12680 12681 ins_cost(INSN_COST * 3); 12682 format %{ "fabsd $dst, $src" %} 12683 ins_encode %{ 12684 __ fabsd(as_FloatRegister($dst$$reg), 12685 as_FloatRegister($src$$reg)); 12686 %} 12687 12688 ins_pipe(fp_uop_d); 12689 %} 12690 12691 instruct sqrtD_reg(vRegD dst, vRegD src) %{ 12692 match(Set dst (SqrtD src)); 12693 12694 ins_cost(INSN_COST * 50); 12695 format %{ "fsqrtd $dst, $src" %} 12696 ins_encode %{ 12697 __ fsqrtd(as_FloatRegister($dst$$reg), 12698 as_FloatRegister($src$$reg)); 12699 %} 12700 12701 ins_pipe(fp_div_s); 12702 %} 12703 12704 instruct sqrtF_reg(vRegF dst, vRegF src) %{ 12705 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 12706 12707 ins_cost(INSN_COST * 50); 12708 format %{ "fsqrts $dst, $src" %} 12709 ins_encode %{ 12710 __ fsqrts(as_FloatRegister($dst$$reg), 12711 as_FloatRegister($src$$reg)); 12712 %} 12713 12714 ins_pipe(fp_div_d); 12715 %} 12716 12717 // ============================================================================ 12718 // Logical Instructions 12719 12720 // Integer Logical Instructions 12721 12722 // And Instructions 12723 12724 12725 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, 
iRegIorL2I src2, rFlagsReg cr) %{ 12726 match(Set dst (AndI src1 src2)); 12727 12728 format %{ "andw $dst, $src1, $src2\t# int" %} 12729 12730 ins_cost(INSN_COST); 12731 ins_encode %{ 12732 __ andw(as_Register($dst$$reg), 12733 as_Register($src1$$reg), 12734 as_Register($src2$$reg)); 12735 %} 12736 12737 ins_pipe(ialu_reg_reg); 12738 %} 12739 12740 instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{ 12741 match(Set dst (AndI src1 src2)); 12742 12743 format %{ "andsw $dst, $src1, $src2\t# int" %} 12744 12745 ins_cost(INSN_COST); 12746 ins_encode %{ 12747 __ andw(as_Register($dst$$reg), 12748 as_Register($src1$$reg), 12749 (unsigned long)($src2$$constant)); 12750 %} 12751 12752 ins_pipe(ialu_reg_imm); 12753 %} 12754 12755 // Or Instructions 12756 12757 instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ 12758 match(Set dst (OrI src1 src2)); 12759 12760 format %{ "orrw $dst, $src1, $src2\t# int" %} 12761 12762 ins_cost(INSN_COST); 12763 ins_encode %{ 12764 __ orrw(as_Register($dst$$reg), 12765 as_Register($src1$$reg), 12766 as_Register($src2$$reg)); 12767 %} 12768 12769 ins_pipe(ialu_reg_reg); 12770 %} 12771 12772 instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ 12773 match(Set dst (OrI src1 src2)); 12774 12775 format %{ "orrw $dst, $src1, $src2\t# int" %} 12776 12777 ins_cost(INSN_COST); 12778 ins_encode %{ 12779 __ orrw(as_Register($dst$$reg), 12780 as_Register($src1$$reg), 12781 (unsigned long)($src2$$constant)); 12782 %} 12783 12784 ins_pipe(ialu_reg_imm); 12785 %} 12786 12787 // Xor Instructions 12788 12789 instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ 12790 match(Set dst (XorI src1 src2)); 12791 12792 format %{ "eorw $dst, $src1, $src2\t# int" %} 12793 12794 ins_cost(INSN_COST); 12795 ins_encode %{ 12796 __ eorw(as_Register($dst$$reg), 12797 as_Register($src1$$reg), 12798 as_Register($src2$$reg)); 12799 %} 12800 12801 ins_pipe(ialu_reg_reg); 12802 %} 12803 
12804 instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{ 12805 match(Set dst (XorI src1 src2)); 12806 12807 format %{ "eorw $dst, $src1, $src2\t# int" %} 12808 12809 ins_cost(INSN_COST); 12810 ins_encode %{ 12811 __ eorw(as_Register($dst$$reg), 12812 as_Register($src1$$reg), 12813 (unsigned long)($src2$$constant)); 12814 %} 12815 12816 ins_pipe(ialu_reg_imm); 12817 %} 12818 12819 // Long Logical Instructions 12820 // TODO 12821 12822 instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{ 12823 match(Set dst (AndL src1 src2)); 12824 12825 format %{ "and $dst, $src1, $src2\t# int" %} 12826 12827 ins_cost(INSN_COST); 12828 ins_encode %{ 12829 __ andr(as_Register($dst$$reg), 12830 as_Register($src1$$reg), 12831 as_Register($src2$$reg)); 12832 %} 12833 12834 ins_pipe(ialu_reg_reg); 12835 %} 12836 12837 instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{ 12838 match(Set dst (AndL src1 src2)); 12839 12840 format %{ "and $dst, $src1, $src2\t# int" %} 12841 12842 ins_cost(INSN_COST); 12843 ins_encode %{ 12844 __ andr(as_Register($dst$$reg), 12845 as_Register($src1$$reg), 12846 (unsigned long)($src2$$constant)); 12847 %} 12848 12849 ins_pipe(ialu_reg_imm); 12850 %} 12851 12852 // Or Instructions 12853 12854 instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ 12855 match(Set dst (OrL src1 src2)); 12856 12857 format %{ "orr $dst, $src1, $src2\t# int" %} 12858 12859 ins_cost(INSN_COST); 12860 ins_encode %{ 12861 __ orr(as_Register($dst$$reg), 12862 as_Register($src1$$reg), 12863 as_Register($src2$$reg)); 12864 %} 12865 12866 ins_pipe(ialu_reg_reg); 12867 %} 12868 12869 instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ 12870 match(Set dst (OrL src1 src2)); 12871 12872 format %{ "orr $dst, $src1, $src2\t# int" %} 12873 12874 ins_cost(INSN_COST); 12875 ins_encode %{ 12876 __ orr(as_Register($dst$$reg), 12877 as_Register($src1$$reg), 12878 (unsigned long)($src2$$constant)); 12879 %} 
12880 12881 ins_pipe(ialu_reg_imm); 12882 %} 12883 12884 // Xor Instructions 12885 12886 instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ 12887 match(Set dst (XorL src1 src2)); 12888 12889 format %{ "eor $dst, $src1, $src2\t# int" %} 12890 12891 ins_cost(INSN_COST); 12892 ins_encode %{ 12893 __ eor(as_Register($dst$$reg), 12894 as_Register($src1$$reg), 12895 as_Register($src2$$reg)); 12896 %} 12897 12898 ins_pipe(ialu_reg_reg); 12899 %} 12900 12901 instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{ 12902 match(Set dst (XorL src1 src2)); 12903 12904 ins_cost(INSN_COST); 12905 format %{ "eor $dst, $src1, $src2\t# int" %} 12906 12907 ins_encode %{ 12908 __ eor(as_Register($dst$$reg), 12909 as_Register($src1$$reg), 12910 (unsigned long)($src2$$constant)); 12911 %} 12912 12913 ins_pipe(ialu_reg_imm); 12914 %} 12915 12916 instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) 12917 %{ 12918 match(Set dst (ConvI2L src)); 12919 12920 ins_cost(INSN_COST); 12921 format %{ "sxtw $dst, $src\t# i2l" %} 12922 ins_encode %{ 12923 __ sbfm($dst$$Register, $src$$Register, 0, 31); 12924 %} 12925 ins_pipe(ialu_reg_shift); 12926 %} 12927 12928 // this pattern occurs in bigmath arithmetic 12929 instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) 12930 %{ 12931 match(Set dst (AndL (ConvI2L src) mask)); 12932 12933 ins_cost(INSN_COST); 12934 format %{ "ubfm $dst, $src, 0, 31\t# ui2l" %} 12935 ins_encode %{ 12936 __ ubfm($dst$$Register, $src$$Register, 0, 31); 12937 %} 12938 12939 ins_pipe(ialu_reg_shift); 12940 %} 12941 12942 instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ 12943 match(Set dst (ConvL2I src)); 12944 12945 ins_cost(INSN_COST); 12946 format %{ "movw $dst, $src \t// l2i" %} 12947 12948 ins_encode %{ 12949 __ movw(as_Register($dst$$reg), as_Register($src$$reg)); 12950 %} 12951 12952 ins_pipe(ialu_reg); 12953 %} 12954 12955 instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) 12956 %{ 12957 match(Set dst (Conv2B 
src)); 12958 effect(KILL cr); 12959 12960 format %{ 12961 "cmpw $src, zr\n\t" 12962 "cset $dst, ne" 12963 %} 12964 12965 ins_encode %{ 12966 __ cmpw(as_Register($src$$reg), zr); 12967 __ cset(as_Register($dst$$reg), Assembler::NE); 12968 %} 12969 12970 ins_pipe(ialu_reg); 12971 %} 12972 12973 instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr) 12974 %{ 12975 match(Set dst (Conv2B src)); 12976 effect(KILL cr); 12977 12978 format %{ 12979 "cmp $src, zr\n\t" 12980 "cset $dst, ne" 12981 %} 12982 12983 ins_encode %{ 12984 __ cmp(as_Register($src$$reg), zr); 12985 __ cset(as_Register($dst$$reg), Assembler::NE); 12986 %} 12987 12988 ins_pipe(ialu_reg); 12989 %} 12990 12991 instruct convD2F_reg(vRegF dst, vRegD src) %{ 12992 match(Set dst (ConvD2F src)); 12993 12994 ins_cost(INSN_COST * 5); 12995 format %{ "fcvtd $dst, $src \t// d2f" %} 12996 12997 ins_encode %{ 12998 __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); 12999 %} 13000 13001 ins_pipe(fp_d2f); 13002 %} 13003 13004 instruct convF2D_reg(vRegD dst, vRegF src) %{ 13005 match(Set dst (ConvF2D src)); 13006 13007 ins_cost(INSN_COST * 5); 13008 format %{ "fcvts $dst, $src \t// f2d" %} 13009 13010 ins_encode %{ 13011 __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); 13012 %} 13013 13014 ins_pipe(fp_f2d); 13015 %} 13016 13017 instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{ 13018 match(Set dst (ConvF2I src)); 13019 13020 ins_cost(INSN_COST * 5); 13021 format %{ "fcvtzsw $dst, $src \t// f2i" %} 13022 13023 ins_encode %{ 13024 __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg)); 13025 %} 13026 13027 ins_pipe(fp_f2i); 13028 %} 13029 13030 instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{ 13031 match(Set dst (ConvF2L src)); 13032 13033 ins_cost(INSN_COST * 5); 13034 format %{ "fcvtzs $dst, $src \t// f2l" %} 13035 13036 ins_encode %{ 13037 __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg)); 13038 %} 13039 13040 ins_pipe(fp_f2l); 13041 %} 13042 13043 
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{ 13044 match(Set dst (ConvI2F src)); 13045 13046 ins_cost(INSN_COST * 5); 13047 format %{ "scvtfws $dst, $src \t// i2f" %} 13048 13049 ins_encode %{ 13050 __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg)); 13051 %} 13052 13053 ins_pipe(fp_i2f); 13054 %} 13055 13056 instruct convL2F_reg_reg(vRegF dst, iRegL src) %{ 13057 match(Set dst (ConvL2F src)); 13058 13059 ins_cost(INSN_COST * 5); 13060 format %{ "scvtfs $dst, $src \t// l2f" %} 13061 13062 ins_encode %{ 13063 __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg)); 13064 %} 13065 13066 ins_pipe(fp_l2f); 13067 %} 13068 13069 instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{ 13070 match(Set dst (ConvD2I src)); 13071 13072 ins_cost(INSN_COST * 5); 13073 format %{ "fcvtzdw $dst, $src \t// d2i" %} 13074 13075 ins_encode %{ 13076 __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg)); 13077 %} 13078 13079 ins_pipe(fp_d2i); 13080 %} 13081 13082 instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{ 13083 match(Set dst (ConvD2L src)); 13084 13085 ins_cost(INSN_COST * 5); 13086 format %{ "fcvtzd $dst, $src \t// d2l" %} 13087 13088 ins_encode %{ 13089 __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg)); 13090 %} 13091 13092 ins_pipe(fp_d2l); 13093 %} 13094 13095 instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{ 13096 match(Set dst (ConvI2D src)); 13097 13098 ins_cost(INSN_COST * 5); 13099 format %{ "scvtfwd $dst, $src \t// i2d" %} 13100 13101 ins_encode %{ 13102 __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg)); 13103 %} 13104 13105 ins_pipe(fp_i2d); 13106 %} 13107 13108 instruct convL2D_reg_reg(vRegD dst, iRegL src) %{ 13109 match(Set dst (ConvL2D src)); 13110 13111 ins_cost(INSN_COST * 5); 13112 format %{ "scvtfd $dst, $src \t// l2d" %} 13113 13114 ins_encode %{ 13115 __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg)); 13116 %} 13117 13118 ins_pipe(fp_l2d); 13119 %} 13120 13121 // stack <-> 
reg and reg <-> reg shuffles with no conversion 13122 13123 instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ 13124 13125 match(Set dst (MoveF2I src)); 13126 13127 effect(DEF dst, USE src); 13128 13129 ins_cost(4 * INSN_COST); 13130 13131 format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %} 13132 13133 ins_encode %{ 13134 __ ldrw($dst$$Register, Address(sp, $src$$disp)); 13135 %} 13136 13137 ins_pipe(iload_reg_reg); 13138 13139 %} 13140 13141 instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{ 13142 13143 match(Set dst (MoveI2F src)); 13144 13145 effect(DEF dst, USE src); 13146 13147 ins_cost(4 * INSN_COST); 13148 13149 format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %} 13150 13151 ins_encode %{ 13152 __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); 13153 %} 13154 13155 ins_pipe(pipe_class_memory); 13156 13157 %} 13158 13159 instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ 13160 13161 match(Set dst (MoveD2L src)); 13162 13163 effect(DEF dst, USE src); 13164 13165 ins_cost(4 * INSN_COST); 13166 13167 format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %} 13168 13169 ins_encode %{ 13170 __ ldr($dst$$Register, Address(sp, $src$$disp)); 13171 %} 13172 13173 ins_pipe(iload_reg_reg); 13174 13175 %} 13176 13177 instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{ 13178 13179 match(Set dst (MoveL2D src)); 13180 13181 effect(DEF dst, USE src); 13182 13183 ins_cost(4 * INSN_COST); 13184 13185 format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %} 13186 13187 ins_encode %{ 13188 __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); 13189 %} 13190 13191 ins_pipe(pipe_class_memory); 13192 13193 %} 13194 13195 instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{ 13196 13197 match(Set dst (MoveF2I src)); 13198 13199 effect(DEF dst, USE src); 13200 13201 ins_cost(INSN_COST); 13202 13203 format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %} 13204 13205 ins_encode %{ 13206 __ strs(as_FloatRegister($src$$reg), Address(sp, 
$dst$$disp)); 13207 %} 13208 13209 ins_pipe(pipe_class_memory); 13210 13211 %} 13212 13213 instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ 13214 13215 match(Set dst (MoveI2F src)); 13216 13217 effect(DEF dst, USE src); 13218 13219 ins_cost(INSN_COST); 13220 13221 format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %} 13222 13223 ins_encode %{ 13224 __ strw($src$$Register, Address(sp, $dst$$disp)); 13225 %} 13226 13227 ins_pipe(istore_reg_reg); 13228 13229 %} 13230 13231 instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{ 13232 13233 match(Set dst (MoveD2L src)); 13234 13235 effect(DEF dst, USE src); 13236 13237 ins_cost(INSN_COST); 13238 13239 format %{ "strd $dst, $src\t# MoveD2L_reg_stack" %} 13240 13241 ins_encode %{ 13242 __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); 13243 %} 13244 13245 ins_pipe(pipe_class_memory); 13246 13247 %} 13248 13249 instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ 13250 13251 match(Set dst (MoveL2D src)); 13252 13253 effect(DEF dst, USE src); 13254 13255 ins_cost(INSN_COST); 13256 13257 format %{ "str $src, $dst\t# MoveL2D_reg_stack" %} 13258 13259 ins_encode %{ 13260 __ str($src$$Register, Address(sp, $dst$$disp)); 13261 %} 13262 13263 ins_pipe(istore_reg_reg); 13264 13265 %} 13266 13267 instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{ 13268 13269 match(Set dst (MoveF2I src)); 13270 13271 effect(DEF dst, USE src); 13272 13273 ins_cost(INSN_COST); 13274 13275 format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %} 13276 13277 ins_encode %{ 13278 __ fmovs($dst$$Register, as_FloatRegister($src$$reg)); 13279 %} 13280 13281 ins_pipe(pipe_class_memory); 13282 13283 %} 13284 13285 instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{ 13286 13287 match(Set dst (MoveI2F src)); 13288 13289 effect(DEF dst, USE src); 13290 13291 ins_cost(INSN_COST); 13292 13293 format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %} 13294 13295 ins_encode %{ 13296 __ fmovs(as_FloatRegister($dst$$reg), $src$$Register); 13297 %} 13298 
13299 ins_pipe(pipe_class_memory); 13300 13301 %} 13302 13303 instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{ 13304 13305 match(Set dst (MoveD2L src)); 13306 13307 effect(DEF dst, USE src); 13308 13309 ins_cost(INSN_COST); 13310 13311 format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %} 13312 13313 ins_encode %{ 13314 __ fmovd($dst$$Register, as_FloatRegister($src$$reg)); 13315 %} 13316 13317 ins_pipe(pipe_class_memory); 13318 13319 %} 13320 13321 instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{ 13322 13323 match(Set dst (MoveL2D src)); 13324 13325 effect(DEF dst, USE src); 13326 13327 ins_cost(INSN_COST); 13328 13329 format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %} 13330 13331 ins_encode %{ 13332 __ fmovd(as_FloatRegister($dst$$reg), $src$$Register); 13333 %} 13334 13335 ins_pipe(pipe_class_memory); 13336 13337 %} 13338 13339 // ============================================================================ 13340 // clearing of an array 13341 13342 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) 13343 %{ 13344 match(Set dummy (ClearArray cnt base)); 13345 effect(USE_KILL cnt, USE_KILL base); 13346 13347 ins_cost(4 * INSN_COST); 13348 format %{ "ClearArray $cnt, $base" %} 13349 13350 ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base)); 13351 13352 ins_pipe(pipe_class_memory); 13353 %} 13354 13355 // ============================================================================ 13356 // Overflow Math Instructions 13357 13358 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) 13359 %{ 13360 match(Set cr (OverflowAddI op1 op2)); 13361 13362 format %{ "cmnw $op1, $op2\t# overflow check int" %} 13363 ins_cost(INSN_COST); 13364 ins_encode %{ 13365 __ cmnw($op1$$Register, $op2$$Register); 13366 %} 13367 13368 ins_pipe(icmp_reg_reg); 13369 %} 13370 13371 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2) 13372 %{ 13373 match(Set cr (OverflowAddI op1 op2)); 13374 13375 format %{ 
"cmnw $op1, $op2\t# overflow check int" %} 13376 ins_cost(INSN_COST); 13377 ins_encode %{ 13378 __ cmnw($op1$$Register, $op2$$constant); 13379 %} 13380 13381 ins_pipe(icmp_reg_imm); 13382 %} 13383 13384 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) 13385 %{ 13386 match(Set cr (OverflowAddL op1 op2)); 13387 13388 format %{ "cmn $op1, $op2\t# overflow check long" %} 13389 ins_cost(INSN_COST); 13390 ins_encode %{ 13391 __ cmn($op1$$Register, $op2$$Register); 13392 %} 13393 13394 ins_pipe(icmp_reg_reg); 13395 %} 13396 13397 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2) 13398 %{ 13399 match(Set cr (OverflowAddL op1 op2)); 13400 13401 format %{ "cmn $op1, $op2\t# overflow check long" %} 13402 ins_cost(INSN_COST); 13403 ins_encode %{ 13404 __ cmn($op1$$Register, $op2$$constant); 13405 %} 13406 13407 ins_pipe(icmp_reg_imm); 13408 %} 13409 13410 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) 13411 %{ 13412 match(Set cr (OverflowSubI op1 op2)); 13413 13414 format %{ "cmpw $op1, $op2\t# overflow check int" %} 13415 ins_cost(INSN_COST); 13416 ins_encode %{ 13417 __ cmpw($op1$$Register, $op2$$Register); 13418 %} 13419 13420 ins_pipe(icmp_reg_reg); 13421 %} 13422 13423 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2) 13424 %{ 13425 match(Set cr (OverflowSubI op1 op2)); 13426 13427 format %{ "cmpw $op1, $op2\t# overflow check int" %} 13428 ins_cost(INSN_COST); 13429 ins_encode %{ 13430 __ cmpw($op1$$Register, $op2$$constant); 13431 %} 13432 13433 ins_pipe(icmp_reg_imm); 13434 %} 13435 13436 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2) 13437 %{ 13438 match(Set cr (OverflowSubL op1 op2)); 13439 13440 format %{ "cmp $op1, $op2\t# overflow check long" %} 13441 ins_cost(INSN_COST); 13442 ins_encode %{ 13443 __ cmp($op1$$Register, $op2$$Register); 13444 %} 13445 13446 ins_pipe(icmp_reg_reg); 13447 %} 13448 13449 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, 
immLAddSub op2) 13450 %{ 13451 match(Set cr (OverflowSubL op1 op2)); 13452 13453 format %{ "cmp $op1, $op2\t# overflow check long" %} 13454 ins_cost(INSN_COST); 13455 ins_encode %{ 13456 __ cmp($op1$$Register, $op2$$constant); 13457 %} 13458 13459 ins_pipe(icmp_reg_imm); 13460 %} 13461 13462 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1) 13463 %{ 13464 match(Set cr (OverflowSubI zero op1)); 13465 13466 format %{ "cmpw zr, $op1\t# overflow check int" %} 13467 ins_cost(INSN_COST); 13468 ins_encode %{ 13469 __ cmpw(zr, $op1$$Register); 13470 %} 13471 13472 ins_pipe(icmp_reg_imm); 13473 %} 13474 13475 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1) 13476 %{ 13477 match(Set cr (OverflowSubL zero op1)); 13478 13479 format %{ "cmp zr, $op1\t# overflow check long" %} 13480 ins_cost(INSN_COST); 13481 ins_encode %{ 13482 __ cmp(zr, $op1$$Register); 13483 %} 13484 13485 ins_pipe(icmp_reg_imm); 13486 %} 13487 13488 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2) 13489 %{ 13490 match(Set cr (OverflowMulI op1 op2)); 13491 13492 format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t" 13493 "cmp rscratch1, rscratch1, sxtw\n\t" 13494 "movw rscratch1, #0x80000000\n\t" 13495 "cselw rscratch1, rscratch1, zr, NE\n\t" 13496 "cmpw rscratch1, #1" %} 13497 ins_cost(5 * INSN_COST); 13498 ins_encode %{ 13499 __ smull(rscratch1, $op1$$Register, $op2$$Register); 13500 __ subs(zr, rscratch1, rscratch1, ext::sxtw); // NE => overflow 13501 __ movw(rscratch1, 0x80000000); // Develop 0 (EQ), 13502 __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE) 13503 __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS 13504 %} 13505 13506 ins_pipe(pipe_slow); 13507 %} 13508 13509 instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr) 13510 %{ 13511 match(If cmp (OverflowMulI op1 op2)); 13512 predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow 13513 || 
n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow); 13514 effect(USE labl, KILL cr); 13515 13516 format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t" 13517 "cmp rscratch1, rscratch1, sxtw\n\t" 13518 "b$cmp $labl" %} 13519 ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST 13520 ins_encode %{ 13521 Label* L = $labl$$label; 13522 Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; 13523 __ smull(rscratch1, $op1$$Register, $op2$$Register); 13524 __ subs(zr, rscratch1, rscratch1, ext::sxtw); // NE => overflow 13525 __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); 13526 %} 13527 13528 ins_pipe(pipe_serial); 13529 %} 13530 13531 instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2) 13532 %{ 13533 match(Set cr (OverflowMulL op1 op2)); 13534 13535 format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t" 13536 "smulh rscratch2, $op1, $op2\n\t" 13537 "cmp rscratch2, rscratch1, ASR #31\n\t" 13538 "movw rscratch1, #0x80000000\n\t" 13539 "cselw rscratch1, rscratch1, zr, NE\n\t" 13540 "cmpw rscratch1, #1" %} 13541 ins_cost(6 * INSN_COST); 13542 ins_encode %{ 13543 __ mul(rscratch1, $op1$$Register, $op2$$Register); // Result bits 0..63 13544 __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127 13545 __ cmp(rscratch2, rscratch1, Assembler::ASR, 31); // Top is pure sign ext 13546 __ movw(rscratch1, 0x80000000); // Develop 0 (EQ), 13547 __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE) 13548 __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS 13549 %} 13550 13551 ins_pipe(pipe_slow); 13552 %} 13553 13554 instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr) 13555 %{ 13556 match(If cmp (OverflowMulL op1 op2)); 13557 predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow 13558 || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow); 13559 effect(USE labl, KILL cr); 13560 13561 format %{ "mul rscratch1, $op1, 
$op2\t#overflow check long\n\t" 13562 "smulh rscratch2, $op1, $op2\n\t" 13563 "cmp rscratch2, rscratch1, ASR #31\n\t" 13564 "b$cmp $labl" %} 13565 ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST 13566 ins_encode %{ 13567 Label* L = $labl$$label; 13568 Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; 13569 __ mul(rscratch1, $op1$$Register, $op2$$Register); // Result bits 0..63 13570 __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127 13571 __ cmp(rscratch2, rscratch1, Assembler::ASR, 31); // Top is pure sign ext 13572 __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); 13573 %} 13574 13575 ins_pipe(pipe_serial); 13576 %} 13577 13578 // ============================================================================ 13579 // Compare Instructions 13580 13581 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2) 13582 %{ 13583 match(Set cr (CmpI op1 op2)); 13584 13585 effect(DEF cr, USE op1, USE op2); 13586 13587 ins_cost(INSN_COST); 13588 format %{ "cmpw $op1, $op2" %} 13589 13590 ins_encode(aarch64_enc_cmpw(op1, op2)); 13591 13592 ins_pipe(icmp_reg_reg); 13593 %} 13594 13595 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero) 13596 %{ 13597 match(Set cr (CmpI op1 zero)); 13598 13599 effect(DEF cr, USE op1); 13600 13601 ins_cost(INSN_COST); 13602 format %{ "cmpw $op1, 0" %} 13603 13604 ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero)); 13605 13606 ins_pipe(icmp_reg_imm); 13607 %} 13608 13609 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2) 13610 %{ 13611 match(Set cr (CmpI op1 op2)); 13612 13613 effect(DEF cr, USE op1); 13614 13615 ins_cost(INSN_COST); 13616 format %{ "cmpw $op1, $op2" %} 13617 13618 ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2)); 13619 13620 ins_pipe(icmp_reg_imm); 13621 %} 13622 13623 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2) 13624 %{ 13625 match(Set cr (CmpI op1 op2)); 13626 13627 effect(DEF cr, USE op1); 13628 13629 
ins_cost(INSN_COST * 2);
  format %{ "cmpw $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.

instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Arbitrary int immediate (not add/sub-encodable).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Arbitrary long immediate (not add/sub-encodable).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Pointer compares use the unsigned flags register.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed pointer null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}

// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}

// Three-way float compare (CmpF3): materializes -1/0/1 in $dst.
// Unordered inputs produce -1 (the LT path keeps the -1 installed by csinvw).
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // NOTE(review): `done` is bound but never branched to -- dead label.
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// Three-way double compare (CmpD3); same scheme as compF3_reg_reg.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // NOTE(review): `done` is bound but never branched to -- dead label.
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// CmpF3 against the constant 0.0f.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // NOTE(review): `done` is bound but never branched to -- dead label.
    __ bind(done);
  %}

  ins_pipe(pipe_class_default);

%}

// CmpD3 against the constant 0.0.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    Label done;
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0D);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // NOTE(review): `done` is bound but never branched to -- dead label.
    __ bind(done);
  %}
  ins_pipe(pipe_class_default);

%}

// CmpLTMask: $dst = (p < q) ? -1 : 0, built as csetw (0/1) then negate.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: an arithmetic shift by 31 copies the sign bit
// into every bit, giving -1 for negative inputs and 0 otherwise.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// ============================================================================
// Max and Min

// Signed int minimum via compare + conditional select.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum; identical to minI_rReg but selects on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}

// ============================================================================
// Branch Instructions

// Direct Branch.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Make use of CBZ and CBNZ. These instructions, as well as being
// shorter than (cmp; branch), have the additional benefit of not
// killing the flags.

// Compare-against-zero int branch fused into cbzw/cbnzw.
instruct cmpI_imm0_branch(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-against-zero long branch fused into cbz/cbnz.
instruct cmpL_imm0_branch(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-check branch fused into cbz/cbnz.
instruct cmpP_imm0_branch(cmpOp cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null check of a decoded narrow oop: the test is performed directly on the
// compressed (32-bit) form with cbzw/cbnzw, skipping the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Test bit and Branch

// Patterns for short (< 32KiB) variants

// Sign test (x < 0 / x >= 0) on a long via tbz/tbnz on bit 63.
instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // LT (sign bit set) maps to tbnz (NE), GE to tbz (EQ).
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test on an int via tbz/tbnz on bit 31.
instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ?
Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test on a long ((x & pow2) ==/!= 0) via tbz/tbnz.
instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test on an int via tbz/tbnz.
instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// And far variants
instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Test bits

// (x & imm) compared with 0, where imm is logical-immediate-encodable.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// NOTE(review): format string says "tst" but the encoding emits the 32-bit
// form tstw (cosmetic mismatch in the debug listing only).
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Conditional Far Branch
14410 // Conditional Far Branch Unsigned 14411 // TODO: fixme 14412 14413 // counted loop end branch near 14414 instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl) 14415 %{ 14416 match(CountedLoopEnd cmp cr); 14417 14418 effect(USE lbl); 14419 14420 ins_cost(BRANCH_COST); 14421 // short variant. 14422 // ins_short_branch(1); 14423 format %{ "b$cmp $lbl \t// counted loop end" %} 14424 14425 ins_encode(aarch64_enc_br_con(cmp, lbl)); 14426 14427 ins_pipe(pipe_branch); 14428 %} 14429 14430 // counted loop end branch near Unsigned 14431 instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl) 14432 %{ 14433 match(CountedLoopEnd cmp cr); 14434 14435 effect(USE lbl); 14436 14437 ins_cost(BRANCH_COST); 14438 // short variant. 14439 // ins_short_branch(1); 14440 format %{ "b$cmp $lbl \t// counted loop end unsigned" %} 14441 14442 ins_encode(aarch64_enc_br_conU(cmp, lbl)); 14443 14444 ins_pipe(pipe_branch); 14445 %} 14446 14447 // counted loop end branch far 14448 // counted loop end branch far unsigned 14449 // TODO: fixme 14450 14451 // ============================================================================ 14452 // inlined locking and unlocking 14453 14454 instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) 14455 %{ 14456 match(Set cr (FastLock object box)); 14457 effect(TEMP tmp, TEMP tmp2); 14458 14459 // TODO 14460 // identify correct cost 14461 ins_cost(5 * INSN_COST); 14462 format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %} 14463 14464 ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2)); 14465 14466 ins_pipe(pipe_serial); 14467 %} 14468 14469 instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) 14470 %{ 14471 match(Set cr (FastUnlock object box)); 14472 effect(TEMP tmp, TEMP tmp2); 14473 14474 ins_cost(5 * INSN_COST); 14475 format %{ "fastunlock $object,$box\t! 
kills $tmp, $tmp2" %} 14476 14477 ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2)); 14478 14479 ins_pipe(pipe_serial); 14480 %} 14481 14482 14483 // ============================================================================ 14484 // Safepoint Instructions 14485 14486 // TODO 14487 // provide a near and far version of this code 14488 14489 instruct safePoint(iRegP poll) 14490 %{ 14491 match(SafePoint poll); 14492 14493 format %{ 14494 "ldrw zr, [$poll]\t# Safepoint: poll for GC" 14495 %} 14496 ins_encode %{ 14497 __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type); 14498 %} 14499 ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); 14500 %} 14501 14502 14503 // ============================================================================ 14504 // Procedure Call/Return Instructions 14505 14506 // Call Java Static Instruction 14507 14508 instruct CallStaticJavaDirect(method meth) 14509 %{ 14510 match(CallStaticJava); 14511 14512 effect(USE meth); 14513 14514 ins_cost(CALL_COST); 14515 14516 format %{ "call,static $meth \t// ==> " %} 14517 14518 ins_encode( aarch64_enc_java_static_call(meth), 14519 aarch64_enc_call_epilog ); 14520 14521 ins_pipe(pipe_class_call); 14522 %} 14523 14524 // TO HERE 14525 14526 // Call Java Dynamic Instruction 14527 instruct CallDynamicJavaDirect(method meth) 14528 %{ 14529 match(CallDynamicJava); 14530 14531 effect(USE meth); 14532 14533 ins_cost(CALL_COST); 14534 14535 format %{ "CALL,dynamic $meth \t// ==> " %} 14536 14537 ins_encode( aarch64_enc_java_dynamic_call(meth), 14538 aarch64_enc_call_epilog ); 14539 14540 ins_pipe(pipe_class_call); 14541 %} 14542 14543 // Call Runtime Instruction 14544 14545 instruct CallRuntimeDirect(method meth) 14546 %{ 14547 match(CallRuntime); 14548 14549 effect(USE meth); 14550 14551 ins_cost(CALL_COST); 14552 14553 format %{ "CALL, runtime $meth" %} 14554 14555 ins_encode( aarch64_enc_java_to_runtime(meth) ); 14556 14557 ins_pipe(pipe_class_call); 14558 %} 14559 14560 // Call 
Runtime Instruction 14561 14562 instruct CallLeafDirect(method meth) 14563 %{ 14564 match(CallLeaf); 14565 14566 effect(USE meth); 14567 14568 ins_cost(CALL_COST); 14569 14570 format %{ "CALL, runtime leaf $meth" %} 14571 14572 ins_encode( aarch64_enc_java_to_runtime(meth) ); 14573 14574 ins_pipe(pipe_class_call); 14575 %} 14576 14577 // Call Runtime Instruction 14578 14579 instruct CallLeafNoFPDirect(method meth) 14580 %{ 14581 match(CallLeafNoFP); 14582 14583 effect(USE meth); 14584 14585 ins_cost(CALL_COST); 14586 14587 format %{ "CALL, runtime leaf nofp $meth" %} 14588 14589 ins_encode( aarch64_enc_java_to_runtime(meth) ); 14590 14591 ins_pipe(pipe_class_call); 14592 %} 14593 14594 // Tail Call; Jump from runtime stub to Java code. 14595 // Also known as an 'interprocedural jump'. 14596 // Target of jump will eventually return to caller. 14597 // TailJump below removes the return address. 14598 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) 14599 %{ 14600 match(TailCall jump_target method_oop); 14601 14602 ins_cost(CALL_COST); 14603 14604 format %{ "br $jump_target\t# $method_oop holds method oop" %} 14605 14606 ins_encode(aarch64_enc_tail_call(jump_target)); 14607 14608 ins_pipe(pipe_class_call); 14609 %} 14610 14611 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop) 14612 %{ 14613 match(TailJump jump_target ex_oop); 14614 14615 ins_cost(CALL_COST); 14616 14617 format %{ "br $jump_target\t# $ex_oop holds exception oop" %} 14618 14619 ins_encode(aarch64_enc_tail_jmp(jump_target)); 14620 14621 ins_pipe(pipe_class_call); 14622 %} 14623 14624 // Create exception oop: created by stack-crawling runtime code. 14625 // Created exception is now available to this handler, and is setup 14626 // just prior to jumping to this handler. No code emitted. 14627 // TODO check 14628 // should ex_oop be in r0? 
intel uses rax, ppc cannot use r0 so uses rarg1
// Zero-size node: the exception oop is already in r0 (operand class
// iRegP_R0) when control arrives here, so nothing is emitted.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}


// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Die now.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // TODO
    // implement proper trap call here
    // For now stop with a breakpoint instruction; 999 is an arbitrary
    // immediate identifying this trap site.
    __ brk(999);
  %}

  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Partial Subtype Check
//
// superklass array for an instance of the superklass. Set a hidden
// internal cache on a hit (cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.
14692 14693 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr) 14694 %{ 14695 match(Set result (PartialSubtypeCheck sub super)); 14696 effect(KILL cr, KILL temp); 14697 14698 ins_cost(1100); // slightly larger than the next version 14699 format %{ "partialSubtypeCheck $result, $sub, $super" %} 14700 14701 ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result)); 14702 14703 opcode(0x1); // Force zero of result reg on hit 14704 14705 ins_pipe(pipe_class_memory); 14706 %} 14707 14708 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) 14709 %{ 14710 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 14711 effect(KILL temp, KILL result); 14712 14713 ins_cost(1100); // slightly larger than the next version 14714 format %{ "partialSubtypeCheck $result, $sub, $super == 0" %} 14715 14716 ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result)); 14717 14718 opcode(0x0); // Don't zero result reg on hit 14719 14720 ins_pipe(pipe_class_memory); 14721 %} 14722 14723 instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, 14724 iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr) 14725 %{ 14726 predicate(!CompactStrings); 14727 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 14728 effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 14729 14730 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %} 14731 ins_encode %{ 14732 // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
14733 __ asrw($cnt1$$Register, $cnt1$$Register, 1); 14734 __ asrw($cnt2$$Register, $cnt2$$Register, 1); 14735 __ string_compare($str1$$Register, $str2$$Register, 14736 $cnt1$$Register, $cnt2$$Register, $result$$Register, 14737 $tmp1$$Register); 14738 %} 14739 ins_pipe(pipe_class_memory); 14740 %} 14741 14742 instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2, 14743 iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr) 14744 %{ 14745 predicate(!CompactStrings); 14746 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 14747 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, 14748 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 14749 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result" %} 14750 14751 ins_encode %{ 14752 __ string_indexof($str1$$Register, $str2$$Register, 14753 $cnt1$$Register, $cnt2$$Register, 14754 $tmp1$$Register, $tmp2$$Register, 14755 $tmp3$$Register, $tmp4$$Register, 14756 -1, $result$$Register); 14757 %} 14758 ins_pipe(pipe_class_memory); 14759 %} 14760 14761 instruct string_indexof_con(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, 14762 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2, 14763 iRegI tmp3, iRegI tmp4, rFlagsReg cr) 14764 %{ 14765 predicate(!CompactStrings); 14766 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 14767 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, 14768 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 14769 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result" %} 14770 14771 ins_encode %{ 14772 int icnt2 = (int)$int_cnt2$$constant; 14773 __ string_indexof($str1$$Register, $str2$$Register, 14774 $cnt1$$Register, zr, 14775 $tmp1$$Register, $tmp2$$Register, 14776 $tmp3$$Register, $tmp4$$Register, 14777 icnt2, $result$$Register); 14778 %} 14779 ins_pipe(pipe_class_memory); 14780 %} 14781 14782 instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 
cnt, 14783 iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr) 14784 %{ 14785 predicate(!CompactStrings); 14786 match(Set result (StrEquals (Binary str1 str2) cnt)); 14787 effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); 14788 14789 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp" %} 14790 ins_encode %{ 14791 // Count is in 8-bit bytes; non-Compact chars are 16 bits. 14792 __ asrw($cnt$$Register, $cnt$$Register, 1); 14793 __ string_equals($str1$$Register, $str2$$Register, 14794 $cnt$$Register, $result$$Register, 14795 $tmp$$Register); 14796 %} 14797 ins_pipe(pipe_class_memory); 14798 %} 14799 14800 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, 14801 iRegP_R10 tmp, rFlagsReg cr) 14802 %{ 14803 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 14804 match(Set result (AryEq ary1 ary2)); 14805 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr); 14806 14807 format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} 14808 ins_encode %{ 14809 __ byte_arrays_equals($ary1$$Register, $ary2$$Register, 14810 $result$$Register, $tmp$$Register); 14811 %} 14812 ins_pipe(pipe_class_memory); 14813 %} 14814 14815 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, 14816 iRegP_R10 tmp, rFlagsReg cr) 14817 %{ 14818 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 14819 match(Set result (AryEq ary1 ary2)); 14820 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr); 14821 14822 format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} 14823 ins_encode %{ 14824 __ char_arrays_equals($ary1$$Register, $ary2$$Register, 14825 $result$$Register, $tmp$$Register); 14826 %} 14827 ins_pipe(pipe_class_memory); 14828 %} 14829 14830 // encode char[] to byte[] in ISO_8859_1 14831 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len, 14832 vRegD_V0 Vtmp1, vRegD_V1 Vtmp2, 14833 vRegD_V2 Vtmp3, vRegD_V3 Vtmp4, 14834 iRegI_R0 result, rFlagsReg cr) 14835 %{ 14836 
match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}

// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Zero-size, zero-cost node: the thread is already in the dedicated
// thread register (operand class thread_RegP), so no code is emitted.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Load vector (32 bits)
instruct loadV4(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem mem)
%{
  predicate(n->as_LoadVector()->memory_size() ==
16); 14896 match(Set dst (LoadVector mem)); 14897 ins_cost(4 * INSN_COST); 14898 format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} 14899 ins_encode( aarch64_enc_ldrvQ(dst, mem) ); 14900 ins_pipe(vload_reg_mem128); 14901 %} 14902 14903 // Store Vector (32 bits) 14904 instruct storeV4(vecD src, vmem mem) 14905 %{ 14906 predicate(n->as_StoreVector()->memory_size() == 4); 14907 match(Set mem (StoreVector mem src)); 14908 ins_cost(4 * INSN_COST); 14909 format %{ "strs $mem,$src\t# vector (32 bits)" %} 14910 ins_encode( aarch64_enc_strvS(src, mem) ); 14911 ins_pipe(vstore_reg_mem64); 14912 %} 14913 14914 // Store Vector (64 bits) 14915 instruct storeV8(vecD src, vmem mem) 14916 %{ 14917 predicate(n->as_StoreVector()->memory_size() == 8); 14918 match(Set mem (StoreVector mem src)); 14919 ins_cost(4 * INSN_COST); 14920 format %{ "strd $mem,$src\t# vector (64 bits)" %} 14921 ins_encode( aarch64_enc_strvD(src, mem) ); 14922 ins_pipe(vstore_reg_mem64); 14923 %} 14924 14925 // Store Vector (128 bits) 14926 instruct storeV16(vecX src, vmem mem) 14927 %{ 14928 predicate(n->as_StoreVector()->memory_size() == 16); 14929 match(Set mem (StoreVector mem src)); 14930 ins_cost(4 * INSN_COST); 14931 format %{ "strq $mem,$src\t# vector (128 bits)" %} 14932 ins_encode( aarch64_enc_strvQ(src, mem) ); 14933 ins_pipe(vstore_reg_mem128); 14934 %} 14935 14936 instruct replicate8B(vecD dst, iRegIorL2I src) 14937 %{ 14938 predicate(n->as_Vector()->length() == 4 || 14939 n->as_Vector()->length() == 8); 14940 match(Set dst (ReplicateB src)); 14941 ins_cost(INSN_COST); 14942 format %{ "dup $dst, $src\t# vector (8B)" %} 14943 ins_encode %{ 14944 __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); 14945 %} 14946 ins_pipe(vdup_reg_reg64); 14947 %} 14948 14949 instruct replicate16B(vecX dst, iRegIorL2I src) 14950 %{ 14951 predicate(n->as_Vector()->length() == 16); 14952 match(Set dst (ReplicateB src)); 14953 ins_cost(INSN_COST); 14954 format %{ "dup $dst, $src\t# vector (16B)" %} 
14955 ins_encode %{ 14956 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg)); 14957 %} 14958 ins_pipe(vdup_reg_reg128); 14959 %} 14960 14961 instruct replicate8B_imm(vecD dst, immI con) 14962 %{ 14963 predicate(n->as_Vector()->length() == 4 || 14964 n->as_Vector()->length() == 8); 14965 match(Set dst (ReplicateB con)); 14966 ins_cost(INSN_COST); 14967 format %{ "movi $dst, $con\t# vector(8B)" %} 14968 ins_encode %{ 14969 __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); 14970 %} 14971 ins_pipe(vmovi_reg_imm64); 14972 %} 14973 14974 instruct replicate16B_imm(vecX dst, immI con) 14975 %{ 14976 predicate(n->as_Vector()->length() == 16); 14977 match(Set dst (ReplicateB con)); 14978 ins_cost(INSN_COST); 14979 format %{ "movi $dst, $con\t# vector(16B)" %} 14980 ins_encode %{ 14981 __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); 14982 %} 14983 ins_pipe(vmovi_reg_imm128); 14984 %} 14985 14986 instruct replicate4S(vecD dst, iRegIorL2I src) 14987 %{ 14988 predicate(n->as_Vector()->length() == 2 || 14989 n->as_Vector()->length() == 4); 14990 match(Set dst (ReplicateS src)); 14991 ins_cost(INSN_COST); 14992 format %{ "dup $dst, $src\t# vector (4S)" %} 14993 ins_encode %{ 14994 __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); 14995 %} 14996 ins_pipe(vdup_reg_reg64); 14997 %} 14998 14999 instruct replicate8S(vecX dst, iRegIorL2I src) 15000 %{ 15001 predicate(n->as_Vector()->length() == 8); 15002 match(Set dst (ReplicateS src)); 15003 ins_cost(INSN_COST); 15004 format %{ "dup $dst, $src\t# vector (8S)" %} 15005 ins_encode %{ 15006 __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg)); 15007 %} 15008 ins_pipe(vdup_reg_reg128); 15009 %} 15010 15011 instruct replicate4S_imm(vecD dst, immI con) 15012 %{ 15013 predicate(n->as_Vector()->length() == 2 || 15014 n->as_Vector()->length() == 4); 15015 match(Set dst (ReplicateS con)); 15016 ins_cost(INSN_COST); 15017 format %{ "movi $dst, $con\t# 
vector(4H)" %} 15018 ins_encode %{ 15019 __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); 15020 %} 15021 ins_pipe(vmovi_reg_imm64); 15022 %} 15023 15024 instruct replicate8S_imm(vecX dst, immI con) 15025 %{ 15026 predicate(n->as_Vector()->length() == 8); 15027 match(Set dst (ReplicateS con)); 15028 ins_cost(INSN_COST); 15029 format %{ "movi $dst, $con\t# vector(8H)" %} 15030 ins_encode %{ 15031 __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); 15032 %} 15033 ins_pipe(vmovi_reg_imm128); 15034 %} 15035 15036 instruct replicate2I(vecD dst, iRegIorL2I src) 15037 %{ 15038 predicate(n->as_Vector()->length() == 2); 15039 match(Set dst (ReplicateI src)); 15040 ins_cost(INSN_COST); 15041 format %{ "dup $dst, $src\t# vector (2I)" %} 15042 ins_encode %{ 15043 __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); 15044 %} 15045 ins_pipe(vdup_reg_reg64); 15046 %} 15047 15048 instruct replicate4I(vecX dst, iRegIorL2I src) 15049 %{ 15050 predicate(n->as_Vector()->length() == 4); 15051 match(Set dst (ReplicateI src)); 15052 ins_cost(INSN_COST); 15053 format %{ "dup $dst, $src\t# vector (4I)" %} 15054 ins_encode %{ 15055 __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg)); 15056 %} 15057 ins_pipe(vdup_reg_reg128); 15058 %} 15059 15060 instruct replicate2I_imm(vecD dst, immI con) 15061 %{ 15062 predicate(n->as_Vector()->length() == 2); 15063 match(Set dst (ReplicateI con)); 15064 ins_cost(INSN_COST); 15065 format %{ "movi $dst, $con\t# vector(2I)" %} 15066 ins_encode %{ 15067 __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); 15068 %} 15069 ins_pipe(vmovi_reg_imm64); 15070 %} 15071 15072 instruct replicate4I_imm(vecX dst, immI con) 15073 %{ 15074 predicate(n->as_Vector()->length() == 4); 15075 match(Set dst (ReplicateI con)); 15076 ins_cost(INSN_COST); 15077 format %{ "movi $dst, $con\t# vector(4I)" %} 15078 ins_encode %{ 15079 __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant); 15080 %} 15081 
ins_pipe(vmovi_reg_imm128); 15082 %} 15083 15084 instruct replicate2L(vecX dst, iRegL src) 15085 %{ 15086 predicate(n->as_Vector()->length() == 2); 15087 match(Set dst (ReplicateL src)); 15088 ins_cost(INSN_COST); 15089 format %{ "dup $dst, $src\t# vector (2L)" %} 15090 ins_encode %{ 15091 __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg)); 15092 %} 15093 ins_pipe(vdup_reg_reg128); 15094 %} 15095 15096 instruct replicate2L_zero(vecX dst, immI0 zero) 15097 %{ 15098 predicate(n->as_Vector()->length() == 2); 15099 match(Set dst (ReplicateI zero)); 15100 ins_cost(INSN_COST); 15101 format %{ "movi $dst, $zero\t# vector(4I)" %} 15102 ins_encode %{ 15103 __ eor(as_FloatRegister($dst$$reg), __ T16B, 15104 as_FloatRegister($dst$$reg), 15105 as_FloatRegister($dst$$reg)); 15106 %} 15107 ins_pipe(vmovi_reg_imm128); 15108 %} 15109 15110 instruct replicate2F(vecD dst, vRegF src) 15111 %{ 15112 predicate(n->as_Vector()->length() == 2); 15113 match(Set dst (ReplicateF src)); 15114 ins_cost(INSN_COST); 15115 format %{ "dup $dst, $src\t# vector (2F)" %} 15116 ins_encode %{ 15117 __ dup(as_FloatRegister($dst$$reg), __ T2S, 15118 as_FloatRegister($src$$reg)); 15119 %} 15120 ins_pipe(vdup_reg_freg64); 15121 %} 15122 15123 instruct replicate4F(vecX dst, vRegF src) 15124 %{ 15125 predicate(n->as_Vector()->length() == 4); 15126 match(Set dst (ReplicateF src)); 15127 ins_cost(INSN_COST); 15128 format %{ "dup $dst, $src\t# vector (4F)" %} 15129 ins_encode %{ 15130 __ dup(as_FloatRegister($dst$$reg), __ T4S, 15131 as_FloatRegister($src$$reg)); 15132 %} 15133 ins_pipe(vdup_reg_freg128); 15134 %} 15135 15136 instruct replicate2D(vecX dst, vRegD src) 15137 %{ 15138 predicate(n->as_Vector()->length() == 2); 15139 match(Set dst (ReplicateD src)); 15140 ins_cost(INSN_COST); 15141 format %{ "dup $dst, $src\t# vector (2D)" %} 15142 ins_encode %{ 15143 __ dup(as_FloatRegister($dst$$reg), __ T2D, 15144 as_FloatRegister($src$$reg)); 15145 %} 15146 ins_pipe(vdup_reg_dreg128); 15147 %} 
15148 15149 // ====================REDUCTION ARITHMETIC==================================== 15150 15151 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2) 15152 %{ 15153 match(Set dst (AddReductionVI src1 src2)); 15154 ins_cost(INSN_COST); 15155 effect(TEMP tmp, TEMP tmp2); 15156 format %{ "umov $tmp, $src2, S, 0\n\t" 15157 "umov $tmp2, $src2, S, 1\n\t" 15158 "addw $dst, $src1, $tmp\n\t" 15159 "addw $dst, $dst, $tmp2\t add reduction2i" 15160 %} 15161 ins_encode %{ 15162 __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0); 15163 __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1); 15164 __ addw($dst$$Register, $src1$$Register, $tmp$$Register); 15165 __ addw($dst$$Register, $dst$$Register, $tmp2$$Register); 15166 %} 15167 ins_pipe(pipe_class_default); 15168 %} 15169 15170 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) 15171 %{ 15172 match(Set dst (AddReductionVI src1 src2)); 15173 ins_cost(INSN_COST); 15174 effect(TEMP tmp, TEMP tmp2); 15175 format %{ "addv $tmp, T4S, $src2\n\t" 15176 "umov $tmp2, $tmp, S, 0\n\t" 15177 "addw $dst, $tmp2, $src1\t add reduction4i" 15178 %} 15179 ins_encode %{ 15180 __ addv(as_FloatRegister($tmp$$reg), __ T4S, 15181 as_FloatRegister($src2$$reg)); 15182 __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0); 15183 __ addw($dst$$Register, $tmp2$$Register, $src1$$Register); 15184 %} 15185 ins_pipe(pipe_class_default); 15186 %} 15187 15188 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp) 15189 %{ 15190 match(Set dst (MulReductionVI src1 src2)); 15191 ins_cost(INSN_COST); 15192 effect(TEMP tmp, TEMP dst); 15193 format %{ "umov $tmp, $src2, S, 0\n\t" 15194 "mul $dst, $tmp, $src1\n\t" 15195 "umov $tmp, $src2, S, 1\n\t" 15196 "mul $dst, $tmp, $dst\t mul reduction2i\n\t" 15197 %} 15198 ins_encode %{ 15199 __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0); 15200 __ mul($dst$$Register, $tmp$$Register, 
$src1$$Register); 15201 __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1); 15202 __ mul($dst$$Register, $tmp$$Register, $dst$$Register); 15203 %} 15204 ins_pipe(pipe_class_default); 15205 %} 15206 15207 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) 15208 %{ 15209 match(Set dst (MulReductionVI src1 src2)); 15210 ins_cost(INSN_COST); 15211 effect(TEMP tmp, TEMP tmp2, TEMP dst); 15212 format %{ "ins $tmp, $src2, 0, 1\n\t" 15213 "mul $tmp, $tmp, $src2\n\t" 15214 "umov $tmp2, $tmp, S, 0\n\t" 15215 "mul $dst, $tmp2, $src1\n\t" 15216 "umov $tmp2, $tmp, S, 1\n\t" 15217 "mul $dst, $tmp2, $dst\t mul reduction4i\n\t" 15218 %} 15219 ins_encode %{ 15220 __ ins(as_FloatRegister($tmp$$reg), __ D, 15221 as_FloatRegister($src2$$reg), 0, 1); 15222 __ mulv(as_FloatRegister($tmp$$reg), __ T2S, 15223 as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg)); 15224 __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0); 15225 __ mul($dst$$Register, $tmp2$$Register, $src1$$Register); 15226 __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1); 15227 __ mul($dst$$Register, $tmp2$$Register, $dst$$Register); 15228 %} 15229 ins_pipe(pipe_class_default); 15230 %} 15231 15232 instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) 15233 %{ 15234 match(Set dst (AddReductionVF src1 src2)); 15235 ins_cost(INSN_COST); 15236 effect(TEMP tmp, TEMP dst); 15237 format %{ "fadds $dst, $src1, $src2\n\t" 15238 "ins $tmp, S, $src2, 0, 1\n\t" 15239 "fadds $dst, $dst, $tmp\t add reduction2f" 15240 %} 15241 ins_encode %{ 15242 __ fadds(as_FloatRegister($dst$$reg), 15243 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); 15244 __ ins(as_FloatRegister($tmp$$reg), __ S, 15245 as_FloatRegister($src2$$reg), 0, 1); 15246 __ fadds(as_FloatRegister($dst$$reg), 15247 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15248 %} 15249 ins_pipe(pipe_class_default); 15250 %} 15251 15252 instruct reduce_add4F(vRegF dst, 
vRegF src1, vecX src2, vecX tmp) 15253 %{ 15254 match(Set dst (AddReductionVF src1 src2)); 15255 ins_cost(INSN_COST); 15256 effect(TEMP tmp, TEMP dst); 15257 format %{ "fadds $dst, $src1, $src2\n\t" 15258 "ins $tmp, S, $src2, 0, 1\n\t" 15259 "fadds $dst, $dst, $tmp\n\t" 15260 "ins $tmp, S, $src2, 0, 2\n\t" 15261 "fadds $dst, $dst, $tmp\n\t" 15262 "ins $tmp, S, $src2, 0, 3\n\t" 15263 "fadds $dst, $dst, $tmp\t add reduction4f" 15264 %} 15265 ins_encode %{ 15266 __ fadds(as_FloatRegister($dst$$reg), 15267 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); 15268 __ ins(as_FloatRegister($tmp$$reg), __ S, 15269 as_FloatRegister($src2$$reg), 0, 1); 15270 __ fadds(as_FloatRegister($dst$$reg), 15271 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15272 __ ins(as_FloatRegister($tmp$$reg), __ S, 15273 as_FloatRegister($src2$$reg), 0, 2); 15274 __ fadds(as_FloatRegister($dst$$reg), 15275 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15276 __ ins(as_FloatRegister($tmp$$reg), __ S, 15277 as_FloatRegister($src2$$reg), 0, 3); 15278 __ fadds(as_FloatRegister($dst$$reg), 15279 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15280 %} 15281 ins_pipe(pipe_class_default); 15282 %} 15283 15284 instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) 15285 %{ 15286 match(Set dst (MulReductionVF src1 src2)); 15287 ins_cost(INSN_COST); 15288 effect(TEMP tmp, TEMP dst); 15289 format %{ "fmuls $dst, $src1, $src2\n\t" 15290 "ins $tmp, S, $src2, 0, 1\n\t" 15291 "fmuls $dst, $dst, $tmp\t add reduction4f" 15292 %} 15293 ins_encode %{ 15294 __ fmuls(as_FloatRegister($dst$$reg), 15295 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); 15296 __ ins(as_FloatRegister($tmp$$reg), __ S, 15297 as_FloatRegister($src2$$reg), 0, 1); 15298 __ fmuls(as_FloatRegister($dst$$reg), 15299 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15300 %} 15301 ins_pipe(pipe_class_default); 15302 %} 15303 15304 instruct reduce_mul4F(vRegF dst, vRegF 
src1, vecX src2, vecX tmp) 15305 %{ 15306 match(Set dst (MulReductionVF src1 src2)); 15307 ins_cost(INSN_COST); 15308 effect(TEMP tmp, TEMP dst); 15309 format %{ "fmuls $dst, $src1, $src2\n\t" 15310 "ins $tmp, S, $src2, 0, 1\n\t" 15311 "fmuls $dst, $dst, $tmp\n\t" 15312 "ins $tmp, S, $src2, 0, 2\n\t" 15313 "fmuls $dst, $dst, $tmp\n\t" 15314 "ins $tmp, S, $src2, 0, 3\n\t" 15315 "fmuls $dst, $dst, $tmp\t add reduction4f" 15316 %} 15317 ins_encode %{ 15318 __ fmuls(as_FloatRegister($dst$$reg), 15319 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); 15320 __ ins(as_FloatRegister($tmp$$reg), __ S, 15321 as_FloatRegister($src2$$reg), 0, 1); 15322 __ fmuls(as_FloatRegister($dst$$reg), 15323 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15324 __ ins(as_FloatRegister($tmp$$reg), __ S, 15325 as_FloatRegister($src2$$reg), 0, 2); 15326 __ fmuls(as_FloatRegister($dst$$reg), 15327 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15328 __ ins(as_FloatRegister($tmp$$reg), __ S, 15329 as_FloatRegister($src2$$reg), 0, 3); 15330 __ fmuls(as_FloatRegister($dst$$reg), 15331 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15332 %} 15333 ins_pipe(pipe_class_default); 15334 %} 15335 15336 instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp) 15337 %{ 15338 match(Set dst (AddReductionVD src1 src2)); 15339 ins_cost(INSN_COST); 15340 effect(TEMP tmp, TEMP dst); 15341 format %{ "faddd $dst, $src1, $src2\n\t" 15342 "ins $tmp, D, $src2, 0, 1\n\t" 15343 "faddd $dst, $dst, $tmp\t add reduction2d" 15344 %} 15345 ins_encode %{ 15346 __ faddd(as_FloatRegister($dst$$reg), 15347 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); 15348 __ ins(as_FloatRegister($tmp$$reg), __ D, 15349 as_FloatRegister($src2$$reg), 0, 1); 15350 __ faddd(as_FloatRegister($dst$$reg), 15351 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15352 %} 15353 ins_pipe(pipe_class_default); 15354 %} 15355 15356 instruct reduce_mul2D(vRegD dst, vRegD src1, 
vecX src2, vecX tmp) 15357 %{ 15358 match(Set dst (MulReductionVD src1 src2)); 15359 ins_cost(INSN_COST); 15360 effect(TEMP tmp, TEMP dst); 15361 format %{ "fmuld $dst, $src1, $src2\n\t" 15362 "ins $tmp, D, $src2, 0, 1\n\t" 15363 "fmuld $dst, $dst, $tmp\t add reduction2d" 15364 %} 15365 ins_encode %{ 15366 __ fmuld(as_FloatRegister($dst$$reg), 15367 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); 15368 __ ins(as_FloatRegister($tmp$$reg), __ D, 15369 as_FloatRegister($src2$$reg), 0, 1); 15370 __ fmuld(as_FloatRegister($dst$$reg), 15371 as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); 15372 %} 15373 ins_pipe(pipe_class_default); 15374 %} 15375 15376 // ====================VECTOR ARITHMETIC======================================= 15377 15378 // --------------------------------- ADD -------------------------------------- 15379 15380 instruct vadd8B(vecD dst, vecD src1, vecD src2) 15381 %{ 15382 predicate(n->as_Vector()->length() == 4 || 15383 n->as_Vector()->length() == 8); 15384 match(Set dst (AddVB src1 src2)); 15385 ins_cost(INSN_COST); 15386 format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} 15387 ins_encode %{ 15388 __ addv(as_FloatRegister($dst$$reg), __ T8B, 15389 as_FloatRegister($src1$$reg), 15390 as_FloatRegister($src2$$reg)); 15391 %} 15392 ins_pipe(vdop64); 15393 %} 15394 15395 instruct vadd16B(vecX dst, vecX src1, vecX src2) 15396 %{ 15397 predicate(n->as_Vector()->length() == 16); 15398 match(Set dst (AddVB src1 src2)); 15399 ins_cost(INSN_COST); 15400 format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} 15401 ins_encode %{ 15402 __ addv(as_FloatRegister($dst$$reg), __ T16B, 15403 as_FloatRegister($src1$$reg), 15404 as_FloatRegister($src2$$reg)); 15405 %} 15406 ins_pipe(vdop128); 15407 %} 15408 15409 instruct vadd4S(vecD dst, vecD src1, vecD src2) 15410 %{ 15411 predicate(n->as_Vector()->length() == 2 || 15412 n->as_Vector()->length() == 4); 15413 match(Set dst (AddVS src1 src2)); 15414 ins_cost(INSN_COST); 15415 format %{ "addv 
$dst,$src1,$src2\t# vector (4H)" %} 15416 ins_encode %{ 15417 __ addv(as_FloatRegister($dst$$reg), __ T4H, 15418 as_FloatRegister($src1$$reg), 15419 as_FloatRegister($src2$$reg)); 15420 %} 15421 ins_pipe(vdop64); 15422 %} 15423 15424 instruct vadd8S(vecX dst, vecX src1, vecX src2) 15425 %{ 15426 predicate(n->as_Vector()->length() == 8); 15427 match(Set dst (AddVS src1 src2)); 15428 ins_cost(INSN_COST); 15429 format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} 15430 ins_encode %{ 15431 __ addv(as_FloatRegister($dst$$reg), __ T8H, 15432 as_FloatRegister($src1$$reg), 15433 as_FloatRegister($src2$$reg)); 15434 %} 15435 ins_pipe(vdop128); 15436 %} 15437 15438 instruct vadd2I(vecD dst, vecD src1, vecD src2) 15439 %{ 15440 predicate(n->as_Vector()->length() == 2); 15441 match(Set dst (AddVI src1 src2)); 15442 ins_cost(INSN_COST); 15443 format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} 15444 ins_encode %{ 15445 __ addv(as_FloatRegister($dst$$reg), __ T2S, 15446 as_FloatRegister($src1$$reg), 15447 as_FloatRegister($src2$$reg)); 15448 %} 15449 ins_pipe(vdop64); 15450 %} 15451 15452 instruct vadd4I(vecX dst, vecX src1, vecX src2) 15453 %{ 15454 predicate(n->as_Vector()->length() == 4); 15455 match(Set dst (AddVI src1 src2)); 15456 ins_cost(INSN_COST); 15457 format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} 15458 ins_encode %{ 15459 __ addv(as_FloatRegister($dst$$reg), __ T4S, 15460 as_FloatRegister($src1$$reg), 15461 as_FloatRegister($src2$$reg)); 15462 %} 15463 ins_pipe(vdop128); 15464 %} 15465 15466 instruct vadd2L(vecX dst, vecX src1, vecX src2) 15467 %{ 15468 predicate(n->as_Vector()->length() == 2); 15469 match(Set dst (AddVL src1 src2)); 15470 ins_cost(INSN_COST); 15471 format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} 15472 ins_encode %{ 15473 __ addv(as_FloatRegister($dst$$reg), __ T2D, 15474 as_FloatRegister($src1$$reg), 15475 as_FloatRegister($src2$$reg)); 15476 %} 15477 ins_pipe(vdop128); 15478 %} 15479 15480 instruct vadd2F(vecD dst, vecD src1, vecD src2) 
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Guard on vector length like every sibling 128-bit vector rule
  // (cf. vsub2D, vmul4I, replicate2D); previously this rule had no
  // predicate and could match an AddVD of any vector length.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// --------------------------------- SUB --------------------------------------

instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (16B)" %}
ins_encode %{ 15545 __ subv(as_FloatRegister($dst$$reg), __ T16B, 15546 as_FloatRegister($src1$$reg), 15547 as_FloatRegister($src2$$reg)); 15548 %} 15549 ins_pipe(vdop128); 15550 %} 15551 15552 instruct vsub4S(vecD dst, vecD src1, vecD src2) 15553 %{ 15554 predicate(n->as_Vector()->length() == 2 || 15555 n->as_Vector()->length() == 4); 15556 match(Set dst (SubVS src1 src2)); 15557 ins_cost(INSN_COST); 15558 format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} 15559 ins_encode %{ 15560 __ subv(as_FloatRegister($dst$$reg), __ T4H, 15561 as_FloatRegister($src1$$reg), 15562 as_FloatRegister($src2$$reg)); 15563 %} 15564 ins_pipe(vdop64); 15565 %} 15566 15567 instruct vsub8S(vecX dst, vecX src1, vecX src2) 15568 %{ 15569 predicate(n->as_Vector()->length() == 8); 15570 match(Set dst (SubVS src1 src2)); 15571 ins_cost(INSN_COST); 15572 format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} 15573 ins_encode %{ 15574 __ subv(as_FloatRegister($dst$$reg), __ T8H, 15575 as_FloatRegister($src1$$reg), 15576 as_FloatRegister($src2$$reg)); 15577 %} 15578 ins_pipe(vdop128); 15579 %} 15580 15581 instruct vsub2I(vecD dst, vecD src1, vecD src2) 15582 %{ 15583 predicate(n->as_Vector()->length() == 2); 15584 match(Set dst (SubVI src1 src2)); 15585 ins_cost(INSN_COST); 15586 format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} 15587 ins_encode %{ 15588 __ subv(as_FloatRegister($dst$$reg), __ T2S, 15589 as_FloatRegister($src1$$reg), 15590 as_FloatRegister($src2$$reg)); 15591 %} 15592 ins_pipe(vdop64); 15593 %} 15594 15595 instruct vsub4I(vecX dst, vecX src1, vecX src2) 15596 %{ 15597 predicate(n->as_Vector()->length() == 4); 15598 match(Set dst (SubVI src1 src2)); 15599 ins_cost(INSN_COST); 15600 format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} 15601 ins_encode %{ 15602 __ subv(as_FloatRegister($dst$$reg), __ T4S, 15603 as_FloatRegister($src1$$reg), 15604 as_FloatRegister($src2$$reg)); 15605 %} 15606 ins_pipe(vdop128); 15607 %} 15608 15609 instruct vsub2L(vecX dst, vecX src1, vecX src2) 
15610 %{ 15611 predicate(n->as_Vector()->length() == 2); 15612 match(Set dst (SubVL src1 src2)); 15613 ins_cost(INSN_COST); 15614 format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} 15615 ins_encode %{ 15616 __ subv(as_FloatRegister($dst$$reg), __ T2D, 15617 as_FloatRegister($src1$$reg), 15618 as_FloatRegister($src2$$reg)); 15619 %} 15620 ins_pipe(vdop128); 15621 %} 15622 15623 instruct vsub2F(vecD dst, vecD src1, vecD src2) 15624 %{ 15625 predicate(n->as_Vector()->length() == 2); 15626 match(Set dst (SubVF src1 src2)); 15627 ins_cost(INSN_COST); 15628 format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} 15629 ins_encode %{ 15630 __ fsub(as_FloatRegister($dst$$reg), __ T2S, 15631 as_FloatRegister($src1$$reg), 15632 as_FloatRegister($src2$$reg)); 15633 %} 15634 ins_pipe(vdop_fp64); 15635 %} 15636 15637 instruct vsub4F(vecX dst, vecX src1, vecX src2) 15638 %{ 15639 predicate(n->as_Vector()->length() == 4); 15640 match(Set dst (SubVF src1 src2)); 15641 ins_cost(INSN_COST); 15642 format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} 15643 ins_encode %{ 15644 __ fsub(as_FloatRegister($dst$$reg), __ T4S, 15645 as_FloatRegister($src1$$reg), 15646 as_FloatRegister($src2$$reg)); 15647 %} 15648 ins_pipe(vdop_fp128); 15649 %} 15650 15651 instruct vsub2D(vecX dst, vecX src1, vecX src2) 15652 %{ 15653 predicate(n->as_Vector()->length() == 2); 15654 match(Set dst (SubVD src1 src2)); 15655 ins_cost(INSN_COST); 15656 format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} 15657 ins_encode %{ 15658 __ fsub(as_FloatRegister($dst$$reg), __ T2D, 15659 as_FloatRegister($src1$$reg), 15660 as_FloatRegister($src2$$reg)); 15661 %} 15662 ins_pipe(vdop_fp128); 15663 %} 15664 15665 // --------------------------------- MUL -------------------------------------- 15666 15667 instruct vmul4S(vecD dst, vecD src1, vecD src2) 15668 %{ 15669 predicate(n->as_Vector()->length() == 2 || 15670 n->as_Vector()->length() == 4); 15671 match(Set dst (MulVS src1 src2)); 15672 ins_cost(INSN_COST); 15673 format %{ 
"mulv $dst,$src1,$src2\t# vector (4H)" %} 15674 ins_encode %{ 15675 __ mulv(as_FloatRegister($dst$$reg), __ T4H, 15676 as_FloatRegister($src1$$reg), 15677 as_FloatRegister($src2$$reg)); 15678 %} 15679 ins_pipe(vmul64); 15680 %} 15681 15682 instruct vmul8S(vecX dst, vecX src1, vecX src2) 15683 %{ 15684 predicate(n->as_Vector()->length() == 8); 15685 match(Set dst (MulVS src1 src2)); 15686 ins_cost(INSN_COST); 15687 format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} 15688 ins_encode %{ 15689 __ mulv(as_FloatRegister($dst$$reg), __ T8H, 15690 as_FloatRegister($src1$$reg), 15691 as_FloatRegister($src2$$reg)); 15692 %} 15693 ins_pipe(vmul128); 15694 %} 15695 15696 instruct vmul2I(vecD dst, vecD src1, vecD src2) 15697 %{ 15698 predicate(n->as_Vector()->length() == 2); 15699 match(Set dst (MulVI src1 src2)); 15700 ins_cost(INSN_COST); 15701 format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %} 15702 ins_encode %{ 15703 __ mulv(as_FloatRegister($dst$$reg), __ T2S, 15704 as_FloatRegister($src1$$reg), 15705 as_FloatRegister($src2$$reg)); 15706 %} 15707 ins_pipe(vmul64); 15708 %} 15709 15710 instruct vmul4I(vecX dst, vecX src1, vecX src2) 15711 %{ 15712 predicate(n->as_Vector()->length() == 4); 15713 match(Set dst (MulVI src1 src2)); 15714 ins_cost(INSN_COST); 15715 format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} 15716 ins_encode %{ 15717 __ mulv(as_FloatRegister($dst$$reg), __ T4S, 15718 as_FloatRegister($src1$$reg), 15719 as_FloatRegister($src2$$reg)); 15720 %} 15721 ins_pipe(vmul128); 15722 %} 15723 15724 instruct vmul2F(vecD dst, vecD src1, vecD src2) 15725 %{ 15726 predicate(n->as_Vector()->length() == 2); 15727 match(Set dst (MulVF src1 src2)); 15728 ins_cost(INSN_COST); 15729 format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %} 15730 ins_encode %{ 15731 __ fmul(as_FloatRegister($dst$$reg), __ T2S, 15732 as_FloatRegister($src1$$reg), 15733 as_FloatRegister($src2$$reg)); 15734 %} 15735 ins_pipe(vmuldiv_fp64); 15736 %} 15737 15738 instruct vmul4F(vecX dst, vecX src1, 
vecX src2) 15739 %{ 15740 predicate(n->as_Vector()->length() == 4); 15741 match(Set dst (MulVF src1 src2)); 15742 ins_cost(INSN_COST); 15743 format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} 15744 ins_encode %{ 15745 __ fmul(as_FloatRegister($dst$$reg), __ T4S, 15746 as_FloatRegister($src1$$reg), 15747 as_FloatRegister($src2$$reg)); 15748 %} 15749 ins_pipe(vmuldiv_fp128); 15750 %} 15751 15752 instruct vmul2D(vecX dst, vecX src1, vecX src2) 15753 %{ 15754 predicate(n->as_Vector()->length() == 2); 15755 match(Set dst (MulVD src1 src2)); 15756 ins_cost(INSN_COST); 15757 format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} 15758 ins_encode %{ 15759 __ fmul(as_FloatRegister($dst$$reg), __ T2D, 15760 as_FloatRegister($src1$$reg), 15761 as_FloatRegister($src2$$reg)); 15762 %} 15763 ins_pipe(vmuldiv_fp128); 15764 %} 15765 15766 // --------------------------------- MLA -------------------------------------- 15767 15768 instruct vmla4S(vecD dst, vecD src1, vecD src2) 15769 %{ 15770 predicate(n->as_Vector()->length() == 2 || 15771 n->as_Vector()->length() == 4); 15772 match(Set dst (AddVS dst (MulVS src1 src2))); 15773 ins_cost(INSN_COST); 15774 format %{ "mlav $dst,$src1,$src2\t# vector (4H)" %} 15775 ins_encode %{ 15776 __ mlav(as_FloatRegister($dst$$reg), __ T4H, 15777 as_FloatRegister($src1$$reg), 15778 as_FloatRegister($src2$$reg)); 15779 %} 15780 ins_pipe(vmla64); 15781 %} 15782 15783 instruct vmla8S(vecX dst, vecX src1, vecX src2) 15784 %{ 15785 predicate(n->as_Vector()->length() == 8); 15786 match(Set dst (AddVS dst (MulVS src1 src2))); 15787 ins_cost(INSN_COST); 15788 format %{ "mlav $dst,$src1,$src2\t# vector (8H)" %} 15789 ins_encode %{ 15790 __ mlav(as_FloatRegister($dst$$reg), __ T8H, 15791 as_FloatRegister($src1$$reg), 15792 as_FloatRegister($src2$$reg)); 15793 %} 15794 ins_pipe(vmla128); 15795 %} 15796 15797 instruct vmla2I(vecD dst, vecD src1, vecD src2) 15798 %{ 15799 predicate(n->as_Vector()->length() == 2); 15800 match(Set dst (AddVI dst (MulVI src1 
src2))); 15801 ins_cost(INSN_COST); 15802 format %{ "mlav $dst,$src1,$src2\t# vector (2S)" %} 15803 ins_encode %{ 15804 __ mlav(as_FloatRegister($dst$$reg), __ T2S, 15805 as_FloatRegister($src1$$reg), 15806 as_FloatRegister($src2$$reg)); 15807 %} 15808 ins_pipe(vmla64); 15809 %} 15810 15811 instruct vmla4I(vecX dst, vecX src1, vecX src2) 15812 %{ 15813 predicate(n->as_Vector()->length() == 4); 15814 match(Set dst (AddVI dst (MulVI src1 src2))); 15815 ins_cost(INSN_COST); 15816 format %{ "mlav $dst,$src1,$src2\t# vector (4S)" %} 15817 ins_encode %{ 15818 __ mlav(as_FloatRegister($dst$$reg), __ T4S, 15819 as_FloatRegister($src1$$reg), 15820 as_FloatRegister($src2$$reg)); 15821 %} 15822 ins_pipe(vmla128); 15823 %} 15824 15825 // --------------------------------- MLS -------------------------------------- 15826 15827 instruct vmls4S(vecD dst, vecD src1, vecD src2) 15828 %{ 15829 predicate(n->as_Vector()->length() == 2 || 15830 n->as_Vector()->length() == 4); 15831 match(Set dst (SubVS dst (MulVS src1 src2))); 15832 ins_cost(INSN_COST); 15833 format %{ "mlsv $dst,$src1,$src2\t# vector (4H)" %} 15834 ins_encode %{ 15835 __ mlsv(as_FloatRegister($dst$$reg), __ T4H, 15836 as_FloatRegister($src1$$reg), 15837 as_FloatRegister($src2$$reg)); 15838 %} 15839 ins_pipe(vmla64); 15840 %} 15841 15842 instruct vmls8S(vecX dst, vecX src1, vecX src2) 15843 %{ 15844 predicate(n->as_Vector()->length() == 8); 15845 match(Set dst (SubVS dst (MulVS src1 src2))); 15846 ins_cost(INSN_COST); 15847 format %{ "mlsv $dst,$src1,$src2\t# vector (8H)" %} 15848 ins_encode %{ 15849 __ mlsv(as_FloatRegister($dst$$reg), __ T8H, 15850 as_FloatRegister($src1$$reg), 15851 as_FloatRegister($src2$$reg)); 15852 %} 15853 ins_pipe(vmla128); 15854 %} 15855 15856 instruct vmls2I(vecD dst, vecD src1, vecD src2) 15857 %{ 15858 predicate(n->as_Vector()->length() == 2); 15859 match(Set dst (SubVI dst (MulVI src1 src2))); 15860 ins_cost(INSN_COST); 15861 format %{ "mlsv $dst,$src1,$src2\t# vector (2S)" %} 15862 
ins_encode %{ 15863 __ mlsv(as_FloatRegister($dst$$reg), __ T2S, 15864 as_FloatRegister($src1$$reg), 15865 as_FloatRegister($src2$$reg)); 15866 %} 15867 ins_pipe(vmla64); 15868 %} 15869 15870 instruct vmls4I(vecX dst, vecX src1, vecX src2) 15871 %{ 15872 predicate(n->as_Vector()->length() == 4); 15873 match(Set dst (SubVI dst (MulVI src1 src2))); 15874 ins_cost(INSN_COST); 15875 format %{ "mlsv $dst,$src1,$src2\t# vector (4S)" %} 15876 ins_encode %{ 15877 __ mlsv(as_FloatRegister($dst$$reg), __ T4S, 15878 as_FloatRegister($src1$$reg), 15879 as_FloatRegister($src2$$reg)); 15880 %} 15881 ins_pipe(vmla128); 15882 %} 15883 15884 // --------------------------------- DIV -------------------------------------- 15885 15886 instruct vdiv2F(vecD dst, vecD src1, vecD src2) 15887 %{ 15888 predicate(n->as_Vector()->length() == 2); 15889 match(Set dst (DivVF src1 src2)); 15890 ins_cost(INSN_COST); 15891 format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %} 15892 ins_encode %{ 15893 __ fdiv(as_FloatRegister($dst$$reg), __ T2S, 15894 as_FloatRegister($src1$$reg), 15895 as_FloatRegister($src2$$reg)); 15896 %} 15897 ins_pipe(vmuldiv_fp64); 15898 %} 15899 15900 instruct vdiv4F(vecX dst, vecX src1, vecX src2) 15901 %{ 15902 predicate(n->as_Vector()->length() == 4); 15903 match(Set dst (DivVF src1 src2)); 15904 ins_cost(INSN_COST); 15905 format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} 15906 ins_encode %{ 15907 __ fdiv(as_FloatRegister($dst$$reg), __ T4S, 15908 as_FloatRegister($src1$$reg), 15909 as_FloatRegister($src2$$reg)); 15910 %} 15911 ins_pipe(vmuldiv_fp128); 15912 %} 15913 15914 instruct vdiv2D(vecX dst, vecX src1, vecX src2) 15915 %{ 15916 predicate(n->as_Vector()->length() == 2); 15917 match(Set dst (DivVD src1 src2)); 15918 ins_cost(INSN_COST); 15919 format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} 15920 ins_encode %{ 15921 __ fdiv(as_FloatRegister($dst$$reg), __ T2D, 15922 as_FloatRegister($src1$$reg), 15923 as_FloatRegister($src2$$reg)); 15924 %} 15925 
ins_pipe(vmuldiv_fp128); 15926 %} 15927 15928 // --------------------------------- SQRT ------------------------------------- 15929 15930 instruct vsqrt2D(vecX dst, vecX src) 15931 %{ 15932 predicate(n->as_Vector()->length() == 2); 15933 match(Set dst (SqrtVD src)); 15934 format %{ "fsqrt $dst, $src\t# vector (2D)" %} 15935 ins_encode %{ 15936 __ fsqrt(as_FloatRegister($dst$$reg), __ T2D, 15937 as_FloatRegister($src$$reg)); 15938 %} 15939 ins_pipe(vsqrt_fp128); 15940 %} 15941 15942 // --------------------------------- ABS -------------------------------------- 15943 15944 instruct vabs2F(vecD dst, vecD src) 15945 %{ 15946 predicate(n->as_Vector()->length() == 2); 15947 match(Set dst (AbsVF src)); 15948 ins_cost(INSN_COST * 3); 15949 format %{ "fabs $dst,$src\t# vector (2S)" %} 15950 ins_encode %{ 15951 __ fabs(as_FloatRegister($dst$$reg), __ T2S, 15952 as_FloatRegister($src$$reg)); 15953 %} 15954 ins_pipe(vunop_fp64); 15955 %} 15956 15957 instruct vabs4F(vecX dst, vecX src) 15958 %{ 15959 predicate(n->as_Vector()->length() == 4); 15960 match(Set dst (AbsVF src)); 15961 ins_cost(INSN_COST * 3); 15962 format %{ "fabs $dst,$src\t# vector (4S)" %} 15963 ins_encode %{ 15964 __ fabs(as_FloatRegister($dst$$reg), __ T4S, 15965 as_FloatRegister($src$$reg)); 15966 %} 15967 ins_pipe(vunop_fp128); 15968 %} 15969 15970 instruct vabs2D(vecX dst, vecX src) 15971 %{ 15972 predicate(n->as_Vector()->length() == 2); 15973 match(Set dst (AbsVD src)); 15974 ins_cost(INSN_COST * 3); 15975 format %{ "fabs $dst,$src\t# vector (2D)" %} 15976 ins_encode %{ 15977 __ fabs(as_FloatRegister($dst$$reg), __ T2D, 15978 as_FloatRegister($src$$reg)); 15979 %} 15980 ins_pipe(vunop_fp128); 15981 %} 15982 15983 // --------------------------------- NEG -------------------------------------- 15984 15985 instruct vneg2F(vecD dst, vecD src) 15986 %{ 15987 predicate(n->as_Vector()->length() == 2); 15988 match(Set dst (NegVF src)); 15989 ins_cost(INSN_COST * 3); 15990 format %{ "fneg $dst,$src\t# vector 
(2S)" %} 15991 ins_encode %{ 15992 __ fneg(as_FloatRegister($dst$$reg), __ T2S, 15993 as_FloatRegister($src$$reg)); 15994 %} 15995 ins_pipe(vunop_fp64); 15996 %} 15997 15998 instruct vneg4F(vecX dst, vecX src) 15999 %{ 16000 predicate(n->as_Vector()->length() == 4); 16001 match(Set dst (NegVF src)); 16002 ins_cost(INSN_COST * 3); 16003 format %{ "fneg $dst,$src\t# vector (4S)" %} 16004 ins_encode %{ 16005 __ fneg(as_FloatRegister($dst$$reg), __ T4S, 16006 as_FloatRegister($src$$reg)); 16007 %} 16008 ins_pipe(vunop_fp128); 16009 %} 16010 16011 instruct vneg2D(vecX dst, vecX src) 16012 %{ 16013 predicate(n->as_Vector()->length() == 2); 16014 match(Set dst (NegVD src)); 16015 ins_cost(INSN_COST * 3); 16016 format %{ "fneg $dst,$src\t# vector (2D)" %} 16017 ins_encode %{ 16018 __ fneg(as_FloatRegister($dst$$reg), __ T2D, 16019 as_FloatRegister($src$$reg)); 16020 %} 16021 ins_pipe(vunop_fp128); 16022 %} 16023 16024 // --------------------------------- AND -------------------------------------- 16025 16026 instruct vand8B(vecD dst, vecD src1, vecD src2) 16027 %{ 16028 predicate(n->as_Vector()->length_in_bytes() == 4 || 16029 n->as_Vector()->length_in_bytes() == 8); 16030 match(Set dst (AndV src1 src2)); 16031 ins_cost(INSN_COST); 16032 format %{ "and $dst,$src1,$src2\t# vector (8B)" %} 16033 ins_encode %{ 16034 __ andr(as_FloatRegister($dst$$reg), __ T8B, 16035 as_FloatRegister($src1$$reg), 16036 as_FloatRegister($src2$$reg)); 16037 %} 16038 ins_pipe(vlogical64); 16039 %} 16040 16041 instruct vand16B(vecX dst, vecX src1, vecX src2) 16042 %{ 16043 predicate(n->as_Vector()->length_in_bytes() == 16); 16044 match(Set dst (AndV src1 src2)); 16045 ins_cost(INSN_COST); 16046 format %{ "and $dst,$src1,$src2\t# vector (16B)" %} 16047 ins_encode %{ 16048 __ andr(as_FloatRegister($dst$$reg), __ T16B, 16049 as_FloatRegister($src1$$reg), 16050 as_FloatRegister($src2$$reg)); 16051 %} 16052 ins_pipe(vlogical128); 16053 %} 16054 16055 // --------------------------------- OR 
--------------------------------------- 16056 16057 instruct vor8B(vecD dst, vecD src1, vecD src2) 16058 %{ 16059 predicate(n->as_Vector()->length_in_bytes() == 4 || 16060 n->as_Vector()->length_in_bytes() == 8); 16061 match(Set dst (OrV src1 src2)); 16062 ins_cost(INSN_COST); 16063 format %{ "and $dst,$src1,$src2\t# vector (8B)" %} 16064 ins_encode %{ 16065 __ orr(as_FloatRegister($dst$$reg), __ T8B, 16066 as_FloatRegister($src1$$reg), 16067 as_FloatRegister($src2$$reg)); 16068 %} 16069 ins_pipe(vlogical64); 16070 %} 16071 16072 instruct vor16B(vecX dst, vecX src1, vecX src2) 16073 %{ 16074 predicate(n->as_Vector()->length_in_bytes() == 16); 16075 match(Set dst (OrV src1 src2)); 16076 ins_cost(INSN_COST); 16077 format %{ "orr $dst,$src1,$src2\t# vector (16B)" %} 16078 ins_encode %{ 16079 __ orr(as_FloatRegister($dst$$reg), __ T16B, 16080 as_FloatRegister($src1$$reg), 16081 as_FloatRegister($src2$$reg)); 16082 %} 16083 ins_pipe(vlogical128); 16084 %} 16085 16086 // --------------------------------- XOR -------------------------------------- 16087 16088 instruct vxor8B(vecD dst, vecD src1, vecD src2) 16089 %{ 16090 predicate(n->as_Vector()->length_in_bytes() == 4 || 16091 n->as_Vector()->length_in_bytes() == 8); 16092 match(Set dst (XorV src1 src2)); 16093 ins_cost(INSN_COST); 16094 format %{ "xor $dst,$src1,$src2\t# vector (8B)" %} 16095 ins_encode %{ 16096 __ eor(as_FloatRegister($dst$$reg), __ T8B, 16097 as_FloatRegister($src1$$reg), 16098 as_FloatRegister($src2$$reg)); 16099 %} 16100 ins_pipe(vlogical64); 16101 %} 16102 16103 instruct vxor16B(vecX dst, vecX src1, vecX src2) 16104 %{ 16105 predicate(n->as_Vector()->length_in_bytes() == 16); 16106 match(Set dst (XorV src1 src2)); 16107 ins_cost(INSN_COST); 16108 format %{ "xor $dst,$src1,$src2\t# vector (16B)" %} 16109 ins_encode %{ 16110 __ eor(as_FloatRegister($dst$$reg), __ T16B, 16111 as_FloatRegister($src1$$reg), 16112 as_FloatRegister($src2$$reg)); 16113 %} 16114 ins_pipe(vlogical128); 16115 %} 16116 16117 
// ------------------------------ Shift --------------------------------------- 16118 16119 instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{ 16120 match(Set dst (LShiftCntV cnt)); 16121 format %{ "dup $dst, $cnt\t# shift count (vecX)" %} 16122 ins_encode %{ 16123 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); 16124 %} 16125 ins_pipe(vdup_reg_reg128); 16126 %} 16127 16128 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount 16129 instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{ 16130 match(Set dst (RShiftCntV cnt)); 16131 format %{ "dup $dst, $cnt\t# shift count (vecX)\n\tneg $dst, $dst\t T16B" %} 16132 ins_encode %{ 16133 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg)); 16134 __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); 16135 %} 16136 ins_pipe(vdup_reg_reg128); 16137 %} 16138 16139 instruct vsll8B(vecD dst, vecD src, vecX shift) %{ 16140 predicate(n->as_Vector()->length() == 4 || 16141 n->as_Vector()->length() == 8); 16142 match(Set dst (LShiftVB src shift)); 16143 match(Set dst (RShiftVB src shift)); 16144 ins_cost(INSN_COST); 16145 format %{ "sshl $dst,$src,$shift\t# vector (8B)" %} 16146 ins_encode %{ 16147 __ sshl(as_FloatRegister($dst$$reg), __ T8B, 16148 as_FloatRegister($src$$reg), 16149 as_FloatRegister($shift$$reg)); 16150 %} 16151 ins_pipe(vshift64); 16152 %} 16153 16154 instruct vsll16B(vecX dst, vecX src, vecX shift) %{ 16155 predicate(n->as_Vector()->length() == 16); 16156 match(Set dst (LShiftVB src shift)); 16157 match(Set dst (RShiftVB src shift)); 16158 ins_cost(INSN_COST); 16159 format %{ "sshl $dst,$src,$shift\t# vector (16B)" %} 16160 ins_encode %{ 16161 __ sshl(as_FloatRegister($dst$$reg), __ T16B, 16162 as_FloatRegister($src$$reg), 16163 as_FloatRegister($shift$$reg)); 16164 %} 16165 ins_pipe(vshift128); 16166 %} 16167 16168 instruct vsrl8B(vecD dst, vecD src, vecX shift) %{ 16169 predicate(n->as_Vector()->length() == 4 || 16170 
n->as_Vector()->length() == 8); 16171 match(Set dst (URShiftVB src shift)); 16172 ins_cost(INSN_COST); 16173 format %{ "ushl $dst,$src,$shift\t# vector (8B)" %} 16174 ins_encode %{ 16175 __ ushl(as_FloatRegister($dst$$reg), __ T8B, 16176 as_FloatRegister($src$$reg), 16177 as_FloatRegister($shift$$reg)); 16178 %} 16179 ins_pipe(vshift64); 16180 %} 16181 16182 instruct vsrl16B(vecX dst, vecX src, vecX shift) %{ 16183 predicate(n->as_Vector()->length() == 16); 16184 match(Set dst (URShiftVB src shift)); 16185 ins_cost(INSN_COST); 16186 format %{ "ushl $dst,$src,$shift\t# vector (16B)" %} 16187 ins_encode %{ 16188 __ ushl(as_FloatRegister($dst$$reg), __ T16B, 16189 as_FloatRegister($src$$reg), 16190 as_FloatRegister($shift$$reg)); 16191 %} 16192 ins_pipe(vshift128); 16193 %} 16194 16195 instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{ 16196 predicate(n->as_Vector()->length() == 4 || 16197 n->as_Vector()->length() == 8); 16198 match(Set dst (LShiftVB src shift)); 16199 ins_cost(INSN_COST); 16200 format %{ "shl $dst, $src, $shift\t# vector (8B)" %} 16201 ins_encode %{ 16202 int sh = (int)$shift$$constant & 31; 16203 if (sh >= 8) { 16204 __ eor(as_FloatRegister($dst$$reg), __ T8B, 16205 as_FloatRegister($src$$reg), 16206 as_FloatRegister($src$$reg)); 16207 } else { 16208 __ shl(as_FloatRegister($dst$$reg), __ T8B, 16209 as_FloatRegister($src$$reg), sh); 16210 } 16211 %} 16212 ins_pipe(vshift64_imm); 16213 %} 16214 16215 instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ 16216 predicate(n->as_Vector()->length() == 16); 16217 match(Set dst (LShiftVB src shift)); 16218 ins_cost(INSN_COST); 16219 format %{ "shl $dst, $src, $shift\t# vector (16B)" %} 16220 ins_encode %{ 16221 int sh = (int)$shift$$constant & 31; 16222 if (sh >= 8) { 16223 __ eor(as_FloatRegister($dst$$reg), __ T16B, 16224 as_FloatRegister($src$$reg), 16225 as_FloatRegister($src$$reg)); 16226 } else { 16227 __ shl(as_FloatRegister($dst$$reg), __ T16B, 16228 as_FloatRegister($src$$reg), sh); 16229 } 
16230 %} 16231 ins_pipe(vshift128_imm); 16232 %} 16233 16234 instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{ 16235 predicate(n->as_Vector()->length() == 4 || 16236 n->as_Vector()->length() == 8); 16237 match(Set dst (RShiftVB src shift)); 16238 ins_cost(INSN_COST); 16239 format %{ "sshr $dst, $src, $shift\t# vector (8B)" %} 16240 ins_encode %{ 16241 int sh = (int)$shift$$constant & 31; 16242 if (sh >= 8) sh = 7; 16243 sh = -sh & 7; 16244 __ sshr(as_FloatRegister($dst$$reg), __ T8B, 16245 as_FloatRegister($src$$reg), sh); 16246 %} 16247 ins_pipe(vshift64_imm); 16248 %} 16249 16250 instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ 16251 predicate(n->as_Vector()->length() == 16); 16252 match(Set dst (RShiftVB src shift)); 16253 ins_cost(INSN_COST); 16254 format %{ "sshr $dst, $src, $shift\t# vector (16B)" %} 16255 ins_encode %{ 16256 int sh = (int)$shift$$constant & 31; 16257 if (sh >= 8) sh = 7; 16258 sh = -sh & 7; 16259 __ sshr(as_FloatRegister($dst$$reg), __ T16B, 16260 as_FloatRegister($src$$reg), sh); 16261 %} 16262 ins_pipe(vshift128_imm); 16263 %} 16264 16265 instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{ 16266 predicate(n->as_Vector()->length() == 4 || 16267 n->as_Vector()->length() == 8); 16268 match(Set dst (URShiftVB src shift)); 16269 ins_cost(INSN_COST); 16270 format %{ "ushr $dst, $src, $shift\t# vector (8B)" %} 16271 ins_encode %{ 16272 int sh = (int)$shift$$constant & 31; 16273 if (sh >= 8) { 16274 __ eor(as_FloatRegister($dst$$reg), __ T8B, 16275 as_FloatRegister($src$$reg), 16276 as_FloatRegister($src$$reg)); 16277 } else { 16278 __ ushr(as_FloatRegister($dst$$reg), __ T8B, 16279 as_FloatRegister($src$$reg), -sh & 7); 16280 } 16281 %} 16282 ins_pipe(vshift64_imm); 16283 %} 16284 16285 instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ 16286 predicate(n->as_Vector()->length() == 16); 16287 match(Set dst (URShiftVB src shift)); 16288 ins_cost(INSN_COST); 16289 format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} 16290 
ins_encode %{ 16291 int sh = (int)$shift$$constant & 31; 16292 if (sh >= 8) { 16293 __ eor(as_FloatRegister($dst$$reg), __ T16B, 16294 as_FloatRegister($src$$reg), 16295 as_FloatRegister($src$$reg)); 16296 } else { 16297 __ ushr(as_FloatRegister($dst$$reg), __ T16B, 16298 as_FloatRegister($src$$reg), -sh & 7); 16299 } 16300 %} 16301 ins_pipe(vshift128_imm); 16302 %} 16303 16304 instruct vsll4S(vecD dst, vecD src, vecX shift) %{ 16305 predicate(n->as_Vector()->length() == 2 || 16306 n->as_Vector()->length() == 4); 16307 match(Set dst (LShiftVS src shift)); 16308 match(Set dst (RShiftVS src shift)); 16309 ins_cost(INSN_COST); 16310 format %{ "sshl $dst,$src,$shift\t# vector (4H)" %} 16311 ins_encode %{ 16312 __ sshl(as_FloatRegister($dst$$reg), __ T4H, 16313 as_FloatRegister($src$$reg), 16314 as_FloatRegister($shift$$reg)); 16315 %} 16316 ins_pipe(vshift64); 16317 %} 16318 16319 instruct vsll8S(vecX dst, vecX src, vecX shift) %{ 16320 predicate(n->as_Vector()->length() == 8); 16321 match(Set dst (LShiftVS src shift)); 16322 match(Set dst (RShiftVS src shift)); 16323 ins_cost(INSN_COST); 16324 format %{ "sshl $dst,$src,$shift\t# vector (8H)" %} 16325 ins_encode %{ 16326 __ sshl(as_FloatRegister($dst$$reg), __ T8H, 16327 as_FloatRegister($src$$reg), 16328 as_FloatRegister($shift$$reg)); 16329 %} 16330 ins_pipe(vshift128); 16331 %} 16332 16333 instruct vsrl4S(vecD dst, vecD src, vecX shift) %{ 16334 predicate(n->as_Vector()->length() == 2 || 16335 n->as_Vector()->length() == 4); 16336 match(Set dst (URShiftVS src shift)); 16337 ins_cost(INSN_COST); 16338 format %{ "ushl $dst,$src,$shift\t# vector (4H)" %} 16339 ins_encode %{ 16340 __ ushl(as_FloatRegister($dst$$reg), __ T4H, 16341 as_FloatRegister($src$$reg), 16342 as_FloatRegister($shift$$reg)); 16343 %} 16344 ins_pipe(vshift64); 16345 %} 16346 16347 instruct vsrl8S(vecX dst, vecX src, vecX shift) %{ 16348 predicate(n->as_Vector()->length() == 8); 16349 match(Set dst (URShiftVS src shift)); 16350 ins_cost(INSN_COST); 
16351 format %{ "ushl $dst,$src,$shift\t# vector (8H)" %} 16352 ins_encode %{ 16353 __ ushl(as_FloatRegister($dst$$reg), __ T8H, 16354 as_FloatRegister($src$$reg), 16355 as_FloatRegister($shift$$reg)); 16356 %} 16357 ins_pipe(vshift128); 16358 %} 16359 16360 instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{ 16361 predicate(n->as_Vector()->length() == 2 || 16362 n->as_Vector()->length() == 4); 16363 match(Set dst (LShiftVS src shift)); 16364 ins_cost(INSN_COST); 16365 format %{ "shl $dst, $src, $shift\t# vector (4H)" %} 16366 ins_encode %{ 16367 int sh = (int)$shift$$constant & 31; 16368 if (sh >= 16) { 16369 __ eor(as_FloatRegister($dst$$reg), __ T8B, 16370 as_FloatRegister($src$$reg), 16371 as_FloatRegister($src$$reg)); 16372 } else { 16373 __ shl(as_FloatRegister($dst$$reg), __ T4H, 16374 as_FloatRegister($src$$reg), sh); 16375 } 16376 %} 16377 ins_pipe(vshift64_imm); 16378 %} 16379 16380 instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ 16381 predicate(n->as_Vector()->length() == 8); 16382 match(Set dst (LShiftVS src shift)); 16383 ins_cost(INSN_COST); 16384 format %{ "shl $dst, $src, $shift\t# vector (8H)" %} 16385 ins_encode %{ 16386 int sh = (int)$shift$$constant & 31; 16387 if (sh >= 16) { 16388 __ eor(as_FloatRegister($dst$$reg), __ T16B, 16389 as_FloatRegister($src$$reg), 16390 as_FloatRegister($src$$reg)); 16391 } else { 16392 __ shl(as_FloatRegister($dst$$reg), __ T8H, 16393 as_FloatRegister($src$$reg), sh); 16394 } 16395 %} 16396 ins_pipe(vshift128_imm); 16397 %} 16398 16399 instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{ 16400 predicate(n->as_Vector()->length() == 2 || 16401 n->as_Vector()->length() == 4); 16402 match(Set dst (RShiftVS src shift)); 16403 ins_cost(INSN_COST); 16404 format %{ "sshr $dst, $src, $shift\t# vector (4H)" %} 16405 ins_encode %{ 16406 int sh = (int)$shift$$constant & 31; 16407 if (sh >= 16) sh = 15; 16408 sh = -sh & 15; 16409 __ sshr(as_FloatRegister($dst$$reg), __ T4H, 16410 as_FloatRegister($src$$reg), sh); 
16411 %} 16412 ins_pipe(vshift64_imm); 16413 %} 16414 16415 instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ 16416 predicate(n->as_Vector()->length() == 8); 16417 match(Set dst (RShiftVS src shift)); 16418 ins_cost(INSN_COST); 16419 format %{ "sshr $dst, $src, $shift\t# vector (8H)" %} 16420 ins_encode %{ 16421 int sh = (int)$shift$$constant & 31; 16422 if (sh >= 16) sh = 15; 16423 sh = -sh & 15; 16424 __ sshr(as_FloatRegister($dst$$reg), __ T8H, 16425 as_FloatRegister($src$$reg), sh); 16426 %} 16427 ins_pipe(vshift128_imm); 16428 %} 16429 16430 instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{ 16431 predicate(n->as_Vector()->length() == 2 || 16432 n->as_Vector()->length() == 4); 16433 match(Set dst (URShiftVS src shift)); 16434 ins_cost(INSN_COST); 16435 format %{ "ushr $dst, $src, $shift\t# vector (4H)" %} 16436 ins_encode %{ 16437 int sh = (int)$shift$$constant & 31; 16438 if (sh >= 16) { 16439 __ eor(as_FloatRegister($dst$$reg), __ T8B, 16440 as_FloatRegister($src$$reg), 16441 as_FloatRegister($src$$reg)); 16442 } else { 16443 __ ushr(as_FloatRegister($dst$$reg), __ T4H, 16444 as_FloatRegister($src$$reg), -sh & 15); 16445 } 16446 %} 16447 ins_pipe(vshift64_imm); 16448 %} 16449 16450 instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ 16451 predicate(n->as_Vector()->length() == 8); 16452 match(Set dst (URShiftVS src shift)); 16453 ins_cost(INSN_COST); 16454 format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} 16455 ins_encode %{ 16456 int sh = (int)$shift$$constant & 31; 16457 if (sh >= 16) { 16458 __ eor(as_FloatRegister($dst$$reg), __ T16B, 16459 as_FloatRegister($src$$reg), 16460 as_FloatRegister($src$$reg)); 16461 } else { 16462 __ ushr(as_FloatRegister($dst$$reg), __ T8H, 16463 as_FloatRegister($src$$reg), -sh & 15); 16464 } 16465 %} 16466 ins_pipe(vshift128_imm); 16467 %} 16468 16469 instruct vsll2I(vecD dst, vecD src, vecX shift) %{ 16470 predicate(n->as_Vector()->length() == 2); 16471 match(Set dst (LShiftVI src shift)); 16472 match(Set 
dst (RShiftVI src shift)); 16473 ins_cost(INSN_COST); 16474 format %{ "sshl $dst,$src,$shift\t# vector (2S)" %} 16475 ins_encode %{ 16476 __ sshl(as_FloatRegister($dst$$reg), __ T2S, 16477 as_FloatRegister($src$$reg), 16478 as_FloatRegister($shift$$reg)); 16479 %} 16480 ins_pipe(vshift64_imm); 16481 %} 16482 16483 instruct vsll4I(vecX dst, vecX src, vecX shift) %{ 16484 predicate(n->as_Vector()->length() == 4); 16485 match(Set dst (LShiftVI src shift)); 16486 match(Set dst (RShiftVI src shift)); 16487 ins_cost(INSN_COST); 16488 format %{ "sshl $dst,$src,$shift\t# vector (4S)" %} 16489 ins_encode %{ 16490 __ sshl(as_FloatRegister($dst$$reg), __ T4S, 16491 as_FloatRegister($src$$reg), 16492 as_FloatRegister($shift$$reg)); 16493 %} 16494 ins_pipe(vshift128_imm); 16495 %} 16496 16497 instruct vsrl2I(vecD dst, vecD src, vecX shift) %{ 16498 predicate(n->as_Vector()->length() == 2); 16499 match(Set dst (URShiftVI src shift)); 16500 ins_cost(INSN_COST); 16501 format %{ "ushl $dst,$src,$shift\t# vector (2S)" %} 16502 ins_encode %{ 16503 __ ushl(as_FloatRegister($dst$$reg), __ T2S, 16504 as_FloatRegister($src$$reg), 16505 as_FloatRegister($shift$$reg)); 16506 %} 16507 ins_pipe(vshift64_imm); 16508 %} 16509 16510 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{ 16511 predicate(n->as_Vector()->length() == 4); 16512 match(Set dst (URShiftVI src shift)); 16513 ins_cost(INSN_COST); 16514 format %{ "ushl $dst,$src,$shift\t# vector (4S)" %} 16515 ins_encode %{ 16516 __ ushl(as_FloatRegister($dst$$reg), __ T4S, 16517 as_FloatRegister($src$$reg), 16518 as_FloatRegister($shift$$reg)); 16519 %} 16520 ins_pipe(vshift128_imm); 16521 %} 16522 16523 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ 16524 predicate(n->as_Vector()->length() == 2); 16525 match(Set dst (LShiftVI src shift)); 16526 ins_cost(INSN_COST); 16527 format %{ "shl $dst, $src, $shift\t# vector (2S)" %} 16528 ins_encode %{ 16529 __ shl(as_FloatRegister($dst$$reg), __ T2S, 16530 as_FloatRegister($src$$reg), 16531 
(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

// ---- Vector shift instructs -------------------------------------------------
// Each rule below matches an ideal vector-shift node (LShiftV*, RShiftV*,
// URShiftV*) and emits the corresponding NEON shift instruction.
// Immediate forms mask the shift constant to the element width
// (& 31 for 32-bit lanes, & 63 for 64-bit lanes).
// For the immediate right-shift forms the constant is negated before masking;
// NOTE(review): presumably the assembler's sshr/ushr helpers expect the count
// in this negated/encoded form -- confirm against the aarch64 assembler
// shift-immediate helpers.

// Vector shift left, 4 x 32-bit lanes, immediate shift count.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

// Vector arithmetic (signed) shift right, 2 x 32-bit lanes, immediate count.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

// Vector arithmetic (signed) shift right, 4 x 32-bit lanes, immediate count.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

// Vector logical (unsigned) shift right, 2 x 32-bit lanes, immediate count.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}

// Vector logical (unsigned) shift right, 4 x 32-bit lanes, immediate count.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}

// Vector shift, 2 x 64-bit lanes, register shift count.
// Matches BOTH LShiftVL and RShiftVL with a single SSHL: SSHL shifts left
// for positive counts and (arithmetically) right for negative counts.
// NOTE(review): this presumably relies on the shift-count vector having been
// negated by a separate rule for the RShiftVL case -- confirm against the
// shift-count instructs elsewhere in this file.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Vector logical (unsigned) shift right, 2 x 64-bit lanes, register count.
// USHL shifts right for negative counts (same negated-count convention
// as the sshl rule above).
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Vector shift left, 2 x 64-bit lanes, immediate shift count.
// NOTE(review): uses ins_pipe(vshift128) while the other immediate-shift
// rules use vshift128_imm -- possibly an oversight; confirm intended
// pipeline class.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128);
%}

// Vector arithmetic (signed) shift right, 2 x 64-bit lanes, immediate count.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

// Vector logical (unsigned) shift right, 2 x 64-bit lanes, immediate count.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == RAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(iRegINoSp dst, iRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_iReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
// %}
//

// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
16727 // 16728 // peephole 16729 // %{ 16730 // peepmatch (incI_iReg movI); 16731 // peepconstraint (0.dst == 1.dst); 16732 // peepreplace (leaI_iReg_immI(0.dst 1.src 0.src)); 16733 // %} 16734 16735 // peephole 16736 // %{ 16737 // peepmatch (decI_iReg movI); 16738 // peepconstraint (0.dst == 1.dst); 16739 // peepreplace (leaI_iReg_immI(0.dst 1.src 0.src)); 16740 // %} 16741 16742 // peephole 16743 // %{ 16744 // peepmatch (addI_iReg_imm movI); 16745 // peepconstraint (0.dst == 1.dst); 16746 // peepreplace (leaI_iReg_immI(0.dst 1.src 0.src)); 16747 // %} 16748 16749 // peephole 16750 // %{ 16751 // peepmatch (incL_iReg movL); 16752 // peepconstraint (0.dst == 1.dst); 16753 // peepreplace (leaL_iReg_immL(0.dst 1.src 0.src)); 16754 // %} 16755 16756 // peephole 16757 // %{ 16758 // peepmatch (decL_iReg movL); 16759 // peepconstraint (0.dst == 1.dst); 16760 // peepreplace (leaL_iReg_immL(0.dst 1.src 0.src)); 16761 // %} 16762 16763 // peephole 16764 // %{ 16765 // peepmatch (addL_iReg_imm movL); 16766 // peepconstraint (0.dst == 1.dst); 16767 // peepreplace (leaL_iReg_immL(0.dst 1.src 0.src)); 16768 // %} 16769 16770 // peephole 16771 // %{ 16772 // peepmatch (addP_iReg_imm movP); 16773 // peepconstraint (0.dst == 1.dst); 16774 // peepreplace (leaP_iReg_imm(0.dst 1.src 0.src)); 16775 // %} 16776 16777 // // Change load of spilled value to only a spill 16778 // instruct storeI(memory mem, iRegI src) 16779 // %{ 16780 // match(Set mem (StoreI mem src)); 16781 // %} 16782 // 16783 // instruct loadI(iRegINoSp dst, memory mem) 16784 // %{ 16785 // match(Set dst (LoadI mem)); 16786 // %} 16787 // 16788 16789 //----------SMARTSPILL RULES--------------------------------------------------- 16790 // These must follow all instruction definitions as they use the names 16791 // defined in the instructions definitions. 16792 16793 // Local Variables: 16794 // mode: c++ 16795 // End: