1 /* 2 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "compiler/disassembler.hpp" 28 #include "interpreter/interp_masm.hpp" 29 #include "interpreter/interpreter.hpp" 30 #include "interpreter/interpreterRuntime.hpp" 31 #include "interpreter/templateInterpreterGenerator.hpp" 32 #include "runtime/arguments.hpp" 33 #include "runtime/sharedRuntime.hpp" 34 35 #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)-> 36 37 #ifdef _WIN64 38 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 39 address entry = __ pc(); 40 41 // rbx: method 42 // r14: pointer to locals 43 // c_rarg3: first stack arg - wordSize 44 __ mov(c_rarg3, rsp); 45 // adjust rsp 46 __ subptr(rsp, 4 * wordSize); 47 __ call_VM(noreg, 48 CAST_FROM_FN_PTR(address, 49 InterpreterRuntime::slow_signature_handler), 50 rbx, r14, c_rarg3); 51 52 // rax: result handler 53 54 // Stack layout: 55 // rsp: 3 integer or float args (if static first is unused) 56 // 1 float/double identifiers 57 // return address 58 // stack args 59 // garbage 60 // expression stack bottom 61 // bcp (NULL) 62 // ... 63 64 // Do FP first so we can use c_rarg3 as temp 65 __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers 66 67 for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { 68 XMMRegister floatreg = as_XMMRegister(i+1); 69 Label isfloatordouble, isdouble, next; 70 71 __ testl(c_rarg3, 1 << (i*2)); // Float or Double? 72 __ jcc(Assembler::notZero, isfloatordouble); 73 74 // Do Int register here 75 switch ( i ) { 76 case 0: 77 __ movl(rscratch1, Address(rbx, Method::access_flags_offset())); 78 __ testl(rscratch1, JVM_ACC_STATIC); 79 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 80 break; 81 case 1: 82 __ movptr(c_rarg2, Address(rsp, wordSize)); 83 break; 84 case 2: 85 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 86 break; 87 default: 88 break; 89 } 90 91 __ jmp (next); 92 93 __ bind(isfloatordouble); 94 __ testl(c_rarg3, 1 << ((i*2)+1)); // Double? 95 __ jcc(Assembler::notZero, isdouble); 96 97 // Do Float Here 98 __ movflt(floatreg, Address(rsp, i * wordSize)); 99 __ jmp(next); 100 101 // Do Double here 102 __ bind(isdouble); 103 __ movdbl(floatreg, Address(rsp, i * wordSize)); 104 105 __ bind(next); 106 } 107 108 109 // restore rsp 110 __ addptr(rsp, 4 * wordSize); 111 112 __ ret(0); 113 114 return entry; 115 } 116 #else 117 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 118 address entry = __ pc(); 119 120 // rbx: method 121 // r14: pointer to locals 122 // c_rarg3: first stack arg - wordSize 123 __ mov(c_rarg3, rsp); 124 // adjust rsp 125 __ subptr(rsp, 14 * wordSize); 126 __ call_VM(noreg, 127 CAST_FROM_FN_PTR(address, 128 InterpreterRuntime::slow_signature_handler), 129 rbx, r14, c_rarg3); 130 131 // rax: result handler 132 133 // Stack layout: 134 // rsp: 5 integer args (if static first is unused) 135 // 1 float/double identifiers 136 // 8 double args 137 // return address 138 // stack args 139 // garbage 140 // expression stack bottom 141 // bcp (NULL) 142 // ... 143 144 // Do FP first so we can use c_rarg3 as temp 145 __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers 146 147 for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { 148 const XMMRegister r = as_XMMRegister(i); 149 150 Label d, done; 151 152 __ testl(c_rarg3, 1 << i); 153 __ jcc(Assembler::notZero, d); 154 __ movflt(r, Address(rsp, (6 + i) * wordSize)); 155 __ jmp(done); 156 __ bind(d); 157 __ movdbl(r, Address(rsp, (6 + i) * wordSize)); 158 __ bind(done); 159 } 160 161 // Now handle integrals. Only do c_rarg1 if not static. 162 __ movl(c_rarg3, Address(rbx, Method::access_flags_offset())); 163 __ testl(c_rarg3, JVM_ACC_STATIC); 164 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 165 166 __ movptr(c_rarg2, Address(rsp, wordSize)); 167 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 168 __ movptr(c_rarg4, Address(rsp, 3 * wordSize)); 169 __ movptr(c_rarg5, Address(rsp, 4 * wordSize)); 170 171 // restore rsp 172 __ addptr(rsp, 14 * wordSize); 173 174 __ ret(0); 175 176 return entry; 177 } 178 #endif // __WIN64 179 180 /** 181 * Method entry for static native methods: 182 * int java.util.zip.CRC32.update(int crc, int b) 183 */ 184 address TemplateInterpreterGenerator::generate_CRC32_update_entry() { 185 if (UseCRC32Intrinsics) { 186 address entry = __ pc(); 187 188 // rbx,: Method* 189 // r13: senderSP must preserved for slow path, set SP to it on fast path 190 // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) 191 // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) 192 193 Label slow_path; 194 __ safepoint_poll(slow_path, r15_thread, rscratch1); 195 196 // We don't generate local frame and don't align stack because 197 // we call stub code and there is no safepoint on this path. 198 199 // Load parameters 200 const Register crc = rax; // crc 201 const Register val = c_rarg0; // source java byte value 202 const Register tbl = c_rarg1; // scratch 203 204 // Arguments are reversed on java expression stack 205 __ movl(val, Address(rsp, wordSize)); // byte value 206 __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC 207 208 __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); 209 __ notl(crc); // ~crc 210 __ update_byte_crc32(crc, val, tbl); 211 __ notl(crc); // ~crc 212 // result in rax 213 214 // _areturn 215 __ pop(rdi); // get return address 216 __ mov(rsp, r13); // set sp to sender sp 217 __ jmp(rdi); 218 219 // generate a vanilla native entry as the slow path 220 __ bind(slow_path); 221 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 222 return entry; 223 } 224 return NULL; 225 } 226 227 /** 228 * Method entry for static native methods: 229 * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) 230 * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) 231 */ 232 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 233 if (UseCRC32Intrinsics) { 234 address entry = __ pc(); 235 236 // rbx,: Method* 237 // r13: senderSP must preserved for slow path, set SP to it on fast path 238 239 Label slow_path; 240 __ safepoint_poll(slow_path, r15_thread, rscratch1); 241 242 // We don't generate local frame and don't align stack because 243 // we call stub code and there is no safepoint on this path. 244 245 // Load parameters 246 const Register crc = c_rarg0; // crc 247 const Register buf = c_rarg1; // source java byte array address 248 const Register len = c_rarg2; // length 249 const Register off = len; // offset (never overlaps with 'len') 250 251 // Arguments are reversed on java expression stack 252 // Calculate address of start element 253 if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { 254 __ movptr(buf, Address(rsp, 3*wordSize)); // long buf 255 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 256 __ addq(buf, off); // + offset 257 __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC 258 } else { 259 __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array 260 __ resolve(IS_NOT_NULL | ACCESS_READ, buf); 261 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 262 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 263 __ addq(buf, off); // + offset 264 __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC 265 } 266 // Can now load 'len' since we're finished with 'off' 267 __ movl(len, Address(rsp, wordSize)); // Length 268 269 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); 270 // result in rax 271 272 // _areturn 273 __ pop(rdi); // get return address 274 __ mov(rsp, r13); // set sp to sender sp 275 __ jmp(rdi); 276 277 // generate a vanilla native entry as the slow path 278 __ bind(slow_path); 279 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 280 return entry; 281 } 282 return NULL; 283 } 284 285 /** 286 * Method entry for static (non-native) methods: 287 * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) 288 * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) 289 */ 290 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 291 if (UseCRC32CIntrinsics) { 292 address entry = __ pc(); 293 // Load parameters 294 const Register crc = c_rarg0; // crc 295 const Register buf = c_rarg1; // source java byte array address 296 const Register len = c_rarg2; 297 const Register off = c_rarg3; // offset 298 const Register end = len; 299 300 // Arguments are reversed on java expression stack 301 // Calculate address of start element 302 if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { 303 __ movptr(buf, Address(rsp, 3 * wordSize)); // long address 304 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 305 __ addq(buf, off); // + offset 306 __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC 307 // Note on 5 * wordSize vs. 4 * wordSize: 308 // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) 309 // 4 2,3 1 0 310 // end starts at SP + 8 311 // The Java(R) Virtual Machine Specification Java SE 7 Edition 312 // 4.10.2.3. Values of Types long and double 313 // "When calculating operand stack length, values of type long and double have length two." 314 } else { 315 __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array 316 __ resolve(IS_NOT_NULL | ACCESS_READ, buf); 317 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 318 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 319 __ addq(buf, off); // + offset 320 __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC 321 } 322 __ movl(end, Address(rsp, wordSize)); // end 323 __ subl(end, off); // end - off 324 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); 325 // result in rax 326 // _areturn 327 __ pop(rdi); // get return address 328 __ mov(rsp, r13); // set sp to sender sp 329 __ jmp(rdi); 330 331 return entry; 332 } 333 334 return NULL; 335 } 336 337 // 338 // Various method entries 339 // 340 341 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { 342 343 // rbx,: Method* 344 // rcx: scratrch 345 // r13: sender sp 346 347 if (!InlineIntrinsics) return NULL; // Generate a vanilla entry 348 349 address entry_point = __ pc(); 350 351 // These don't need a safepoint check because they aren't virtually 352 // callable. We won't enter these intrinsics from compiled code. 353 // If in the future we added an intrinsic which was virtually callable 354 // we'd have to worry about how to safepoint so that this code is used. 355 356 // mathematical functions inlined by compiler 357 // (interpreter must provide identical implementation 358 // in order to avoid monotonicity bugs when switching 359 // from interpreter to compiler in the middle of some 360 // computation) 361 // 362 // stack: [ ret adr ] <-- rsp 363 // [ lo(arg) ] 364 // [ hi(arg) ] 365 // 366 367 if (kind == Interpreter::java_lang_math_fmaD) { 368 if (!UseFMA) { 369 return NULL; // Generate a vanilla entry 370 } 371 __ movdbl(xmm0, Address(rsp, wordSize)); 372 __ movdbl(xmm1, Address(rsp, 3 * wordSize)); 373 __ movdbl(xmm2, Address(rsp, 5 * wordSize)); 374 __ fmad(xmm0, xmm1, xmm2, xmm0); 375 } else if (kind == Interpreter::java_lang_math_fmaF) { 376 if (!UseFMA) { 377 return NULL; // Generate a vanilla entry 378 } 379 __ movflt(xmm0, Address(rsp, wordSize)); 380 __ movflt(xmm1, Address(rsp, 2 * wordSize)); 381 __ movflt(xmm2, Address(rsp, 3 * wordSize)); 382 __ fmaf(xmm0, xmm1, xmm2, xmm0); 383 } else if (kind == Interpreter::java_lang_math_sqrt) { 384 __ sqrtsd(xmm0, Address(rsp, wordSize)); 385 } else if (kind == Interpreter::java_lang_math_exp) { 386 __ movdbl(xmm0, Address(rsp, wordSize)); 387 if (StubRoutines::dexp() != NULL) { 388 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); 389 } else { 390 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); 391 } 392 } else if (kind == Interpreter::java_lang_math_log) { 393 __ movdbl(xmm0, Address(rsp, wordSize)); 394 if (StubRoutines::dlog() != NULL) { 395 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); 396 } else { 397 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); 398 } 399 } else if (kind == Interpreter::java_lang_math_log10) { 400 __ movdbl(xmm0, Address(rsp, wordSize)); 401 if (StubRoutines::dlog10() != NULL) { 402 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); 403 } else { 404 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); 405 } 406 } else if (kind == Interpreter::java_lang_math_sin) { 407 __ movdbl(xmm0, Address(rsp, wordSize)); 408 if (StubRoutines::dsin() != NULL) { 409 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); 410 } else { 411 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); 412 } 413 } else if (kind == Interpreter::java_lang_math_cos) { 414 __ movdbl(xmm0, Address(rsp, wordSize)); 415 if (StubRoutines::dcos() != NULL) { 416 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); 417 } else { 418 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); 419 } 420 } else if (kind == Interpreter::java_lang_math_pow) { 421 __ movdbl(xmm1, Address(rsp, wordSize)); 422 __ movdbl(xmm0, Address(rsp, 3 * wordSize)); 423 if (StubRoutines::dpow() != NULL) { 424 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); 425 } else { 426 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); 427 } 428 } else if (kind == Interpreter::java_lang_math_tan) { 429 __ movdbl(xmm0, Address(rsp, wordSize)); 430 if (StubRoutines::dtan() != NULL) { 431 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); 432 } else { 433 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); 434 } 435 } else if (kind == Interpreter::java_lang_math_abs) { 436 assert(StubRoutines::x86::double_sign_mask() != NULL, "not initialized"); 437 __ movdbl(xmm0, Address(rsp, wordSize)); 438 __ andpd(xmm0, ExternalAddress(StubRoutines::x86::double_sign_mask())); 439 } else { 440 ShouldNotReachHere(); 441 } 442 443 __ pop(rax); 444 __ mov(rsp, r13); 445 __ jmp(rax); 446 447 return entry_point; 448 } 449