1 /* 2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "interpreter/interp_masm.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "interpreter/interpreterRuntime.hpp" 30 #include "interpreter/templateInterpreterGenerator.hpp" 31 #include "runtime/arguments.hpp" 32 #include "runtime/sharedRuntime.hpp" 33 34 #define __ _masm-> 35 36 #ifdef _WIN64 37 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 38 address entry = __ pc(); 39 40 // rbx: method 41 // r14: pointer to locals 42 // c_rarg3: first stack arg - wordSize 43 __ mov(c_rarg3, rsp); 44 // adjust rsp 45 __ subptr(rsp, 4 * wordSize); 46 __ call_VM(noreg, 47 CAST_FROM_FN_PTR(address, 48 InterpreterRuntime::slow_signature_handler), 49 rbx, r14, c_rarg3); 50 51 // rax: result handler 52 53 // Stack layout: 54 // rsp: 3 integer or float args (if static first is unused) 55 // 1 float/double identifiers 56 // return address 57 // stack args 58 // garbage 59 // expression stack bottom 60 // bcp (NULL) 61 // ... 62 63 // Do FP first so we can use c_rarg3 as temp 64 __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers 65 66 for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { 67 XMMRegister floatreg = as_XMMRegister(i+1); 68 Label isfloatordouble, isdouble, next; 69 70 __ testl(c_rarg3, 1 << (i*2)); // Float or Double? 71 __ jcc(Assembler::notZero, isfloatordouble); 72 73 // Do Int register here 74 switch ( i ) { 75 case 0: 76 __ movl(rscratch1, Address(rbx, Method::access_flags_offset())); 77 __ testl(rscratch1, JVM_ACC_STATIC); 78 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 79 break; 80 case 1: 81 __ movptr(c_rarg2, Address(rsp, wordSize)); 82 break; 83 case 2: 84 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 85 break; 86 default: 87 break; 88 } 89 90 __ jmp (next); 91 92 __ bind(isfloatordouble); 93 __ testl(c_rarg3, 1 << ((i*2)+1)); // Double? 94 __ jcc(Assembler::notZero, isdouble); 95 96 // Do Float Here 97 __ movflt(floatreg, Address(rsp, i * wordSize)); 98 __ jmp(next); 99 100 // Do Double here 101 __ bind(isdouble); 102 __ movdbl(floatreg, Address(rsp, i * wordSize)); 103 104 __ bind(next); 105 } 106 107 108 // restore rsp 109 __ addptr(rsp, 4 * wordSize); 110 111 __ ret(0); 112 113 return entry; 114 } 115 #else 116 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 117 address entry = __ pc(); 118 119 // rbx: method 120 // r14: pointer to locals 121 // c_rarg3: first stack arg - wordSize 122 __ mov(c_rarg3, rsp); 123 // adjust rsp 124 __ subptr(rsp, 14 * wordSize); 125 __ call_VM(noreg, 126 CAST_FROM_FN_PTR(address, 127 InterpreterRuntime::slow_signature_handler), 128 rbx, r14, c_rarg3); 129 130 // rax: result handler 131 132 // Stack layout: 133 // rsp: 5 integer args (if static first is unused) 134 // 1 float/double identifiers 135 // 8 double args 136 // return address 137 // stack args 138 // garbage 139 // expression stack bottom 140 // bcp (NULL) 141 // ... 142 143 // Do FP first so we can use c_rarg3 as temp 144 __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers 145 146 for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { 147 const XMMRegister r = as_XMMRegister(i); 148 149 Label d, done; 150 151 __ testl(c_rarg3, 1 << i); 152 __ jcc(Assembler::notZero, d); 153 __ movflt(r, Address(rsp, (6 + i) * wordSize)); 154 __ jmp(done); 155 __ bind(d); 156 __ movdbl(r, Address(rsp, (6 + i) * wordSize)); 157 __ bind(done); 158 } 159 160 // Now handle integrals. Only do c_rarg1 if not static. 161 __ movl(c_rarg3, Address(rbx, Method::access_flags_offset())); 162 __ testl(c_rarg3, JVM_ACC_STATIC); 163 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 164 165 __ movptr(c_rarg2, Address(rsp, wordSize)); 166 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 167 __ movptr(c_rarg4, Address(rsp, 3 * wordSize)); 168 __ movptr(c_rarg5, Address(rsp, 4 * wordSize)); 169 170 // restore rsp 171 __ addptr(rsp, 14 * wordSize); 172 173 __ ret(0); 174 175 return entry; 176 } 177 #endif // __WIN64 178 179 /** 180 * Method entry for static native methods: 181 * int java.util.zip.CRC32.update(int crc, int b) 182 */ 183 address TemplateInterpreterGenerator::generate_CRC32_update_entry() { 184 if (UseCRC32Intrinsics) { 185 address entry = __ pc(); 186 187 // rbx,: Method* 188 // r13: senderSP must preserved for slow path, set SP to it on fast path 189 // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) 190 // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) 191 192 Label slow_path; 193 __ safepoint_poll(slow_path, r15_thread, rscratch1); 194 195 // We don't generate local frame and don't align stack because 196 // we call stub code and there is no safepoint on this path. 197 198 // Load parameters 199 const Register crc = rax; // crc 200 const Register val = c_rarg0; // source java byte value 201 const Register tbl = c_rarg1; // scratch 202 203 // Arguments are reversed on java expression stack 204 __ movl(val, Address(rsp, wordSize)); // byte value 205 __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC 206 207 __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); 208 __ notl(crc); // ~crc 209 __ update_byte_crc32(crc, val, tbl); 210 __ notl(crc); // ~crc 211 // result in rax 212 213 // _areturn 214 __ pop(rdi); // get return address 215 __ mov(rsp, r13); // set sp to sender sp 216 __ jmp(rdi); 217 218 // generate a vanilla native entry as the slow path 219 __ bind(slow_path); 220 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 221 return entry; 222 } 223 return NULL; 224 } 225 226 /** 227 * Method entry for static native methods: 228 * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) 229 * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) 230 */ 231 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 232 if (UseCRC32Intrinsics) { 233 address entry = __ pc(); 234 235 // rbx,: Method* 236 // r13: senderSP must preserved for slow path, set SP to it on fast path 237 238 Label slow_path; 239 __ safepoint_poll(slow_path, r15_thread, rscratch1); 240 241 // We don't generate local frame and don't align stack because 242 // we call stub code and there is no safepoint on this path. 243 244 // Load parameters 245 const Register crc = c_rarg0; // crc 246 const Register buf = c_rarg1; // source java byte array address 247 const Register len = c_rarg2; // length 248 const Register off = len; // offset (never overlaps with 'len') 249 250 // Arguments are reversed on java expression stack 251 // Calculate address of start element 252 if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { 253 __ movptr(buf, Address(rsp, 3*wordSize)); // long buf 254 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 255 __ addq(buf, off); // + offset 256 __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC 257 } else { 258 __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array 259 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 260 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 261 __ addq(buf, off); // + offset 262 __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC 263 } 264 // Can now load 'len' since we're finished with 'off' 265 __ movl(len, Address(rsp, wordSize)); // Length 266 267 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); 268 // result in rax 269 270 // _areturn 271 __ pop(rdi); // get return address 272 __ mov(rsp, r13); // set sp to sender sp 273 __ jmp(rdi); 274 275 // generate a vanilla native entry as the slow path 276 __ bind(slow_path); 277 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 278 return entry; 279 } 280 return NULL; 281 } 282 283 /** 284 * Method entry for static (non-native) methods: 285 * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) 286 * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) 287 */ 288 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 289 if (UseCRC32CIntrinsics) { 290 address entry = __ pc(); 291 // Load parameters 292 const Register crc = c_rarg0; // crc 293 const Register buf = c_rarg1; // source java byte array address 294 const Register len = c_rarg2; 295 const Register off = c_rarg3; // offset 296 const Register end = len; 297 298 // Arguments are reversed on java expression stack 299 // Calculate address of start element 300 if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { 301 __ movptr(buf, Address(rsp, 3 * wordSize)); // long address 302 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 303 __ addq(buf, off); // + offset 304 __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC 305 // Note on 5 * wordSize vs. 4 * wordSize: 306 // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) 307 // 4 2,3 1 0 308 // end starts at SP + 8 309 // The Java(R) Virtual Machine Specification Java SE 7 Edition 310 // 4.10.2.3. Values of Types long and double 311 // "When calculating operand stack length, values of type long and double have length two." 312 } else { 313 __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array 314 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 315 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 316 __ addq(buf, off); // + offset 317 __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC 318 } 319 __ movl(end, Address(rsp, wordSize)); // end 320 __ subl(end, off); // end - off 321 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); 322 // result in rax 323 // _areturn 324 __ pop(rdi); // get return address 325 __ mov(rsp, r13); // set sp to sender sp 326 __ jmp(rdi); 327 328 return entry; 329 } 330 331 return NULL; 332 } 333 334 // 335 // Various method entries 336 // 337 338 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { 339 340 // rbx,: Method* 341 // rcx: scratrch 342 // r13: sender sp 343 344 if (!InlineIntrinsics) return NULL; // Generate a vanilla entry 345 346 address entry_point = __ pc(); 347 348 // These don't need a safepoint check because they aren't virtually 349 // callable. We won't enter these intrinsics from compiled code. 350 // If in the future we added an intrinsic which was virtually callable 351 // we'd have to worry about how to safepoint so that this code is used. 352 353 // mathematical functions inlined by compiler 354 // (interpreter must provide identical implementation 355 // in order to avoid monotonicity bugs when switching 356 // from interpreter to compiler in the middle of some 357 // computation) 358 // 359 // stack: [ ret adr ] <-- rsp 360 // [ lo(arg) ] 361 // [ hi(arg) ] 362 // 363 364 if (kind == Interpreter::java_lang_math_fmaD) { 365 if (!UseFMA) { 366 return NULL; // Generate a vanilla entry 367 } 368 __ movdbl(xmm0, Address(rsp, wordSize)); 369 __ movdbl(xmm1, Address(rsp, 3 * wordSize)); 370 __ movdbl(xmm2, Address(rsp, 5 * wordSize)); 371 __ fmad(xmm0, xmm1, xmm2, xmm0); 372 } else if (kind == Interpreter::java_lang_math_fmaF) { 373 if (!UseFMA) { 374 return NULL; // Generate a vanilla entry 375 } 376 __ movflt(xmm0, Address(rsp, wordSize)); 377 __ movflt(xmm1, Address(rsp, 2 * wordSize)); 378 __ movflt(xmm2, Address(rsp, 3 * wordSize)); 379 __ fmaf(xmm0, xmm1, xmm2, xmm0); 380 } else if (kind == Interpreter::java_lang_math_sqrt) { 381 __ sqrtsd(xmm0, Address(rsp, wordSize)); 382 } else if (kind == Interpreter::java_lang_math_exp) { 383 __ movdbl(xmm0, Address(rsp, wordSize)); 384 if (StubRoutines::dexp() != NULL) { 385 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); 386 } else { 387 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); 388 } 389 } else if (kind == Interpreter::java_lang_math_log) { 390 __ movdbl(xmm0, Address(rsp, wordSize)); 391 if (StubRoutines::dlog() != NULL) { 392 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); 393 } else { 394 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); 395 } 396 } else if (kind == Interpreter::java_lang_math_log10) { 397 __ movdbl(xmm0, Address(rsp, wordSize)); 398 if (StubRoutines::dlog10() != NULL) { 399 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); 400 } else { 401 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); 402 } 403 } else if (kind == Interpreter::java_lang_math_sin) { 404 __ movdbl(xmm0, Address(rsp, wordSize)); 405 if (StubRoutines::dsin() != NULL) { 406 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); 407 } else { 408 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); 409 } 410 } else if (kind == Interpreter::java_lang_math_cos) { 411 __ movdbl(xmm0, Address(rsp, wordSize)); 412 if (StubRoutines::dcos() != NULL) { 413 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); 414 } else { 415 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); 416 } 417 } else if (kind == Interpreter::java_lang_math_pow) { 418 __ movdbl(xmm1, Address(rsp, wordSize)); 419 __ movdbl(xmm0, Address(rsp, 3 * wordSize)); 420 if (StubRoutines::dpow() != NULL) { 421 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); 422 } else { 423 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); 424 } 425 } else if (kind == Interpreter::java_lang_math_tan) { 426 __ movdbl(xmm0, Address(rsp, wordSize)); 427 if (StubRoutines::dtan() != NULL) { 428 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); 429 } else { 430 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); 431 } 432 } else { 433 __ fld_d(Address(rsp, wordSize)); 434 switch (kind) { 435 case Interpreter::java_lang_math_abs: 436 __ fabs(); 437 break; 438 default: 439 ShouldNotReachHere(); 440 } 441 442 // return double result in xmm0 for interpreter and compilers. 443 __ subptr(rsp, 2*wordSize); 444 // Round to 64bit precision 445 __ fstp_d(Address(rsp, 0)); 446 __ movdbl(xmm0, Address(rsp, 0)); 447 __ addptr(rsp, 2*wordSize); 448 } 449 450 451 __ pop(rax); 452 __ mov(rsp, r13); 453 __ jmp(rax); 454 455 return entry_point; 456 } 457