1 /* 2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "interpreter/interp_masm.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "interpreter/interpreterRuntime.hpp" 30 #include "interpreter/templateInterpreterGenerator.hpp" 31 #include "runtime/arguments.hpp" 32 #include "runtime/sharedRuntime.hpp" 33 34 #define __ _masm-> 35 36 #ifdef _WIN64 37 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 38 address entry = __ pc(); 39 40 // rbx: method 41 // r14: pointer to locals 42 // c_rarg3: first stack arg - wordSize 43 __ mov(c_rarg3, rsp); 44 // adjust rsp 45 __ subptr(rsp, 4 * wordSize); 46 __ call_VM(noreg, 47 CAST_FROM_FN_PTR(address, 48 InterpreterRuntime::slow_signature_handler), 49 rbx, r14, c_rarg3); 50 51 // rax: result handler 52 53 // Stack layout: 54 // rsp: 3 integer or float args (if static first is unused) 55 // 1 float/double identifiers 56 // return address 57 // stack args 58 // garbage 59 // expression stack bottom 60 // bcp (NULL) 61 // ... 62 63 // Do FP first so we can use c_rarg3 as temp 64 __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers 65 66 for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { 67 XMMRegister floatreg = as_XMMRegister(i+1); 68 Label isfloatordouble, isdouble, next; 69 70 __ testl(c_rarg3, 1 << (i*2)); // Float or Double? 71 __ jcc(Assembler::notZero, isfloatordouble); 72 73 // Do Int register here 74 switch ( i ) { 75 case 0: 76 __ movl(rscratch1, Address(rbx, Method::access_flags_offset())); 77 __ testl(rscratch1, JVM_ACC_STATIC); 78 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 79 break; 80 case 1: 81 __ movptr(c_rarg2, Address(rsp, wordSize)); 82 break; 83 case 2: 84 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 85 break; 86 default: 87 break; 88 } 89 90 __ jmp (next); 91 92 __ bind(isfloatordouble); 93 __ testl(c_rarg3, 1 << ((i*2)+1)); // Double? 94 __ jcc(Assembler::notZero, isdouble); 95 96 // Do Float Here 97 __ movflt(floatreg, Address(rsp, i * wordSize)); 98 __ jmp(next); 99 100 // Do Double here 101 __ bind(isdouble); 102 __ movdbl(floatreg, Address(rsp, i * wordSize)); 103 104 __ bind(next); 105 } 106 107 108 // restore rsp 109 __ addptr(rsp, 4 * wordSize); 110 111 __ ret(0); 112 113 return entry; 114 } 115 #else 116 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 117 address entry = __ pc(); 118 119 // rbx: method 120 // r14: pointer to locals 121 // c_rarg3: first stack arg - wordSize 122 __ mov(c_rarg3, rsp); 123 // adjust rsp 124 __ subptr(rsp, 14 * wordSize); 125 __ call_VM(noreg, 126 CAST_FROM_FN_PTR(address, 127 InterpreterRuntime::slow_signature_handler), 128 rbx, r14, c_rarg3); 129 130 // rax: result handler 131 132 // Stack layout: 133 // rsp: 5 integer args (if static first is unused) 134 // 1 float/double identifiers 135 // 8 double args 136 // return address 137 // stack args 138 // garbage 139 // expression stack bottom 140 // bcp (NULL) 141 // ... 142 143 // Do FP first so we can use c_rarg3 as temp 144 __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers 145 146 for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { 147 const XMMRegister r = as_XMMRegister(i); 148 149 Label d, done; 150 151 __ testl(c_rarg3, 1 << i); 152 __ jcc(Assembler::notZero, d); 153 __ movflt(r, Address(rsp, (6 + i) * wordSize)); 154 __ jmp(done); 155 __ bind(d); 156 __ movdbl(r, Address(rsp, (6 + i) * wordSize)); 157 __ bind(done); 158 } 159 160 // Now handle integrals. Only do c_rarg1 if not static. 161 __ movl(c_rarg3, Address(rbx, Method::access_flags_offset())); 162 __ testl(c_rarg3, JVM_ACC_STATIC); 163 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 164 165 __ movptr(c_rarg2, Address(rsp, wordSize)); 166 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 167 __ movptr(c_rarg4, Address(rsp, 3 * wordSize)); 168 __ movptr(c_rarg5, Address(rsp, 4 * wordSize)); 169 170 // restore rsp 171 __ addptr(rsp, 14 * wordSize); 172 173 __ ret(0); 174 175 return entry; 176 } 177 #endif // __WIN64 178 179 /** 180 * Method entry for static native methods: 181 * int java.util.zip.CRC32.update(int crc, int b) 182 */ 183 address TemplateInterpreterGenerator::generate_CRC32_update_entry() { 184 if (UseCRC32Intrinsics) { 185 address entry = __ pc(); 186 187 // rbx,: Method* 188 // r13: senderSP must preserved for slow path, set SP to it on fast path 189 // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) 190 // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) 191 192 Label slow_path; 193 __ safepoint_poll(slow_path, r15_thread, rscratch1); 194 195 // We don't generate local frame and don't align stack because 196 // we call stub code and there is no safepoint on this path. 197 198 // Load parameters 199 const Register crc = rax; // crc 200 const Register val = c_rarg0; // source java byte value 201 const Register tbl = c_rarg1; // scratch 202 203 // Arguments are reversed on java expression stack 204 __ movl(val, Address(rsp, wordSize)); // byte value 205 __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC 206 207 __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); 208 __ notl(crc); // ~crc 209 __ update_byte_crc32(crc, val, tbl); 210 __ notl(crc); // ~crc 211 // result in rax 212 213 // _areturn 214 __ pop(rdi); // get return address 215 __ mov(rsp, r13); // set sp to sender sp 216 __ jmp(rdi); 217 218 // generate a vanilla native entry as the slow path 219 __ bind(slow_path); 220 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 221 return entry; 222 } 223 return NULL; 224 } 225 226 /** 227 * Method entry for static native methods: 228 * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) 229 * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) 230 */ 231 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 232 if (UseCRC32Intrinsics) { 233 address entry = __ pc(); 234 235 // rbx,: Method* 236 // r13: senderSP must preserved for slow path, set SP to it on fast path 237 238 Label slow_path; 239 __ safepoint_poll(slow_path, r15_thread, rscratch1); 240 241 // We don't generate local frame and don't align stack because 242 // we call stub code and there is no safepoint on this path. 243 244 // Load parameters 245 const Register crc = c_rarg0; // crc 246 const Register buf = c_rarg1; // source java byte array address 247 const Register len = c_rarg2; // length 248 const Register off = len; // offset (never overlaps with 'len') 249 250 // Arguments are reversed on java expression stack 251 // Calculate address of start element 252 if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { 253 __ movptr(buf, Address(rsp, 3*wordSize)); // long buf 254 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 255 __ addq(buf, off); // + offset 256 __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC 257 } else { 258 __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array 259 __ resolve_for_read(OOP_NOT_NULL, buf); 260 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 261 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 262 __ addq(buf, off); // + offset 263 __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC 264 } 265 // Can now load 'len' since we're finished with 'off' 266 __ movl(len, Address(rsp, wordSize)); // Length 267 268 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); 269 // result in rax 270 271 // _areturn 272 __ pop(rdi); // get return address 273 __ mov(rsp, r13); // set sp to sender sp 274 __ jmp(rdi); 275 276 // generate a vanilla native entry as the slow path 277 __ bind(slow_path); 278 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 279 return entry; 280 } 281 return NULL; 282 } 283 284 /** 285 * Method entry for static (non-native) methods: 286 * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) 287 * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) 288 */ 289 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 290 if (UseCRC32CIntrinsics) { 291 address entry = __ pc(); 292 // Load parameters 293 const Register crc = c_rarg0; // crc 294 const Register buf = c_rarg1; // source java byte array address 295 const Register len = c_rarg2; 296 const Register off = c_rarg3; // offset 297 const Register end = len; 298 299 // Arguments are reversed on java expression stack 300 // Calculate address of start element 301 if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { 302 __ movptr(buf, Address(rsp, 3 * wordSize)); // long address 303 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 304 __ addq(buf, off); // + offset 305 __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC 306 // Note on 5 * wordSize vs. 4 * wordSize: 307 // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) 308 // 4 2,3 1 0 309 // end starts at SP + 8 310 // The Java(R) Virtual Machine Specification Java SE 7 Edition 311 // 4.10.2.3. Values of Types long and double 312 // "When calculating operand stack length, values of type long and double have length two." 313 } else { 314 __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array 315 __ resolve_for_read(OOP_NOT_NULL, buf); 316 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 317 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 318 __ addq(buf, off); // + offset 319 __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC 320 } 321 __ movl(end, Address(rsp, wordSize)); // end 322 __ subl(end, off); // end - off 323 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); 324 // result in rax 325 // _areturn 326 __ pop(rdi); // get return address 327 __ mov(rsp, r13); // set sp to sender sp 328 __ jmp(rdi); 329 330 return entry; 331 } 332 333 return NULL; 334 } 335 336 // 337 // Various method entries 338 // 339 340 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { 341 342 // rbx,: Method* 343 // rcx: scratrch 344 // r13: sender sp 345 346 if (!InlineIntrinsics) return NULL; // Generate a vanilla entry 347 348 address entry_point = __ pc(); 349 350 // These don't need a safepoint check because they aren't virtually 351 // callable. We won't enter these intrinsics from compiled code. 352 // If in the future we added an intrinsic which was virtually callable 353 // we'd have to worry about how to safepoint so that this code is used. 354 355 // mathematical functions inlined by compiler 356 // (interpreter must provide identical implementation 357 // in order to avoid monotonicity bugs when switching 358 // from interpreter to compiler in the middle of some 359 // computation) 360 // 361 // stack: [ ret adr ] <-- rsp 362 // [ lo(arg) ] 363 // [ hi(arg) ] 364 // 365 366 if (kind == Interpreter::java_lang_math_fmaD) { 367 if (!UseFMA) { 368 return NULL; // Generate a vanilla entry 369 } 370 __ movdbl(xmm0, Address(rsp, wordSize)); 371 __ movdbl(xmm1, Address(rsp, 3 * wordSize)); 372 __ movdbl(xmm2, Address(rsp, 5 * wordSize)); 373 __ fmad(xmm0, xmm1, xmm2, xmm0); 374 } else if (kind == Interpreter::java_lang_math_fmaF) { 375 if (!UseFMA) { 376 return NULL; // Generate a vanilla entry 377 } 378 __ movflt(xmm0, Address(rsp, wordSize)); 379 __ movflt(xmm1, Address(rsp, 2 * wordSize)); 380 __ movflt(xmm2, Address(rsp, 3 * wordSize)); 381 __ fmaf(xmm0, xmm1, xmm2, xmm0); 382 } else if (kind == Interpreter::java_lang_math_sqrt) { 383 __ sqrtsd(xmm0, Address(rsp, wordSize)); 384 } else if (kind == Interpreter::java_lang_math_exp) { 385 __ movdbl(xmm0, Address(rsp, wordSize)); 386 if (StubRoutines::dexp() != NULL) { 387 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); 388 } else { 389 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); 390 } 391 } else if (kind == Interpreter::java_lang_math_log) { 392 __ movdbl(xmm0, Address(rsp, wordSize)); 393 if (StubRoutines::dlog() != NULL) { 394 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); 395 } else { 396 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); 397 } 398 } else if (kind == Interpreter::java_lang_math_log10) { 399 __ movdbl(xmm0, Address(rsp, wordSize)); 400 if (StubRoutines::dlog10() != NULL) { 401 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); 402 } else { 403 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); 404 } 405 } else if (kind == Interpreter::java_lang_math_sin) { 406 __ movdbl(xmm0, Address(rsp, wordSize)); 407 if (StubRoutines::dsin() != NULL) { 408 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); 409 } else { 410 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); 411 } 412 } else if (kind == Interpreter::java_lang_math_cos) { 413 __ movdbl(xmm0, Address(rsp, wordSize)); 414 if (StubRoutines::dcos() != NULL) { 415 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); 416 } else { 417 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); 418 } 419 } else if (kind == Interpreter::java_lang_math_pow) { 420 __ movdbl(xmm1, Address(rsp, wordSize)); 421 __ movdbl(xmm0, Address(rsp, 3 * wordSize)); 422 if (StubRoutines::dpow() != NULL) { 423 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); 424 } else { 425 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); 426 } 427 } else if (kind == Interpreter::java_lang_math_tan) { 428 __ movdbl(xmm0, Address(rsp, wordSize)); 429 if (StubRoutines::dtan() != NULL) { 430 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); 431 } else { 432 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); 433 } 434 } else { 435 __ fld_d(Address(rsp, wordSize)); 436 switch (kind) { 437 case Interpreter::java_lang_math_abs: 438 __ fabs(); 439 break; 440 default: 441 ShouldNotReachHere(); 442 } 443 444 // return double result in xmm0 for interpreter and compilers. 445 __ subptr(rsp, 2*wordSize); 446 // Round to 64bit precision 447 __ fstp_d(Address(rsp, 0)); 448 __ movdbl(xmm0, Address(rsp, 0)); 449 __ addptr(rsp, 2*wordSize); 450 } 451 452 453 __ pop(rax); 454 __ mov(rsp, r13); 455 __ jmp(rax); 456 457 return entry_point; 458 } 459