1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "interpreter/interp_masm.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "interpreter/interpreterRuntime.hpp" 30 #include "interpreter/templateInterpreterGenerator.hpp" 31 #include "runtime/arguments.hpp" 32 #include "runtime/sharedRuntime.hpp" 33 34 #define __ _masm-> 35 36 #ifdef _WIN64 37 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 38 address entry = __ pc(); 39 40 // rbx: method 41 // r14: pointer to locals 42 // c_rarg3: first stack arg - wordSize 43 __ mov(c_rarg3, rsp); 44 // adjust rsp 45 __ subptr(rsp, 4 * wordSize); 46 __ call_VM(noreg, 47 CAST_FROM_FN_PTR(address, 48 InterpreterRuntime::slow_signature_handler), 49 rbx, r14, c_rarg3); 50 51 // rax: result handler 52 53 // Stack layout: 54 // rsp: 3 integer or float args (if static first is unused) 55 // 1 float/double identifiers 56 // return address 57 // stack args 58 // garbage 59 // expression stack bottom 60 // bcp (NULL) 61 // ... 62 63 // Do FP first so we can use c_rarg3 as temp 64 __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers 65 66 for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { 67 XMMRegister floatreg = as_XMMRegister(i+1); 68 Label isfloatordouble, isdouble, next; 69 70 __ testl(c_rarg3, 1 << (i*2)); // Float or Double? 71 __ jcc(Assembler::notZero, isfloatordouble); 72 73 // Do Int register here 74 switch ( i ) { 75 case 0: 76 __ movl(rscratch1, Address(rbx, Method::access_flags_offset())); 77 __ testl(rscratch1, JVM_ACC_STATIC); 78 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 79 break; 80 case 1: 81 __ movptr(c_rarg2, Address(rsp, wordSize)); 82 break; 83 case 2: 84 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 85 break; 86 default: 87 break; 88 } 89 90 __ jmp (next); 91 92 __ bind(isfloatordouble); 93 __ testl(c_rarg3, 1 << ((i*2)+1)); // Double? 94 __ jcc(Assembler::notZero, isdouble); 95 96 // Do Float Here 97 __ movflt(floatreg, Address(rsp, i * wordSize)); 98 __ jmp(next); 99 100 // Do Double here 101 __ bind(isdouble); 102 __ movdbl(floatreg, Address(rsp, i * wordSize)); 103 104 __ bind(next); 105 } 106 107 108 // restore rsp 109 __ addptr(rsp, 4 * wordSize); 110 111 __ ret(0); 112 113 return entry; 114 } 115 #else 116 address TemplateInterpreterGenerator::generate_slow_signature_handler() { 117 address entry = __ pc(); 118 119 // rbx: method 120 // r14: pointer to locals 121 // c_rarg3: first stack arg - wordSize 122 __ mov(c_rarg3, rsp); 123 // adjust rsp 124 __ subptr(rsp, 14 * wordSize); 125 __ call_VM(noreg, 126 CAST_FROM_FN_PTR(address, 127 InterpreterRuntime::slow_signature_handler), 128 rbx, r14, c_rarg3); 129 130 // rax: result handler 131 132 // Stack layout: 133 // rsp: 5 integer args (if static first is unused) 134 // 1 float/double identifiers 135 // 8 double args 136 // return address 137 // stack args 138 // garbage 139 // expression stack bottom 140 // bcp (NULL) 141 // ... 142 143 // Do FP first so we can use c_rarg3 as temp 144 __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers 145 146 for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { 147 const XMMRegister r = as_XMMRegister(i); 148 149 Label d, done; 150 151 __ testl(c_rarg3, 1 << i); 152 __ jcc(Assembler::notZero, d); 153 __ movflt(r, Address(rsp, (6 + i) * wordSize)); 154 __ jmp(done); 155 __ bind(d); 156 __ movdbl(r, Address(rsp, (6 + i) * wordSize)); 157 __ bind(done); 158 } 159 160 // Now handle integrals. Only do c_rarg1 if not static. 161 __ movl(c_rarg3, Address(rbx, Method::access_flags_offset())); 162 __ testl(c_rarg3, JVM_ACC_STATIC); 163 __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0)); 164 165 __ movptr(c_rarg2, Address(rsp, wordSize)); 166 __ movptr(c_rarg3, Address(rsp, 2 * wordSize)); 167 __ movptr(c_rarg4, Address(rsp, 3 * wordSize)); 168 __ movptr(c_rarg5, Address(rsp, 4 * wordSize)); 169 170 // restore rsp 171 __ addptr(rsp, 14 * wordSize); 172 173 __ ret(0); 174 175 return entry; 176 } 177 #endif // __WIN64 178 179 /** 180 * Method entry for static native methods: 181 * int java.util.zip.CRC32.update(int crc, int b) 182 */ 183 address TemplateInterpreterGenerator::generate_CRC32_update_entry() { 184 if (UseCRC32Intrinsics) { 185 address entry = __ pc(); 186 187 // rbx,: Method* 188 // r13: senderSP must preserved for slow path, set SP to it on fast path 189 // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) 190 // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) 191 192 Label slow_path; 193 // If we need a safepoint check, generate full interpreter entry. 194 ExternalAddress state(SafepointSynchronize::address_of_state()); 195 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), 196 SafepointSynchronize::_not_synchronized); 197 __ jcc(Assembler::notEqual, slow_path); 198 199 // We don't generate local frame and don't align stack because 200 // we call stub code and there is no safepoint on this path. 201 202 // Load parameters 203 const Register crc = rax; // crc 204 const Register val = c_rarg0; // source java byte value 205 const Register tbl = c_rarg1; // scratch 206 207 // Arguments are reversed on java expression stack 208 __ movl(val, Address(rsp, wordSize)); // byte value 209 __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC 210 211 __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); 212 __ notl(crc); // ~crc 213 __ update_byte_crc32(crc, val, tbl); 214 __ notl(crc); // ~crc 215 // result in rax 216 217 // _areturn 218 __ pop(rdi); // get return address 219 __ mov(rsp, r13); // set sp to sender sp 220 __ jmp(rdi); 221 222 // generate a vanilla native entry as the slow path 223 __ bind(slow_path); 224 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 225 return entry; 226 } 227 return NULL; 228 } 229 230 /** 231 * Method entry for static native methods: 232 * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) 233 * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) 234 */ 235 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 236 if (UseCRC32Intrinsics) { 237 address entry = __ pc(); 238 239 // rbx,: Method* 240 // r13: senderSP must preserved for slow path, set SP to it on fast path 241 242 Label slow_path; 243 // If we need a safepoint check, generate full interpreter entry. 244 ExternalAddress state(SafepointSynchronize::address_of_state()); 245 __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), 246 SafepointSynchronize::_not_synchronized); 247 __ jcc(Assembler::notEqual, slow_path); 248 249 // We don't generate local frame and don't align stack because 250 // we call stub code and there is no safepoint on this path. 251 252 // Load parameters 253 const Register crc = c_rarg0; // crc 254 const Register buf = c_rarg1; // source java byte array address 255 const Register len = c_rarg2; // length 256 const Register off = len; // offset (never overlaps with 'len') 257 258 // Arguments are reversed on java expression stack 259 // Calculate address of start element 260 if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { 261 __ movptr(buf, Address(rsp, 3*wordSize)); // long buf 262 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 263 __ addq(buf, off); // + offset 264 __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC 265 } else { 266 __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array 267 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 268 __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset 269 __ addq(buf, off); // + offset 270 __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC 271 } 272 // Can now load 'len' since we're finished with 'off' 273 __ movl(len, Address(rsp, wordSize)); // Length 274 275 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); 276 // result in rax 277 278 // _areturn 279 __ pop(rdi); // get return address 280 __ mov(rsp, r13); // set sp to sender sp 281 __ jmp(rdi); 282 283 // generate a vanilla native entry as the slow path 284 __ bind(slow_path); 285 __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); 286 return entry; 287 } 288 return NULL; 289 } 290 291 /** 292 * Method entry for static (non-native) methods: 293 * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) 294 * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) 295 */ 296 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { 297 if (UseCRC32CIntrinsics) { 298 address entry = __ pc(); 299 // Load parameters 300 const Register crc = c_rarg0; // crc 301 const Register buf = c_rarg1; // source java byte array address 302 const Register len = c_rarg2; 303 const Register off = c_rarg3; // offset 304 const Register end = len; 305 306 // Arguments are reversed on java expression stack 307 // Calculate address of start element 308 if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { 309 __ movptr(buf, Address(rsp, 3 * wordSize)); // long address 310 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 311 __ addq(buf, off); // + offset 312 __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC 313 // Note on 5 * wordSize vs. 4 * wordSize: 314 // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) 315 // 4 2,3 1 0 316 // end starts at SP + 8 317 // The Java(R) Virtual Machine Specification Java SE 7 Edition 318 // 4.10.2.3. Values of Types long and double 319 // "When calculating operand stack length, values of type long and double have length two." 320 } else { 321 __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array 322 __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size 323 __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset 324 __ addq(buf, off); // + offset 325 __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC 326 } 327 __ movl(end, Address(rsp, wordSize)); // end 328 __ subl(end, off); // end - off 329 __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); 330 // result in rax 331 // _areturn 332 __ pop(rdi); // get return address 333 __ mov(rsp, r13); // set sp to sender sp 334 __ jmp(rdi); 335 336 return entry; 337 } 338 339 return NULL; 340 } 341 342 // 343 // Various method entries 344 // 345 346 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { 347 348 // rbx,: Method* 349 // rcx: scratrch 350 // r13: sender sp 351 352 if (!InlineIntrinsics) return NULL; // Generate a vanilla entry 353 354 address entry_point = __ pc(); 355 356 // These don't need a safepoint check because they aren't virtually 357 // callable. We won't enter these intrinsics from compiled code. 358 // If in the future we added an intrinsic which was virtually callable 359 // we'd have to worry about how to safepoint so that this code is used. 360 361 // mathematical functions inlined by compiler 362 // (interpreter must provide identical implementation 363 // in order to avoid monotonicity bugs when switching 364 // from interpreter to compiler in the middle of some 365 // computation) 366 // 367 // stack: [ ret adr ] <-- rsp 368 // [ lo(arg) ] 369 // [ hi(arg) ] 370 // 371 372 if (kind == Interpreter::java_lang_math_fmaD) { 373 __ movdbl(xmm0, Address(rsp, wordSize)); 374 __ movdbl(xmm1, Address(rsp, 3 * wordSize)); 375 __ movdbl(xmm2, Address(rsp, 5 * wordSize)); 376 __ vfmadd231sd(xmm0, xmm1, xmm2); 377 } else if (kind == Interpreter::java_lang_math_fmaF) { 378 __ movdbl(xmm0, Address(rsp, wordSize)); 379 __ movdbl(xmm1, Address(rsp, 2 * wordSize)); 380 __ movdbl(xmm2, Address(rsp, 3 * wordSize)); 381 __ vfmadd231ss(xmm0, xmm1, xmm2); 382 } else if (kind == Interpreter::java_lang_math_sqrt) { 383 __ sqrtsd(xmm0, Address(rsp, wordSize)); 384 } else if (kind == Interpreter::java_lang_math_exp) { 385 __ movdbl(xmm0, Address(rsp, wordSize)); 386 if (StubRoutines::dexp() != NULL) { 387 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); 388 } else { 389 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); 390 } 391 } else if (kind == Interpreter::java_lang_math_log) { 392 __ movdbl(xmm0, Address(rsp, wordSize)); 393 if (StubRoutines::dlog() != NULL) { 394 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); 395 } else { 396 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); 397 } 398 } else if (kind == Interpreter::java_lang_math_log10) { 399 __ movdbl(xmm0, Address(rsp, wordSize)); 400 if (StubRoutines::dlog10() != NULL) { 401 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); 402 } else { 403 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); 404 } 405 } else if (kind == Interpreter::java_lang_math_sin) { 406 __ movdbl(xmm0, Address(rsp, wordSize)); 407 if (StubRoutines::dsin() != NULL) { 408 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); 409 } else { 410 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); 411 } 412 } else if (kind == Interpreter::java_lang_math_cos) { 413 __ movdbl(xmm0, Address(rsp, wordSize)); 414 if (StubRoutines::dcos() != NULL) { 415 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); 416 } else { 417 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); 418 } 419 } else if (kind == Interpreter::java_lang_math_pow) { 420 __ movdbl(xmm1, Address(rsp, wordSize)); 421 __ movdbl(xmm0, Address(rsp, 3 * wordSize)); 422 if (StubRoutines::dpow() != NULL) { 423 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); 424 } else { 425 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); 426 } 427 } else if (kind == Interpreter::java_lang_math_tan) { 428 __ movdbl(xmm0, Address(rsp, wordSize)); 429 if (StubRoutines::dtan() != NULL) { 430 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); 431 } else { 432 __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); 433 } 434 } else { 435 __ fld_d(Address(rsp, wordSize)); 436 switch (kind) { 437 case Interpreter::java_lang_math_abs: 438 __ fabs(); 439 break; 440 default: 441 ShouldNotReachHere(); 442 } 443 444 // return double result in xmm0 for interpreter and compilers. 445 __ subptr(rsp, 2*wordSize); 446 // Round to 64bit precision 447 __ fstp_d(Address(rsp, 0)); 448 __ movdbl(xmm0, Address(rsp, 0)); 449 __ addptr(rsp, 2*wordSize); 450 } 451 452 453 __ pop(rax); 454 __ mov(rsp, r13); 455 __ jmp(rax); 456 457 return entry_point; 458 } 459