1 /*
   2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "compiler/disassembler.hpp"
  28 #include "interpreter/interp_masm.hpp"
  29 #include "interpreter/interpreter.hpp"
  30 #include "interpreter/interpreterRuntime.hpp"
  31 #include "interpreter/templateInterpreterGenerator.hpp"
  32 #include "runtime/arguments.hpp"
  33 #include "runtime/sharedRuntime.hpp"
  34 
  35 #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
  36 
  37 #ifdef _WIN64
  38 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  39   address entry = __ pc();
  40 
  41   // rbx: method
  42   // r14: pointer to locals
  43   // c_rarg3: first stack arg - wordSize
  44   __ mov(c_rarg3, rsp);
  45   // adjust rsp
  46   __ subptr(rsp, 4 * wordSize);
  47   __ call_VM(noreg,
  48              CAST_FROM_FN_PTR(address,
  49                               InterpreterRuntime::slow_signature_handler),
  50              rbx, r14, c_rarg3);
  51 
  52   // rax: result handler
  53 
  54   // Stack layout:
  55   // rsp: 3 integer or float args (if static first is unused)
  56   //      1 float/double identifiers
  57   //        return address
  58   //        stack args
  59   //        garbage
  60   //        expression stack bottom
  61   //        bcp (NULL)
  62   //        ...
  63 
  64   // Do FP first so we can use c_rarg3 as temp
  65   __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers
  66 
  67   for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
  68     XMMRegister floatreg = as_XMMRegister(i+1);
  69     Label isfloatordouble, isdouble, next;
  70 
  71     __ testl(c_rarg3, 1 << (i*2));      // Float or Double?
  72     __ jcc(Assembler::notZero, isfloatordouble);
  73 
  74     // Do Int register here
  75     switch ( i ) {
  76       case 0:
  77         __ movl(rscratch1, Address(rbx, Method::access_flags_offset()));
  78         __ testl(rscratch1, JVM_ACC_STATIC);
  79         __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
  80         break;
  81       case 1:
  82         __ movptr(c_rarg2, Address(rsp, wordSize));
  83         break;
  84       case 2:
  85         __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
  86         break;
  87       default:
  88         break;
  89     }
  90 
  91     __ jmp (next);
  92 
  93     __ bind(isfloatordouble);
  94     __ testl(c_rarg3, 1 << ((i*2)+1));     // Double?
  95     __ jcc(Assembler::notZero, isdouble);
  96 
  97 // Do Float Here
  98     __ movflt(floatreg, Address(rsp, i * wordSize));
  99     __ jmp(next);
 100 
 101 // Do Double here
 102     __ bind(isdouble);
 103     __ movdbl(floatreg, Address(rsp, i * wordSize));
 104 
 105     __ bind(next);
 106   }
 107 
 108 
 109   // restore rsp
 110   __ addptr(rsp, 4 * wordSize);
 111 
 112   __ ret(0);
 113 
 114   return entry;
 115 }
 116 #else
 117 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
 118   address entry = __ pc();
 119 
 120   // rbx: method
 121   // r14: pointer to locals
 122   // c_rarg3: first stack arg - wordSize
 123   __ mov(c_rarg3, rsp);
 124   // adjust rsp
 125   __ subptr(rsp, 14 * wordSize);
 126   __ call_VM(noreg,
 127              CAST_FROM_FN_PTR(address,
 128                               InterpreterRuntime::slow_signature_handler),
 129              rbx, r14, c_rarg3);
 130 
 131   // rax: result handler
 132 
 133   // Stack layout:
 134   // rsp: 5 integer args (if static first is unused)
 135   //      1 float/double identifiers
 136   //      8 double args
 137   //        return address
 138   //        stack args
 139   //        garbage
 140   //        expression stack bottom
 141   //        bcp (NULL)
 142   //        ...
 143 
 144   // Do FP first so we can use c_rarg3 as temp
 145   __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers
 146 
 147   for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
 148     const XMMRegister r = as_XMMRegister(i);
 149 
 150     Label d, done;
 151 
 152     __ testl(c_rarg3, 1 << i);
 153     __ jcc(Assembler::notZero, d);
 154     __ movflt(r, Address(rsp, (6 + i) * wordSize));
 155     __ jmp(done);
 156     __ bind(d);
 157     __ movdbl(r, Address(rsp, (6 + i) * wordSize));
 158     __ bind(done);
 159   }
 160 
 161   // Now handle integrals.  Only do c_rarg1 if not static.
 162   __ movl(c_rarg3, Address(rbx, Method::access_flags_offset()));
 163   __ testl(c_rarg3, JVM_ACC_STATIC);
 164   __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
 165 
 166   __ movptr(c_rarg2, Address(rsp, wordSize));
 167   __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
 168   __ movptr(c_rarg4, Address(rsp, 3 * wordSize));
 169   __ movptr(c_rarg5, Address(rsp, 4 * wordSize));
 170 
 171   // restore rsp
 172   __ addptr(rsp, 14 * wordSize);
 173 
 174   __ ret(0);
 175 
 176   return entry;
 177 }
#endif  // _WIN64
 179 
 180 /**
 181  * Method entry for static native methods:
 182  *   int java.util.zip.CRC32.update(int crc, int b)
 183  */
 184 address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
 185   if (UseCRC32Intrinsics) {
 186     address entry = __ pc();
 187 
 188     // rbx,: Method*
 189     // r13: senderSP must preserved for slow path, set SP to it on fast path
 190     // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
 191     // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
 192 
 193     Label slow_path;
 194     __ safepoint_poll(slow_path, r15_thread, rscratch1);
 195 
 196     // We don't generate local frame and don't align stack because
 197     // we call stub code and there is no safepoint on this path.
 198 
 199     // Load parameters
 200     const Register crc = rax;  // crc
 201     const Register val = c_rarg0;  // source java byte value
 202     const Register tbl = c_rarg1;  // scratch
 203 
 204     // Arguments are reversed on java expression stack
 205     __ movl(val, Address(rsp,   wordSize)); // byte value
 206     __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
 207 
 208     __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
 209     __ notl(crc); // ~crc
 210     __ update_byte_crc32(crc, val, tbl);
 211     __ notl(crc); // ~crc
 212     // result in rax
 213 
 214     // _areturn
 215     __ pop(rdi);                // get return address
 216     __ mov(rsp, r13);           // set sp to sender sp
 217     __ jmp(rdi);
 218 
 219     // generate a vanilla native entry as the slow path
 220     __ bind(slow_path);
 221     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
 222     return entry;
 223   }
 224   return NULL;
 225 }
 226 
 227 /**
 228  * Method entry for static native methods:
 229  *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
 230  *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
 231  */
 232 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
 233   if (UseCRC32Intrinsics) {
 234     address entry = __ pc();
 235 
 236     // rbx,: Method*
 237     // r13: senderSP must preserved for slow path, set SP to it on fast path
 238 
 239     Label slow_path;
 240     __ safepoint_poll(slow_path, r15_thread, rscratch1);
 241 
 242     // We don't generate local frame and don't align stack because
 243     // we call stub code and there is no safepoint on this path.
 244 
 245     // Load parameters
 246     const Register crc = c_rarg0;  // crc
 247     const Register buf = c_rarg1;  // source java byte array address
 248     const Register len = c_rarg2;  // length
 249     const Register off = len;      // offset (never overlaps with 'len')
 250 
 251     // Arguments are reversed on java expression stack
 252     // Calculate address of start element
 253     if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
 254       __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
 255       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
 256       __ addq(buf, off); // + offset
 257       __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
 258     } else {
 259       __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
 260       __ resolve(IS_NOT_NULL | ACCESS_READ, buf);
 261       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
 262       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
 263       __ addq(buf, off); // + offset
 264       __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
 265     }
 266     // Can now load 'len' since we're finished with 'off'
 267     __ movl(len, Address(rsp, wordSize)); // Length
 268 
 269     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
 270     // result in rax
 271 
 272     // _areturn
 273     __ pop(rdi);                // get return address
 274     __ mov(rsp, r13);           // set sp to sender sp
 275     __ jmp(rdi);
 276 
 277     // generate a vanilla native entry as the slow path
 278     __ bind(slow_path);
 279     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
 280     return entry;
 281   }
 282   return NULL;
 283 }
 284 
 285 /**
 286 * Method entry for static (non-native) methods:
 287 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
 288 *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
 289 */
 290 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
 291   if (UseCRC32CIntrinsics) {
 292     address entry = __ pc();
 293     // Load parameters
 294     const Register crc = c_rarg0;  // crc
 295     const Register buf = c_rarg1;  // source java byte array address
 296     const Register len = c_rarg2;
 297     const Register off = c_rarg3;  // offset
 298     const Register end = len;
 299 
 300     // Arguments are reversed on java expression stack
 301     // Calculate address of start element
 302     if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
 303       __ movptr(buf, Address(rsp, 3 * wordSize)); // long address
 304       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
 305       __ addq(buf, off); // + offset
 306       __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
 307       // Note on 5 * wordSize vs. 4 * wordSize:
 308       // *   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
 309       //                                                   4         2,3          1        0
 310       // end starts at SP + 8
 311       // The Java(R) Virtual Machine Specification Java SE 7 Edition
 312       // 4.10.2.3. Values of Types long and double
 313       //    "When calculating operand stack length, values of type long and double have length two."
 314     } else {
 315       __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array
 316       __ resolve(IS_NOT_NULL | ACCESS_READ, buf);
 317       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
 318       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
 319       __ addq(buf, off); // + offset
 320       __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC
 321     }
 322     __ movl(end, Address(rsp, wordSize)); // end
 323     __ subl(end, off); // end - off
 324     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
 325     // result in rax
 326     // _areturn
 327     __ pop(rdi);                // get return address
 328     __ mov(rsp, r13);           // set sp to sender sp
 329     __ jmp(rdi);
 330 
 331     return entry;
 332   }
 333 
 334   return NULL;
 335 }
 336 
 337 //
 338 // Various method entries
 339 //
 340 
 341 address TemplateInterpreterGenerator::generate_bit_entry(AbstractInterpreter::MethodKind kind) {
 342   address entry = __ pc();
 343 
 344   const Register bits  = c_rarg0;
 345   const Register index = c_rarg1;
 346 
 347   __ movptr(bits, Address(rsp, 2 * wordSize));
 348   __ movl(index, Address(rsp, 1 * wordSize));
 349 
 350   if (kind == Interpreter::java_lang_System_setBit)
 351     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::setBit())));
 352   else
 353     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::clrBit())));
 354 
 355   __ pop(rax);
 356   __ mov(rsp, r13);
 357   __ jmp(rax);
 358 
 359   return entry;
 360 }
 361 
 362 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
 363 
 364   // rbx,: Method*
 365   // rcx: scratrch
 366   // r13: sender sp
 367 
 368   if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
 369 
 370   address entry_point = __ pc();
 371 
 372   // These don't need a safepoint check because they aren't virtually
 373   // callable. We won't enter these intrinsics from compiled code.
 374   // If in the future we added an intrinsic which was virtually callable
 375   // we'd have to worry about how to safepoint so that this code is used.
 376 
 377   // mathematical functions inlined by compiler
 378   // (interpreter must provide identical implementation
 379   // in order to avoid monotonicity bugs when switching
 380   // from interpreter to compiler in the middle of some
 381   // computation)
 382   //
 383   // stack: [ ret adr ] <-- rsp
 384   //        [ lo(arg) ]
 385   //        [ hi(arg) ]
 386   //
 387 
 388   if (kind == Interpreter::java_lang_math_fmaD) {
 389     if (!UseFMA) {
 390       return NULL; // Generate a vanilla entry
 391     }
 392     __ movdbl(xmm0, Address(rsp, wordSize));
 393     __ movdbl(xmm1, Address(rsp, 3 * wordSize));
 394     __ movdbl(xmm2, Address(rsp, 5 * wordSize));
 395     __ fmad(xmm0, xmm1, xmm2, xmm0);
 396   } else if (kind == Interpreter::java_lang_math_fmaF) {
 397     if (!UseFMA) {
 398       return NULL; // Generate a vanilla entry
 399     }
 400     __ movflt(xmm0, Address(rsp, wordSize));
 401     __ movflt(xmm1, Address(rsp, 2 * wordSize));
 402     __ movflt(xmm2, Address(rsp, 3 * wordSize));
 403     __ fmaf(xmm0, xmm1, xmm2, xmm0);
 404   } else if (kind == Interpreter::java_lang_math_sqrt) {
 405     __ sqrtsd(xmm0, Address(rsp, wordSize));
 406   } else if (kind == Interpreter::java_lang_math_exp) {
 407     __ movdbl(xmm0, Address(rsp, wordSize));
 408     if (StubRoutines::dexp() != NULL) {
 409       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
 410     } else {
 411       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
 412     }
 413   } else if (kind == Interpreter::java_lang_math_log) {
 414     __ movdbl(xmm0, Address(rsp, wordSize));
 415     if (StubRoutines::dlog() != NULL) {
 416       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
 417     } else {
 418       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
 419     }
 420   } else if (kind == Interpreter::java_lang_math_log10) {
 421     __ movdbl(xmm0, Address(rsp, wordSize));
 422     if (StubRoutines::dlog10() != NULL) {
 423       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
 424     } else {
 425       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
 426     }
 427   } else if (kind == Interpreter::java_lang_math_sin) {
 428     __ movdbl(xmm0, Address(rsp, wordSize));
 429     if (StubRoutines::dsin() != NULL) {
 430       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
 431     } else {
 432       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
 433     }
 434   } else if (kind == Interpreter::java_lang_math_cos) {
 435     __ movdbl(xmm0, Address(rsp, wordSize));
 436     if (StubRoutines::dcos() != NULL) {
 437       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
 438     } else {
 439       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
 440     }
 441   } else if (kind == Interpreter::java_lang_math_pow) {
 442     __ movdbl(xmm1, Address(rsp, wordSize));
 443     __ movdbl(xmm0, Address(rsp, 3 * wordSize));
 444     if (StubRoutines::dpow() != NULL) {
 445       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
 446     } else {
 447       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
 448     }
 449   } else if (kind == Interpreter::java_lang_math_tan) {
 450     __ movdbl(xmm0, Address(rsp, wordSize));
 451     if (StubRoutines::dtan() != NULL) {
 452       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
 453     } else {
 454       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
 455     }
 456   } else {
 457     __ fld_d(Address(rsp, wordSize));
 458     switch (kind) {
 459     case Interpreter::java_lang_math_abs:
 460       __ fabs();
 461       break;
 462     default:
 463       ShouldNotReachHere();
 464     }
 465 
 466     // return double result in xmm0 for interpreter and compilers.
 467     __ subptr(rsp, 2*wordSize);
 468     // Round to 64bit precision
 469     __ fstp_d(Address(rsp, 0));
 470     __ movdbl(xmm0, Address(rsp, 0));
 471     __ addptr(rsp, 2*wordSize);
 472   }
 473 
 474 
 475   __ pop(rax);
 476   __ mov(rsp, r13);
 477   __ jmp(rax);
 478 
 479   return entry_point;
 480 }
 481