1 /*
   2  * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "interpreter/interp_masm.hpp"
  28 #include "interpreter/interpreter.hpp"
  29 #include "interpreter/interpreterRuntime.hpp"
  30 #include "interpreter/templateInterpreterGenerator.hpp"
  31 #include "runtime/arguments.hpp"
  32 #include "runtime/sharedRuntime.hpp"
  33 
  34 #define __ _masm->
  35 
  36 #ifdef _WIN64
  37 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  38   address entry = __ pc();
  39 
  40   // rbx: method
  41   // r14: pointer to locals
  42   // c_rarg3: first stack arg - wordSize
  43   __ mov(c_rarg3, rsp);
  44   // adjust rsp
  45   __ subptr(rsp, 4 * wordSize);
  46   __ call_VM(noreg,
  47              CAST_FROM_FN_PTR(address,
  48                               InterpreterRuntime::slow_signature_handler),
  49              rbx, r14, c_rarg3);
  50 
  51   // rax: result handler
  52 
  53   // Stack layout:
  54   // rsp: 3 integer or float args (if static first is unused)
  55   //      1 float/double identifiers
  56   //        return address
  57   //        stack args
  58   //        garbage
  59   //        expression stack bottom
  60   //        bcp (NULL)
  61   //        ...
  62 
  63   // Do FP first so we can use c_rarg3 as temp
  64   __ movl(c_rarg3, Address(rsp, 3 * wordSize)); // float/double identifiers
  65 
  66   for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) {
  67     XMMRegister floatreg = as_XMMRegister(i+1);
  68     Label isfloatordouble, isdouble, next;
  69 
  70     __ testl(c_rarg3, 1 << (i*2));      // Float or Double?
  71     __ jcc(Assembler::notZero, isfloatordouble);
  72 
  73     // Do Int register here
  74     switch ( i ) {
  75       case 0:
  76         __ movl(rscratch1, Address(rbx, Method::access_flags_offset()));
  77         __ testl(rscratch1, JVM_ACC_STATIC);
  78         __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
  79         break;
  80       case 1:
  81         __ movptr(c_rarg2, Address(rsp, wordSize));
  82         break;
  83       case 2:
  84         __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
  85         break;
  86       default:
  87         break;
  88     }
  89 
  90     __ jmp (next);
  91 
  92     __ bind(isfloatordouble);
  93     __ testl(c_rarg3, 1 << ((i*2)+1));     // Double?
  94     __ jcc(Assembler::notZero, isdouble);
  95 
  96 // Do Float Here
  97     __ movflt(floatreg, Address(rsp, i * wordSize));
  98     __ jmp(next);
  99 
 100 // Do Double here
 101     __ bind(isdouble);
 102     __ movdbl(floatreg, Address(rsp, i * wordSize));
 103 
 104     __ bind(next);
 105   }
 106 
 107 
 108   // restore rsp
 109   __ addptr(rsp, 4 * wordSize);
 110 
 111   __ ret(0);
 112 
 113   return entry;
 114 }
 115 #else
 116 address TemplateInterpreterGenerator::generate_slow_signature_handler() {
 117   address entry = __ pc();
 118 
 119   // rbx: method
 120   // r14: pointer to locals
 121   // c_rarg3: first stack arg - wordSize
 122   __ mov(c_rarg3, rsp);
 123   // adjust rsp
 124   __ subptr(rsp, 14 * wordSize);
 125   __ call_VM(noreg,
 126              CAST_FROM_FN_PTR(address,
 127                               InterpreterRuntime::slow_signature_handler),
 128              rbx, r14, c_rarg3);
 129 
 130   // rax: result handler
 131 
 132   // Stack layout:
 133   // rsp: 5 integer args (if static first is unused)
 134   //      1 float/double identifiers
 135   //      8 double args
 136   //        return address
 137   //        stack args
 138   //        garbage
 139   //        expression stack bottom
 140   //        bcp (NULL)
 141   //        ...
 142 
 143   // Do FP first so we can use c_rarg3 as temp
 144   __ movl(c_rarg3, Address(rsp, 5 * wordSize)); // float/double identifiers
 145 
 146   for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
 147     const XMMRegister r = as_XMMRegister(i);
 148 
 149     Label d, done;
 150 
 151     __ testl(c_rarg3, 1 << i);
 152     __ jcc(Assembler::notZero, d);
 153     __ movflt(r, Address(rsp, (6 + i) * wordSize));
 154     __ jmp(done);
 155     __ bind(d);
 156     __ movdbl(r, Address(rsp, (6 + i) * wordSize));
 157     __ bind(done);
 158   }
 159 
 160   // Now handle integrals.  Only do c_rarg1 if not static.
 161   __ movl(c_rarg3, Address(rbx, Method::access_flags_offset()));
 162   __ testl(c_rarg3, JVM_ACC_STATIC);
 163   __ cmovptr(Assembler::zero, c_rarg1, Address(rsp, 0));
 164 
 165   __ movptr(c_rarg2, Address(rsp, wordSize));
 166   __ movptr(c_rarg3, Address(rsp, 2 * wordSize));
 167   __ movptr(c_rarg4, Address(rsp, 3 * wordSize));
 168   __ movptr(c_rarg5, Address(rsp, 4 * wordSize));
 169 
 170   // restore rsp
 171   __ addptr(rsp, 14 * wordSize);
 172 
 173   __ ret(0);
 174 
 175   return entry;
 176 }
#endif  // _WIN64
 178 
 179 /**
 180  * Method entry for static native methods:
 181  *   int java.util.zip.CRC32.update(int crc, int b)
 182  */
 183 address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
 184   if (UseCRC32Intrinsics) {
 185     address entry = __ pc();
 186 
 187     // rbx,: Method*
 188     // r13: senderSP must preserved for slow path, set SP to it on fast path
 189     // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
 190     // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
 191 
 192     Label slow_path;
 193     // If we need a safepoint check, generate full interpreter entry.
 194     ExternalAddress state(SafepointSynchronize::address_of_state());
 195     __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
 196              SafepointSynchronize::_not_synchronized);
 197     __ jcc(Assembler::notEqual, slow_path);
 198 
 199     // We don't generate local frame and don't align stack because
 200     // we call stub code and there is no safepoint on this path.
 201 
 202     // Load parameters
 203     const Register crc = rax;  // crc
 204     const Register val = c_rarg0;  // source java byte value
 205     const Register tbl = c_rarg1;  // scratch
 206 
 207     // Arguments are reversed on java expression stack
 208     __ movl(val, Address(rsp,   wordSize)); // byte value
 209     __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
 210 
 211     __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
 212     __ notl(crc); // ~crc
 213     __ update_byte_crc32(crc, val, tbl);
 214     __ notl(crc); // ~crc
 215     // result in rax
 216 
 217     // _areturn
 218     __ pop(rdi);                // get return address
 219     __ mov(rsp, r13);           // set sp to sender sp
 220     __ jmp(rdi);
 221 
 222     // generate a vanilla native entry as the slow path
 223     __ bind(slow_path);
 224     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
 225     return entry;
 226   }
 227   return NULL;
 228 }
 229 
 230 /**
 231  * Method entry for static native methods:
 232  *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
 233  *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
 234  */
 235 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
 236   if (UseCRC32Intrinsics) {
 237     address entry = __ pc();
 238 
 239     // rbx,: Method*
 240     // r13: senderSP must preserved for slow path, set SP to it on fast path
 241 
 242     Label slow_path;
 243     // If we need a safepoint check, generate full interpreter entry.
 244     ExternalAddress state(SafepointSynchronize::address_of_state());
 245     __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
 246              SafepointSynchronize::_not_synchronized);
 247     __ jcc(Assembler::notEqual, slow_path);
 248 
 249     // We don't generate local frame and don't align stack because
 250     // we call stub code and there is no safepoint on this path.
 251 
 252     // Load parameters
 253     const Register crc = c_rarg0;  // crc
 254     const Register buf = c_rarg1;  // source java byte array address
 255     const Register len = c_rarg2;  // length
 256     const Register off = len;      // offset (never overlaps with 'len')
 257 
 258     // Arguments are reversed on java expression stack
 259     // Calculate address of start element
 260     if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
 261       __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
 262       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
 263       __ addq(buf, off); // + offset
 264       __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
 265     } else {
 266       __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
 267       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
 268       __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
 269       __ addq(buf, off); // + offset
 270       __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
 271     }
 272     // Can now load 'len' since we're finished with 'off'
 273     __ movl(len, Address(rsp, wordSize)); // Length
 274 
 275     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
 276     // result in rax
 277 
 278     // _areturn
 279     __ pop(rdi);                // get return address
 280     __ mov(rsp, r13);           // set sp to sender sp
 281     __ jmp(rdi);
 282 
 283     // generate a vanilla native entry as the slow path
 284     __ bind(slow_path);
 285     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
 286     return entry;
 287   }
 288   return NULL;
 289 }
 290 
 291 /**
 292 * Method entry for static (non-native) methods:
 293 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
 294 *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
 295 */
 296 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
 297   if (UseCRC32CIntrinsics) {
 298     address entry = __ pc();
 299     // Load parameters
 300     const Register crc = c_rarg0;  // crc
 301     const Register buf = c_rarg1;  // source java byte array address
 302     const Register len = c_rarg2;
 303     const Register off = c_rarg3;  // offset
 304     const Register end = len;
 305 
 306     // Arguments are reversed on java expression stack
 307     // Calculate address of start element
 308     if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
 309       __ movptr(buf, Address(rsp, 3 * wordSize)); // long address
 310       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
 311       __ addq(buf, off); // + offset
 312       __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
 313       // Note on 5 * wordSize vs. 4 * wordSize:
 314       // *   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
 315       //                                                   4         2,3          1        0
 316       // end starts at SP + 8
 317       // The Java(R) Virtual Machine Specification Java SE 7 Edition
 318       // 4.10.2.3. Values of Types long and double
 319       //    "When calculating operand stack length, values of type long and double have length two."
 320     } else {
 321       __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array
 322       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
 323       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
 324       __ addq(buf, off); // + offset
 325       __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC
 326     }
 327     __ movl(end, Address(rsp, wordSize)); // end
 328     __ subl(end, off); // end - off
 329     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
 330     // result in rax
 331     // _areturn
 332     __ pop(rdi);                // get return address
 333     __ mov(rsp, r13);           // set sp to sender sp
 334     __ jmp(rdi);
 335 
 336     return entry;
 337   }
 338 
 339   return NULL;
 340 }
 341 
 342 //
 343 // Various method entries
 344 //
 345 
 346 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
 347 
 348   // rbx,: Method*
 349   // rcx: scratrch
 350   // r13: sender sp
 351 
 352   if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
 353 
 354   address entry_point = __ pc();
 355 
 356   // These don't need a safepoint check because they aren't virtually
 357   // callable. We won't enter these intrinsics from compiled code.
 358   // If in the future we added an intrinsic which was virtually callable
 359   // we'd have to worry about how to safepoint so that this code is used.
 360 
 361   // mathematical functions inlined by compiler
 362   // (interpreter must provide identical implementation
 363   // in order to avoid monotonicity bugs when switching
 364   // from interpreter to compiler in the middle of some
 365   // computation)
 366   //
 367   // stack: [ ret adr ] <-- rsp
 368   //        [ lo(arg) ]
 369   //        [ hi(arg) ]
 370   //
 371 
 372   if (kind == Interpreter::java_lang_math_fmaD) {
 373     if (!UseFMA) {
 374       return NULL; // Generate a vanilla entry
 375     }
 376     __ movdbl(xmm0, Address(rsp, wordSize));
 377     __ movdbl(xmm1, Address(rsp, 3 * wordSize));
 378     __ movdbl(xmm2, Address(rsp, 5 * wordSize));
 379     __ fmad(xmm0, xmm1, xmm2, xmm0);
 380   } else if (kind == Interpreter::java_lang_math_fmaF) {
 381     if (!UseFMA) {
 382       return NULL; // Generate a vanilla entry
 383     }
 384     __ movflt(xmm0, Address(rsp, wordSize));
 385     __ movflt(xmm1, Address(rsp, 2 * wordSize));
 386     __ movflt(xmm2, Address(rsp, 3 * wordSize));
 387     __ fmaf(xmm0, xmm1, xmm2, xmm0);
 388   } else if (kind == Interpreter::java_lang_math_sqrt) {
 389     __ sqrtsd(xmm0, Address(rsp, wordSize));
 390   } else if (kind == Interpreter::java_lang_math_exp) {
 391     __ movdbl(xmm0, Address(rsp, wordSize));
 392     if (StubRoutines::dexp() != NULL) {
 393       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
 394     } else {
 395       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
 396     }
 397   } else if (kind == Interpreter::java_lang_math_log) {
 398     __ movdbl(xmm0, Address(rsp, wordSize));
 399     if (StubRoutines::dlog() != NULL) {
 400       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
 401     } else {
 402       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
 403     }
 404   } else if (kind == Interpreter::java_lang_math_log10) {
 405     __ movdbl(xmm0, Address(rsp, wordSize));
 406     if (StubRoutines::dlog10() != NULL) {
 407       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
 408     } else {
 409       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
 410     }
 411   } else if (kind == Interpreter::java_lang_math_sin) {
 412     __ movdbl(xmm0, Address(rsp, wordSize));
 413     if (StubRoutines::dsin() != NULL) {
 414       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
 415     } else {
 416       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
 417     }
 418   } else if (kind == Interpreter::java_lang_math_cos) {
 419     __ movdbl(xmm0, Address(rsp, wordSize));
 420     if (StubRoutines::dcos() != NULL) {
 421       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
 422     } else {
 423       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
 424     }
 425   } else if (kind == Interpreter::java_lang_math_pow) {
 426     __ movdbl(xmm1, Address(rsp, wordSize));
 427     __ movdbl(xmm0, Address(rsp, 3 * wordSize));
 428     if (StubRoutines::dpow() != NULL) {
 429       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
 430     } else {
 431       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
 432     }
 433   } else if (kind == Interpreter::java_lang_math_tan) {
 434     __ movdbl(xmm0, Address(rsp, wordSize));
 435     if (StubRoutines::dtan() != NULL) {
 436       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
 437     } else {
 438       __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
 439     }
 440   } else {
 441     __ fld_d(Address(rsp, wordSize));
 442     switch (kind) {
 443     case Interpreter::java_lang_math_abs:
 444       __ fabs();
 445       break;
 446     default:
 447       ShouldNotReachHere();
 448     }
 449 
 450     // return double result in xmm0 for interpreter and compilers.
 451     __ subptr(rsp, 2*wordSize);
 452     // Round to 64bit precision
 453     __ fstp_d(Address(rsp, 0));
 454     __ movdbl(xmm0, Address(rsp, 0));
 455     __ addptr(rsp, 2*wordSize);
 456   }
 457 
 458 
 459   __ pop(rax);
 460   __ mov(rsp, r13);
 461   __ jmp(rax);
 462 
 463   return entry_point;
 464 }
 465