1 /*
   2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "interpreter/interpreter.hpp"
  27 #include "interpreter/interpreterRuntime.hpp"
  28 #include "interpreter/templateTable.hpp"
  29 #include "memory/universe.inline.hpp"
  30 #include "oops/methodDataOop.hpp"
  31 #include "oops/objArrayKlass.hpp"
  32 #include "oops/oop.inline.hpp"
  33 #include "prims/methodHandles.hpp"
  34 #include "runtime/sharedRuntime.hpp"
  35 #include "runtime/stubRoutines.hpp"
  36 #include "runtime/synchronizer.hpp"
  37 
  38 #ifndef CC_INTERP
  39 
  40 #define __ _masm->
  41 
  42 // Platform-dependent initialization
  43 
  44 void TemplateTable::pd_initialize() {
  45   // No amd64 specific initialization
  46 }
  47 
  48 // Address computation: local variables
  49 
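     // Note for the helpers below: r14 holds the pointer to the first local of
     // the current frame, and locals occupy descending addresses as the index
     // grows, which is why locals_index() negates the index before it is used
     // with times_8 scaling.  Category 2 values (long/double) take two slots;
     // laddress()/daddress() refer to the second slot, hence the n + 1.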
  50 static inline Address iaddress(int n) {
  51   return Address(r14, Interpreter::local_offset_in_bytes(n));
  52 }
  53 
  54 static inline Address laddress(int n) {
  55   return iaddress(n + 1);
  56 }
  57 
  58 static inline Address faddress(int n) {
  59   return iaddress(n);
  60 }
  61 
  62 static inline Address daddress(int n) {
  63   return laddress(n);
  64 }
  65 
  66 static inline Address aaddress(int n) {
  67   return iaddress(n);
  68 }
  69 
  70 static inline Address iaddress(Register r) {
  71   return Address(r14, r, Address::times_8);
  72 }
  73 
  74 static inline Address laddress(Register r) {
  75   return Address(r14, r, Address::times_8, Interpreter::local_offset_in_bytes(1));
  76 }
  77 
  78 static inline Address faddress(Register r) {
  79   return iaddress(r);
  80 }
  81 
  82 static inline Address daddress(Register r) {
  83   return laddress(r);
  84 }
  85 
  86 static inline Address aaddress(Register r) {
  87   return iaddress(r);
  88 }
  89 
  90 static inline Address at_rsp() {
  91   return Address(rsp, 0);
  92 }
  93 
  94 // Address of the element at the top of the Java expression stack, which may
  95 // differ from rsp(); for category 1 values it is the same as rsp().
  96 static inline Address at_tos   () {
  97   return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
  98 }
  99 
 100 static inline Address at_tos_p1() {
 101   return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
 102 }
 103 
 104 static inline Address at_tos_p2() {
 105   return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
 106 }
 107 
 108 static inline Address at_tos_p3() {
 109   return Address(rsp,  Interpreter::expr_offset_in_bytes(3));
 110 }
 111 
 112 // Condition conversion
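     // The branch templates test the negated condition so that they can jump
     // past the taken-branch code when the bytecode's condition is false.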
 113 static Assembler::Condition j_not(TemplateTable::Condition cc) {
 114   switch (cc) {
 115   case TemplateTable::equal        : return Assembler::notEqual;
 116   case TemplateTable::not_equal    : return Assembler::equal;
 117   case TemplateTable::less         : return Assembler::greaterEqual;
 118   case TemplateTable::less_equal   : return Assembler::greater;
 119   case TemplateTable::greater      : return Assembler::lessEqual;
 120   case TemplateTable::greater_equal: return Assembler::less;
 121   }
 122   ShouldNotReachHere();
 123   return Assembler::zero;
 124 }
 125 
 126 
 127 // Miscellaneous helper routines
 128 // Store an oop (or NULL) at the address described by obj.
 129 // If val == noreg this means store a NULL
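     // The 'precise' flag matters only for card-marking barriers: when true the
     // card for the exact element address is dirtied (array stores); otherwise
     // marking the card of the object start is sufficient.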
 130 
 131 static void do_oop_store(InterpreterMacroAssembler* _masm,
 132                          Address obj,
 133                          Register val,
 134                          BarrierSet::Name barrier,
 135                          bool precise) {
 136   assert(val == noreg || val == rax, "parameter is just for looks");
 137   switch (barrier) {
 138 #ifndef SERIALGC
 139     case BarrierSet::G1SATBCT:
 140     case BarrierSet::G1SATBCTLogging:
 141       {
 142         // flatten object address if needed
 143         if (obj.index() == noreg && obj.disp() == 0) {
 144           if (obj.base() != rdx) {
 145             __ movq(rdx, obj.base());
 146           }
 147         } else {
 148           __ leaq(rdx, obj);
 149         }
 150         __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
 151         if (val == noreg) {
 152           __ store_heap_oop_null(Address(rdx, 0));
 153         } else {
 154           __ store_heap_oop(Address(rdx, 0), val);
 155           __ g1_write_barrier_post(rdx, val, r8, rbx);
 156         }
 157 
 158       }
 159       break;
 160 #endif // SERIALGC
 161     case BarrierSet::CardTableModRef:
 162     case BarrierSet::CardTableExtension:
 163       {
 164         if (val == noreg) {
 165           __ store_heap_oop_null(obj);
 166         } else {
 167           __ store_heap_oop(obj, val);
 168           // flatten object address if needed
 169           if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
 170             __ store_check(obj.base());
 171           } else {
 172             __ leaq(rdx, obj);
 173             __ store_check(rdx);
 174           }
 175         }
 176       }
 177       break;
 178     case BarrierSet::ModRef:
 179     case BarrierSet::Other:
 180       if (val == noreg) {
 181         __ store_heap_oop_null(obj);
 182       } else {
 183         __ store_heap_oop(obj, val);
 184       }
 185       break;
 186     default      :
 187       ShouldNotReachHere();
 188 
 189   }
 190 }
 191 
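     // r13 holds the bytecode pointer (bcp) throughout the template interpreter,
     // so bytecode operands are addressed relative to it.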
 192 Address TemplateTable::at_bcp(int offset) {
 193   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
 194   return Address(r13, offset);
 195 }
 196 
 197 void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
 198                                    Register scratch,
 199                                    bool load_bc_into_scratch/*=true*/) {
 200   if (!RewriteBytecodes) {
 201     return;
 202   }
 203   // the pair bytecodes have already done the load.
 204   if (load_bc_into_scratch) {
 205     __ movl(bc, bytecode);
 206   }
 207   Label patch_done;
 208   if (JvmtiExport::can_post_breakpoint()) {
 209     Label fast_patch;
 210     // if a breakpoint is present we can't rewrite the stream directly
 211     __ movzbl(scratch, at_bcp(0));
 212     __ cmpl(scratch, Bytecodes::_breakpoint);
 213     __ jcc(Assembler::notEqual, fast_patch);
 214     __ get_method(scratch);
 215     // Let breakpoint table handling rewrite to quicker bytecode
 216     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc);
 217 #ifndef ASSERT
 218     __ jmpb(patch_done);
 219 #else
 220     __ jmp(patch_done);
 221 #endif
 222     __ bind(fast_patch);
 223   }
 224 #ifdef ASSERT
 225   Label okay;
 226   __ load_unsigned_byte(scratch, at_bcp(0));
 227   __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
 228   __ jcc(Assembler::equal, okay);
 229   __ cmpl(scratch, bc);
 230   __ jcc(Assembler::equal, okay);
 231   __ stop("patching the wrong bytecode");
 232   __ bind(okay);
 233 #endif
 234   // patch bytecode
 235   __ movb(at_bcp(0), bc);
 236   __ bind(patch_done);
 237 }
 238 
 239 
 240 // Individual instructions
 241 
 242 void TemplateTable::nop() {
 243   transition(vtos, vtos);
 244   // nothing to do
 245 }
 246 
 247 void TemplateTable::shouldnotreachhere() {
 248   transition(vtos, vtos);
 249   __ stop("shouldnotreachhere bytecode");
 250 }
 251 
 252 void TemplateTable::aconst_null() {
 253   transition(vtos, atos);
 254   __ xorl(rax, rax);
 255 }
 256 
 257 void TemplateTable::iconst(int value) {
 258   transition(vtos, itos);
 259   if (value == 0) {
 260     __ xorl(rax, rax);
 261   } else {
 262     __ movl(rax, value);
 263   }
 264 }
 265 
 266 void TemplateTable::lconst(int value) {
 267   transition(vtos, ltos);
 268   if (value == 0) {
 269     __ xorl(rax, rax);
 270   } else {
 271     __ movl(rax, value);
 272   }
 273 }
 274 
 275 void TemplateTable::fconst(int value) {
 276   transition(vtos, ftos);
 277   static float one = 1.0f, two = 2.0f;
 278   switch (value) {
 279   case 0:
 280     __ xorps(xmm0, xmm0);
 281     break;
 282   case 1:
 283     __ movflt(xmm0, ExternalAddress((address) &one));
 284     break;
 285   case 2:
 286     __ movflt(xmm0, ExternalAddress((address) &two));
 287     break;
 288   default:
 289     ShouldNotReachHere();
 290     break;
 291   }
 292 }
 293 
 294 void TemplateTable::dconst(int value) {
 295   transition(vtos, dtos);
 296   static double one = 1.0;
 297   switch (value) {
 298   case 0:
 299     __ xorpd(xmm0, xmm0);
 300     break;
 301   case 1:
 302     __ movdbl(xmm0, ExternalAddress((address) &one));
 303     break;
 304   default:
 305     ShouldNotReachHere();
 306     break;
 307   }
 308 }
 309 
 310 void TemplateTable::bipush() {
 311   transition(vtos, itos);
 312   __ load_signed_byte(rax, at_bcp(1));
 313 }
 314 
 315 void TemplateTable::sipush() {
 316   transition(vtos, itos);
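       // The 16-bit operand is big-endian in the bytecode stream: load it,
       // byte-swap it into the upper half of eax, then shift it back down
       // arithmetically to sign-extend the value.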
 317   __ load_unsigned_short(rax, at_bcp(1));
 318   __ bswapl(rax);
 319   __ sarl(rax, 16);
 320 }
 321 
 322 void TemplateTable::ldc(bool wide) {
 323   transition(vtos, vtos);
 324   Label call_ldc, notFloat, notClass, Done;
 325 
 326   if (wide) {
 327     __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
 328   } else {
 329     __ load_unsigned_byte(rbx, at_bcp(1));
 330   }
 331 
 332   __ get_cpool_and_tags(rcx, rax);
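       // rcx: constant pool oop, rax: tags array (one byte tag per entry)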
 333   const int base_offset = constantPoolOopDesc::header_size() * wordSize;
 334   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
 335 
 336   // get type
 337   __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
 338 
 339   // unresolved string - get the resolved string
 340   __ cmpl(rdx, JVM_CONSTANT_UnresolvedString);
 341   __ jccb(Assembler::equal, call_ldc);
 342 
 343   // unresolved class - get the resolved class
 344   __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
 345   __ jccb(Assembler::equal, call_ldc);
 346 
 347   // unresolved class in error state - call into runtime to throw the error
 348   // from the first resolution attempt
 349   __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
 350   __ jccb(Assembler::equal, call_ldc);
 351 
 352   // resolved class - need to call vm to get java mirror of the class
 353   __ cmpl(rdx, JVM_CONSTANT_Class);
 354   __ jcc(Assembler::notEqual, notClass);
 355 
 356   __ bind(call_ldc);
 357   __ movl(c_rarg1, wide);
 358   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
 359   __ push_ptr(rax);
 360   __ verify_oop(rax);
 361   __ jmp(Done);
 362 
 363   __ bind(notClass);
 364   __ cmpl(rdx, JVM_CONSTANT_Float);
 365   __ jccb(Assembler::notEqual, notFloat);
 366   // ftos
 367   __ movflt(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
 368   __ push_f();
 369   __ jmp(Done);
 370 
 371   __ bind(notFloat);
 372 #ifdef ASSERT
 373   {
 374     Label L;
 375     __ cmpl(rdx, JVM_CONSTANT_Integer);
 376     __ jcc(Assembler::equal, L);
 377     __ cmpl(rdx, JVM_CONSTANT_String);
 378     __ jcc(Assembler::equal, L);
 379     __ stop("unexpected tag type in ldc");
 380     __ bind(L);
 381   }
 382 #endif
 383   // atos and itos
 384   Label isOop;
 385   __ cmpl(rdx, JVM_CONSTANT_Integer);
 386   __ jcc(Assembler::notEqual, isOop);
 387   __ movl(rax, Address(rcx, rbx, Address::times_8, base_offset));
 388   __ push_i(rax);
 389   __ jmp(Done);
 390 
 391   __ bind(isOop);
 392   __ movptr(rax, Address(rcx, rbx, Address::times_8, base_offset));
 393   __ push_ptr(rax);
 394 
 395   if (VerifyOops) {
 396     __ verify_oop(rax);
 397   }
 398 
 399   __ bind(Done);
 400 }
 401 
 402 // Fast path for caching oop constants.
 403 // %%% We should use this to handle Class and String constants also.
 404 // %%% It will simplify the ldc/primitive path considerably.
 405 void TemplateTable::fast_aldc(bool wide) {
 406   transition(vtos, atos);
 407 
 408   if (!EnableMethodHandles) {
 409     // We should not encounter this bytecode if !EnableMethodHandles.
 410     // The verifier will stop it.  However, if we get past the verifier,
 411     // this will stop the thread in a reasonable way, without crashing the JVM.
 412     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
 413                      InterpreterRuntime::throw_IncompatibleClassChangeError));
 414     // the call_VM checks for exception, so we should never return here.
 415     __ should_not_reach_here();
 416     return;
 417   }
 418 
 419   const Register cache = rcx;
 420   const Register index = rdx;
 421 
 422   resolve_cache_and_index(f1_oop, rax, cache, index, wide ? sizeof(u2) : sizeof(u1));
 423   if (VerifyOops) {
 424     __ verify_oop(rax);
 425   }
 426 }
 427 
 428 void TemplateTable::ldc2_w() {
 429   transition(vtos, vtos);
 430   Label Long, Done;
 431   __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
 432 
 433   __ get_cpool_and_tags(rcx, rax);
 434   const int base_offset = constantPoolOopDesc::header_size() * wordSize;
 435   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
 436 
 437   // get type
 438   __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
 439           JVM_CONSTANT_Double);
 440   __ jccb(Assembler::notEqual, Long);
 441   // dtos
 442   __ movdbl(xmm0, Address(rcx, rbx, Address::times_8, base_offset));
 443   __ push_d();
 444   __ jmpb(Done);
 445 
 446   __ bind(Long);
 447   // ltos
 448   __ movq(rax, Address(rcx, rbx, Address::times_8, base_offset));
 449   __ push_l();
 450 
 451   __ bind(Done);
 452 }
 453 
 454 void TemplateTable::locals_index(Register reg, int offset) {
 455   __ load_unsigned_byte(reg, at_bcp(offset));
 456   __ negptr(reg);
 457 }
 458 
 459 void TemplateTable::iload() {
 460   transition(vtos, itos);
 461   if (RewriteFrequentPairs) {
 462     Label rewrite, done;
 463     const Register bc = c_rarg3;
 464     assert(rbx != bc, "register damaged");
 465 
 466     // get next byte
 467     __ load_unsigned_byte(rbx,
 468                           at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
 469     // If the next bytecode is _iload, wait to rewrite: we only want to
 470     // rewrite the last two iloads of a run.  If it is _fast_iload, the
 471     // following iload has already been rewritten, so the current one is the
 472     // first of an iload pair and is rewritten to _fast_iload2 below.
 473     __ cmpl(rbx, Bytecodes::_iload);
 474     __ jcc(Assembler::equal, done);
 475 
 476     __ cmpl(rbx, Bytecodes::_fast_iload);
 477     __ movl(bc, Bytecodes::_fast_iload2);
 478     __ jccb(Assembler::equal, rewrite);
 479 
 480     // if _caload, rewrite to fast_icaload
 481     __ cmpl(rbx, Bytecodes::_caload);
 482     __ movl(bc, Bytecodes::_fast_icaload);
 483     __ jccb(Assembler::equal, rewrite);
 484 
 485     // rewrite so iload doesn't check again.
 486     __ movl(bc, Bytecodes::_fast_iload);
 487 
 488     // rewrite
 489     // bc: fast bytecode
 490     __ bind(rewrite);
 491     patch_bytecode(Bytecodes::_iload, bc, rbx, false);
 492     __ bind(done);
 493   }
 494 
 495   // Get the local value into tos
 496   locals_index(rbx);
 497   __ movl(rax, iaddress(rbx));
 498 }
 499 
 500 void TemplateTable::fast_iload2() {
 501   transition(vtos, itos);
 502   locals_index(rbx);
 503   __ movl(rax, iaddress(rbx));
 504   __ push(itos);
 505   locals_index(rbx, 3);
 506   __ movl(rax, iaddress(rbx));
 507 }
 508 
 509 void TemplateTable::fast_iload() {
 510   transition(vtos, itos);
 511   locals_index(rbx);
 512   __ movl(rax, iaddress(rbx));
 513 }
 514 
 515 void TemplateTable::lload() {
 516   transition(vtos, ltos);
 517   locals_index(rbx);
 518   __ movq(rax, laddress(rbx));
 519 }
 520 
 521 void TemplateTable::fload() {
 522   transition(vtos, ftos);
 523   locals_index(rbx);
 524   __ movflt(xmm0, faddress(rbx));
 525 }
 526 
 527 void TemplateTable::dload() {
 528   transition(vtos, dtos);
 529   locals_index(rbx);
 530   __ movdbl(xmm0, daddress(rbx));
 531 }
 532 
 533 void TemplateTable::aload() {
 534   transition(vtos, atos);
 535   locals_index(rbx);
 536   __ movptr(rax, aaddress(rbx));
 537 }
 538 
 539 void TemplateTable::locals_index_wide(Register reg) {
 540   __ movl(reg, at_bcp(2));
 541   __ bswapl(reg);
 542   __ shrl(reg, 16);
 543   __ negptr(reg);
 544 }
 545 
 546 void TemplateTable::wide_iload() {
 547   transition(vtos, itos);
 548   locals_index_wide(rbx);
 549   __ movl(rax, iaddress(rbx));
 550 }
 551 
 552 void TemplateTable::wide_lload() {
 553   transition(vtos, ltos);
 554   locals_index_wide(rbx);
 555   __ movq(rax, laddress(rbx));
 556 }
 557 
 558 void TemplateTable::wide_fload() {
 559   transition(vtos, ftos);
 560   locals_index_wide(rbx);
 561   __ movflt(xmm0, faddress(rbx));
 562 }
 563 
 564 void TemplateTable::wide_dload() {
 565   transition(vtos, dtos);
 566   locals_index_wide(rbx);
 567   __ movdbl(xmm0, daddress(rbx));
 568 }
 569 
 570 void TemplateTable::wide_aload() {
 571   transition(vtos, atos);
 572   locals_index_wide(rbx);
 573   __ movptr(rax, aaddress(rbx));
 574 }
 575 
 576 void TemplateTable::index_check(Register array, Register index) {
 577   // destroys rbx
 578   // check array
 579   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
 580   // sign extend index for use by indexed load
 581   __ movl2ptr(index, index);
 582   // check index
 583   __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
 584   if (index != rbx) {
 585     // ??? convention: move aberrant index into ebx for exception message
 586     assert(rbx != array, "different registers");
 587     __ movl(rbx, index);
 588   }
 589   __ jump_cc(Assembler::aboveEqual,
 590              ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
 591 }
 592 
 593 void TemplateTable::iaload() {
 594   transition(itos, itos);
 595   __ pop_ptr(rdx);
 596   // eax: index
 597   // rdx: array
 598   index_check(rdx, rax); // kills rbx
 599   __ movl(rax, Address(rdx, rax,
 600                        Address::times_4,
 601                        arrayOopDesc::base_offset_in_bytes(T_INT)));
 602 }
 603 
 604 void TemplateTable::laload() {
 605   transition(itos, ltos);
 606   __ pop_ptr(rdx);
 607   // eax: index
 608   // rdx: array
 609   index_check(rdx, rax); // kills rbx
 610   __ movq(rax, Address(rdx, rbx,
 611                        Address::times_8,
 612                        arrayOopDesc::base_offset_in_bytes(T_LONG)));
 613 }
 614 
 615 void TemplateTable::faload() {
 616   transition(itos, ftos);
 617   __ pop_ptr(rdx);
 618   // eax: index
 619   // rdx: array
 620   index_check(rdx, rax); // kills rbx
 621   __ movflt(xmm0, Address(rdx, rax,
 622                          Address::times_4,
 623                          arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 624 }
 625 
 626 void TemplateTable::daload() {
 627   transition(itos, dtos);
 628   __ pop_ptr(rdx);
 629   // eax: index
 630   // rdx: array
 631   index_check(rdx, rax); // kills rbx
 632   __ movdbl(xmm0, Address(rdx, rax,
 633                           Address::times_8,
 634                           arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 635 }
 636 
 637 void TemplateTable::aaload() {
 638   transition(itos, atos);
 639   __ pop_ptr(rdx);
 640   // eax: index
 641   // rdx: array
 642   index_check(rdx, rax); // kills rbx
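       // With compressed oops the elements are 4 bytes wide; load_heap_oop
       // decodes the narrow oop after loading it.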
 643   __ load_heap_oop(rax, Address(rdx, rax,
 644                                 UseCompressedOops ? Address::times_4 : Address::times_8,
 645                                 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 646 }
 647 
 648 void TemplateTable::baload() {
 649   transition(itos, itos);
 650   __ pop_ptr(rdx);
 651   // eax: index
 652   // rdx: array
 653   index_check(rdx, rax); // kills rbx
 654   __ load_signed_byte(rax,
 655                       Address(rdx, rax,
 656                               Address::times_1,
 657                               arrayOopDesc::base_offset_in_bytes(T_BYTE)));
 658 }
 659 
 660 void TemplateTable::caload() {
 661   transition(itos, itos);
 662   __ pop_ptr(rdx);
 663   // eax: index
 664   // rdx: array
 665   index_check(rdx, rax); // kills rbx
 666   __ load_unsigned_short(rax,
 667                          Address(rdx, rax,
 668                                  Address::times_2,
 669                                  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 670 }
 671 
 672 // iload followed by caload frequent pair
 673 void TemplateTable::fast_icaload() {
 674   transition(vtos, itos);
 675   // load index out of locals
 676   locals_index(rbx);
 677   __ movl(rax, iaddress(rbx));
 678 
 679   // eax: index
 680   // rdx: array
 681   __ pop_ptr(rdx);
 682   index_check(rdx, rax); // kills rbx
 683   __ load_unsigned_short(rax,
 684                          Address(rdx, rax,
 685                                  Address::times_2,
 686                                  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 687 }
 688 
 689 void TemplateTable::saload() {
 690   transition(itos, itos);
 691   __ pop_ptr(rdx);
 692   // eax: index
 693   // rdx: array
 694   index_check(rdx, rax); // kills rbx
 695   __ load_signed_short(rax,
 696                        Address(rdx, rax,
 697                                Address::times_2,
 698                                arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 699 }
 700 
 701 void TemplateTable::iload(int n) {
 702   transition(vtos, itos);
 703   __ movl(rax, iaddress(n));
 704 }
 705 
 706 void TemplateTable::lload(int n) {
 707   transition(vtos, ltos);
 708   __ movq(rax, laddress(n));
 709 }
 710 
 711 void TemplateTable::fload(int n) {
 712   transition(vtos, ftos);
 713   __ movflt(xmm0, faddress(n));
 714 }
 715 
 716 void TemplateTable::dload(int n) {
 717   transition(vtos, dtos);
 718   __ movdbl(xmm0, daddress(n));
 719 }
 720 
 721 void TemplateTable::aload(int n) {
 722   transition(vtos, atos);
 723   __ movptr(rax, aaddress(n));
 724 }
 725 
 726 void TemplateTable::aload_0() {
 727   transition(vtos, atos);
 728   // According to bytecode histograms, the pairs:
 729   //
 730   // _aload_0, _fast_igetfield
 731   // _aload_0, _fast_agetfield
 732   // _aload_0, _fast_fgetfield
 733   //
 734   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 735   // _aload_0 bytecode checks if the next bytecode is either
 736   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 737   // rewrites the current bytecode into a pair bytecode; otherwise it
 738   // rewrites the current bytecode into _fast_aload_0 that doesn't do
 739   // the pair check anymore.
 740   //
 741   // Note: If the next bytecode is _getfield, the rewrite must be
 742   //       delayed, otherwise we may miss an opportunity for a pair.
 743   //
 744   // Also rewrite frequent pairs
 745   //   aload_0, aload_1
 746   //   aload_0, iload_1
 747   // These pairs need only a small amount of code and are the most
 748   // profitable to rewrite.
 749   if (RewriteFrequentPairs) {
 750     Label rewrite, done;
 751     const Register bc = c_rarg3;
 752     assert(rbx != bc, "register damaged");
 753     // get next byte
 754     __ load_unsigned_byte(rbx,
 755                           at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
 756 
 757     // do actual aload_0
 758     aload(0);
 759 
 760     // if _getfield then wait with rewrite
 761     __ cmpl(rbx, Bytecodes::_getfield);
 762     __ jcc(Assembler::equal, done);
 763 
 764     // if _igetfield then rewrite to _fast_iaccess_0
 765     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) ==
 766            Bytecodes::_aload_0,
 767            "fix bytecode definition");
 768     __ cmpl(rbx, Bytecodes::_fast_igetfield);
 769     __ movl(bc, Bytecodes::_fast_iaccess_0);
 770     __ jccb(Assembler::equal, rewrite);
 771 
 772     // if _agetfield then rewrite to _fast_aaccess_0
 773     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) ==
 774            Bytecodes::_aload_0,
 775            "fix bytecode definition");
 776     __ cmpl(rbx, Bytecodes::_fast_agetfield);
 777     __ movl(bc, Bytecodes::_fast_aaccess_0);
 778     __ jccb(Assembler::equal, rewrite);
 779 
 780     // if _fgetfield then rewrite to _fast_faccess_0
 781     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) ==
 782            Bytecodes::_aload_0,
 783            "fix bytecode definition");
 784     __ cmpl(rbx, Bytecodes::_fast_fgetfield);
 785     __ movl(bc, Bytecodes::_fast_faccess_0);
 786     __ jccb(Assembler::equal, rewrite);
 787 
 788     // else rewrite to _fast_aload_0
 789     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) ==
 790            Bytecodes::_aload_0,
 791            "fix bytecode definition");
 792     __ movl(bc, Bytecodes::_fast_aload_0);
 793 
 794     // rewrite
 795     // bc: fast bytecode
 796     __ bind(rewrite);
 797     patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
 798 
 799     __ bind(done);
 800   } else {
 801     aload(0);
 802   }
 803 }
 804 
 805 void TemplateTable::istore() {
 806   transition(itos, vtos);
 807   locals_index(rbx);
 808   __ movl(iaddress(rbx), rax);
 809 }
 810 
 811 void TemplateTable::lstore() {
 812   transition(ltos, vtos);
 813   locals_index(rbx);
 814   __ movq(laddress(rbx), rax);
 815 }
 816 
 817 void TemplateTable::fstore() {
 818   transition(ftos, vtos);
 819   locals_index(rbx);
 820   __ movflt(faddress(rbx), xmm0);
 821 }
 822 
 823 void TemplateTable::dstore() {
 824   transition(dtos, vtos);
 825   locals_index(rbx);
 826   __ movdbl(daddress(rbx), xmm0);
 827 }
 828 
 829 void TemplateTable::astore() {
 830   transition(vtos, vtos);
 831   __ pop_ptr(rax);
 832   locals_index(rbx);
 833   __ movptr(aaddress(rbx), rax);
 834 }
 835 
 836 void TemplateTable::wide_istore() {
 837   transition(vtos, vtos);
 838   __ pop_i();
 839   locals_index_wide(rbx);
 840   __ movl(iaddress(rbx), rax);
 841 }
 842 
 843 void TemplateTable::wide_lstore() {
 844   transition(vtos, vtos);
 845   __ pop_l();
 846   locals_index_wide(rbx);
 847   __ movq(laddress(rbx), rax);
 848 }
 849 
 850 void TemplateTable::wide_fstore() {
 851   transition(vtos, vtos);
 852   __ pop_f();
 853   locals_index_wide(rbx);
 854   __ movflt(faddress(rbx), xmm0);
 855 }
 856 
 857 void TemplateTable::wide_dstore() {
 858   transition(vtos, vtos);
 859   __ pop_d();
 860   locals_index_wide(rbx);
 861   __ movdbl(daddress(rbx), xmm0);
 862 }
 863 
 864 void TemplateTable::wide_astore() {
 865   transition(vtos, vtos);
 866   __ pop_ptr(rax);
 867   locals_index_wide(rbx);
 868   __ movptr(aaddress(rbx), rax);
 869 }
 870 
 871 void TemplateTable::iastore() {
 872   transition(itos, vtos);
 873   __ pop_i(rbx);
 874   __ pop_ptr(rdx);
 875   // eax: value
 876   // ebx: index
 877   // rdx: array
 878   index_check(rdx, rbx); // prefer index in ebx
 879   __ movl(Address(rdx, rbx,
 880                   Address::times_4,
 881                   arrayOopDesc::base_offset_in_bytes(T_INT)),
 882           rax);
 883 }
 884 
 885 void TemplateTable::lastore() {
 886   transition(ltos, vtos);
 887   __ pop_i(rbx);
 888   __ pop_ptr(rdx);
 889   // rax: value
 890   // ebx: index
 891   // rdx: array
 892   index_check(rdx, rbx); // prefer index in ebx
 893   __ movq(Address(rdx, rbx,
 894                   Address::times_8,
 895                   arrayOopDesc::base_offset_in_bytes(T_LONG)),
 896           rax);
 897 }
 898 
 899 void TemplateTable::fastore() {
 900   transition(ftos, vtos);
 901   __ pop_i(rbx);
 902   __ pop_ptr(rdx);
 903   // xmm0: value
 904   // ebx:  index
 905   // rdx:  array
 906   index_check(rdx, rbx); // prefer index in ebx
 907   __ movflt(Address(rdx, rbx,
 908                    Address::times_4,
 909                    arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
 910            xmm0);
 911 }
 912 
 913 void TemplateTable::dastore() {
 914   transition(dtos, vtos);
 915   __ pop_i(rbx);
 916   __ pop_ptr(rdx);
 917   // xmm0: value
 918   // ebx:  index
 919   // rdx:  array
 920   index_check(rdx, rbx); // prefer index in ebx
 921   __ movdbl(Address(rdx, rbx,
 922                    Address::times_8,
 923                    arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
 924            xmm0);
 925 }
 926 
 927 void TemplateTable::aastore() {
 928   Label is_null, ok_is_subtype, done;
 929   transition(vtos, vtos);
 930   // stack: ..., array, index, value
 931   __ movptr(rax, at_tos());    // value
 932   __ movl(rcx, at_tos_p1()); // index
 933   __ movptr(rdx, at_tos_p2()); // array
 934 
 935   Address element_address(rdx, rcx,
 936                           UseCompressedOops ? Address::times_4 : Address::times_8,
 937                           arrayOopDesc::base_offset_in_bytes(T_OBJECT));
 938 
 939   index_check(rdx, rcx);     // kills rbx
 940   // do array store check - check for NULL value first
 941   __ testptr(rax, rax);
 942   __ jcc(Assembler::zero, is_null);
 943 
 944   // Move subklass into rbx
 945   __ load_klass(rbx, rax);
 946   // Move superklass into rax
 947   __ load_klass(rax, rdx);
 948   __ movptr(rax, Address(rax,
 949                          sizeof(oopDesc) +
 950                          objArrayKlass::element_klass_offset_in_bytes()));
 951   // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
 952   __ lea(rdx, element_address);
 953 
 954   // Generate subtype check.  Blows rcx, rdi
 955   // Superklass in rax.  Subklass in rbx.
 956   __ gen_subtype_check(rbx, ok_is_subtype);
 957 
 958   // Come here on failure
 959   // object is at TOS
 960   __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
 961 
 962   // Come here on success
 963   __ bind(ok_is_subtype);
 964 
 965   // Get the value we will store
 966   __ movptr(rax, at_tos());
 967   // Now store using the appropriate barrier
 968   do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
 969   __ jmp(done);
 970 
 971   // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
 972   __ bind(is_null);
 973   __ profile_null_seen(rbx);
 974 
 975   // Store a NULL
 976   do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
 977 
 978   // Pop stack arguments
 979   __ bind(done);
 980   __ addptr(rsp, 3 * Interpreter::stackElementSize);
 981 }
 982 
 983 void TemplateTable::bastore() {
 984   transition(itos, vtos);
 985   __ pop_i(rbx);
 986   __ pop_ptr(rdx);
 987   // eax: value
 988   // ebx: index
 989   // rdx: array
 990   index_check(rdx, rbx); // prefer index in ebx
 991   __ movb(Address(rdx, rbx,
 992                   Address::times_1,
 993                   arrayOopDesc::base_offset_in_bytes(T_BYTE)),
 994           rax);
 995 }
 996 
 997 void TemplateTable::castore() {
 998   transition(itos, vtos);
 999   __ pop_i(rbx);
1000   __ pop_ptr(rdx);
1001   // eax: value
1002   // ebx: index
1003   // rdx: array
1004   index_check(rdx, rbx);  // prefer index in ebx
1005   __ movw(Address(rdx, rbx,
1006                   Address::times_2,
1007                   arrayOopDesc::base_offset_in_bytes(T_CHAR)),
1008           rax);
1009 }
1010 
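     // sastore reuses castore: both store the low 16 bits of the value, and
     // char and short array elements have the same size.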
1011 void TemplateTable::sastore() {
1012   castore();
1013 }
1014 
1015 void TemplateTable::istore(int n) {
1016   transition(itos, vtos);
1017   __ movl(iaddress(n), rax);
1018 }
1019 
1020 void TemplateTable::lstore(int n) {
1021   transition(ltos, vtos);
1022   __ movq(laddress(n), rax);
1023 }
1024 
1025 void TemplateTable::fstore(int n) {
1026   transition(ftos, vtos);
1027   __ movflt(faddress(n), xmm0);
1028 }
1029 
1030 void TemplateTable::dstore(int n) {
1031   transition(dtos, vtos);
1032   __ movdbl(daddress(n), xmm0);
1033 }
1034 
1035 void TemplateTable::astore(int n) {
1036   transition(vtos, vtos);
1037   __ pop_ptr(rax);
1038   __ movptr(aaddress(n), rax);
1039 }
1040 
1041 void TemplateTable::pop() {
1042   transition(vtos, vtos);
1043   __ addptr(rsp, Interpreter::stackElementSize);
1044 }
1045 
1046 void TemplateTable::pop2() {
1047   transition(vtos, vtos);
1048   __ addptr(rsp, 2 * Interpreter::stackElementSize);
1049 }
1050 
1051 void TemplateTable::dup() {
1052   transition(vtos, vtos);
1053   __ load_ptr(0, rax);
1054   __ push_ptr(rax);
1055   // stack: ..., a, a
1056 }
1057 
1058 void TemplateTable::dup_x1() {
1059   transition(vtos, vtos);
1060   // stack: ..., a, b
1061   __ load_ptr( 0, rax);  // load b
1062   __ load_ptr( 1, rcx);  // load a
1063   __ store_ptr(1, rax);  // store b
1064   __ store_ptr(0, rcx);  // store a
1065   __ push_ptr(rax);      // push b
1066   // stack: ..., b, a, b
1067 }
1068 
1069 void TemplateTable::dup_x2() {
1070   transition(vtos, vtos);
1071   // stack: ..., a, b, c
1072   __ load_ptr( 0, rax);  // load c
1073   __ load_ptr( 2, rcx);  // load a
1074   __ store_ptr(2, rax);  // store c in a
1075   __ push_ptr(rax);      // push c
1076   // stack: ..., c, b, c, c
1077   __ load_ptr( 2, rax);  // load b
1078   __ store_ptr(2, rcx);  // store a in b
1079   // stack: ..., c, a, c, c
1080   __ store_ptr(1, rax);  // store b in c
1081   // stack: ..., c, a, b, c
1082 }
1083 
1084 void TemplateTable::dup2() {
1085   transition(vtos, vtos);
1086   // stack: ..., a, b
1087   __ load_ptr(1, rax);  // load a
1088   __ push_ptr(rax);     // push a
1089   __ load_ptr(1, rax);  // load b
1090   __ push_ptr(rax);     // push b
1091   // stack: ..., a, b, a, b
1092 }
1093 
1094 void TemplateTable::dup2_x1() {
1095   transition(vtos, vtos);
1096   // stack: ..., a, b, c
1097   __ load_ptr( 0, rcx);  // load c
1098   __ load_ptr( 1, rax);  // load b
1099   __ push_ptr(rax);      // push b
1100   __ push_ptr(rcx);      // push c
1101   // stack: ..., a, b, c, b, c
1102   __ store_ptr(3, rcx);  // store c in b
1103   // stack: ..., a, c, c, b, c
1104   __ load_ptr( 4, rcx);  // load a
1105   __ store_ptr(2, rcx);  // store a in 2nd c
1106   // stack: ..., a, c, a, b, c
1107   __ store_ptr(4, rax);  // store b in a
1108   // stack: ..., b, c, a, b, c
1109 }
1110 
1111 void TemplateTable::dup2_x2() {
1112   transition(vtos, vtos);
1113   // stack: ..., a, b, c, d
1114   __ load_ptr( 0, rcx);  // load d
1115   __ load_ptr( 1, rax);  // load c
1116   __ push_ptr(rax);      // push c
1117   __ push_ptr(rcx);      // push d
1118   // stack: ..., a, b, c, d, c, d
1119   __ load_ptr( 4, rax);  // load b
1120   __ store_ptr(2, rax);  // store b in d
1121   __ store_ptr(4, rcx);  // store d in b
1122   // stack: ..., a, d, c, b, c, d
1123   __ load_ptr( 5, rcx);  // load a
1124   __ load_ptr( 3, rax);  // load c
1125   __ store_ptr(3, rcx);  // store a in c
1126   __ store_ptr(5, rax);  // store c in a
1127   // stack: ..., c, d, a, b, c, d
1128 }
1129 
1130 void TemplateTable::swap() {
1131   transition(vtos, vtos);
1132   // stack: ..., a, b
1133   __ load_ptr( 1, rcx);  // load a
1134   __ load_ptr( 0, rax);  // load b
1135   __ store_ptr(0, rcx);  // store a in b
1136   __ store_ptr(1, rax);  // store b in a
1137   // stack: ..., b, a
1138 }
1139 
1140 void TemplateTable::iop2(Operation op) {
1141   transition(itos, itos);
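       // The second operand (or the shift count) arrives in rax; the first
       // operand is popped from the expression stack.  Non-commutative
       // operations move rax aside first so the operands end up in order.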
1142   switch (op) {
1143   case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
1144   case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
1145   case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
1146   case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
1147   case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
1148   case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
1149   case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
1150   case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
1151   case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
1152   default   : ShouldNotReachHere();
1153   }
1154 }
1155 
1156 void TemplateTable::lop2(Operation op) {
1157   transition(ltos, ltos);
1158   switch (op) {
1159   case add  :                    __ pop_l(rdx); __ addptr(rax, rdx); break;
1160   case sub  : __ mov(rdx, rax);  __ pop_l(rax); __ subptr(rax, rdx); break;
1161   case _and :                    __ pop_l(rdx); __ andptr(rax, rdx); break;
1162   case _or  :                    __ pop_l(rdx); __ orptr (rax, rdx); break;
1163   case _xor :                    __ pop_l(rdx); __ xorptr(rax, rdx); break;
1164   default   : ShouldNotReachHere();
1165   }
1166 }
1167 
1168 void TemplateTable::idiv() {
1169   transition(itos, itos);
1170   __ movl(rcx, rax);
1171   __ pop_i(rax);
1172   // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
1173   //       they are not equal, one could do a normal division (no correction
1174   //       needed), which may speed up this implementation for the common case.
1175   //       (see also JVM spec., p.243 & p.271)
1176   __ corrected_idivl(rcx);
1177 }
1178 
1179 void TemplateTable::irem() {
1180   transition(itos, itos);
1181   __ movl(rcx, rax);
1182   __ pop_i(rax);
1183   // Note: could xor eax and ecx and compare with (-1 ^ min_int). If
1184   //       they are not equal, one could do a normal division (no correction
1185   //       needed), which may speed up this implementation for the common case.
1186   //       (see also JVM spec., p.243 & p.271)
1187   __ corrected_idivl(rcx);
1188   __ movl(rax, rdx);
1189 }
1190 
1191 void TemplateTable::lmul() {
1192   transition(ltos, ltos);
1193   __ pop_l(rdx);
1194   __ imulq(rax, rdx);
1195 }
1196 
1197 void TemplateTable::ldiv() {
1198   transition(ltos, ltos);
1199   __ mov(rcx, rax);
1200   __ pop_l(rax);
1201   // generate explicit div0 check
1202   __ testq(rcx, rcx);
1203   __ jump_cc(Assembler::zero,
1204              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1205   // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1206   //       they are not equal, one could do a normal division (no correction
1207   //       needed), which may speed up this implementation for the common case.
1208   //       (see also JVM spec., p.243 & p.271)
1209   __ corrected_idivq(rcx); // kills rbx
1210 }
1211 
1212 void TemplateTable::lrem() {
1213   transition(ltos, ltos);
1214   __ mov(rcx, rax);
1215   __ pop_l(rax);
1216   __ testq(rcx, rcx);
1217   __ jump_cc(Assembler::zero,
1218              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1219   // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1220   //       they are not equal, one could do a normal division (no correction
1221   //       needed), which may speed up this implementation for the common case.
1222   //       (see also JVM spec., p.243 & p.271)
1223   __ corrected_idivq(rcx); // kills rbx
1224   __ mov(rax, rdx);
1225 }
1226 
1227 void TemplateTable::lshl() {
1228   transition(itos, ltos);
1229   __ movl(rcx, rax);                             // get shift count
1230   __ pop_l(rax);                                 // get shift value
1231   __ shlq(rax);
1232 }
1233 
1234 void TemplateTable::lshr() {
1235   transition(itos, ltos);
1236   __ movl(rcx, rax);                             // get shift count
1237   __ pop_l(rax);                                 // get shift value
1238   __ sarq(rax);
1239 }
1240 
1241 void TemplateTable::lushr() {
1242   transition(itos, ltos);
1243   __ movl(rcx, rax);                             // get shift count
1244   __ pop_l(rax);                                 // get shift value
1245   __ shrq(rax);
1246 }
1247 
1248 void TemplateTable::fop2(Operation op) {
1249   transition(ftos, ftos);
1250   switch (op) {
1251   case add:
1252     __ addss(xmm0, at_rsp());
1253     __ addptr(rsp, Interpreter::stackElementSize);
1254     break;
1255   case sub:
1256     __ movflt(xmm1, xmm0);
1257     __ pop_f(xmm0);
1258     __ subss(xmm0, xmm1);
1259     break;
1260   case mul:
1261     __ mulss(xmm0, at_rsp());
1262     __ addptr(rsp, Interpreter::stackElementSize);
1263     break;
1264   case div:
1265     __ movflt(xmm1, xmm0);
1266     __ pop_f(xmm0);
1267     __ divss(xmm0, xmm1);
1268     break;
1269   case rem:
1270     __ movflt(xmm1, xmm0);
1271     __ pop_f(xmm0);
1272     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
1273     break;
1274   default:
1275     ShouldNotReachHere();
1276     break;
1277   }
1278 }
1279 
1280 void TemplateTable::dop2(Operation op) {
1281   transition(dtos, dtos);
1282   switch (op) {
1283   case add:
1284     __ addsd(xmm0, at_rsp());
1285     __ addptr(rsp, 2 * Interpreter::stackElementSize);
1286     break;
1287   case sub:
1288     __ movdbl(xmm1, xmm0);
1289     __ pop_d(xmm0);
1290     __ subsd(xmm0, xmm1);
1291     break;
1292   case mul:
1293     __ mulsd(xmm0, at_rsp());
1294     __ addptr(rsp, 2 * Interpreter::stackElementSize);
1295     break;
1296   case div:
1297     __ movdbl(xmm1, xmm0);
1298     __ pop_d(xmm0);
1299     __ divsd(xmm0, xmm1);
1300     break;
1301   case rem:
1302     __ movdbl(xmm1, xmm0);
1303     __ pop_d(xmm0);
1304     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
1305     break;
1306   default:
1307     ShouldNotReachHere();
1308     break;
1309   }
1310 }
1311 
1312 void TemplateTable::ineg() {
1313   transition(itos, itos);
1314   __ negl(rax);
1315 }
1316 
1317 void TemplateTable::lneg() {
1318   transition(ltos, ltos);
1319   __ negq(rax);
1320 }
1321 
1322 // Note: 'double' and 'long long' have 32-bit alignment on x86.
1323 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
1324   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
1325   // for the 128-bit operands of SSE instructions.
1326   jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
1327   // Store the value to a 128-bit operand.
1328   operand[0] = lo;
1329   operand[1] = hi;
1330   return operand;
1331 }
1332 
1333 // Buffers for the 128-bit masks used by SSE instructions.
1334 static jlong float_signflip_pool[2*2];
1335 static jlong double_signflip_pool[2*2];
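     // The pools are twice the required size so that double_quadword() can
     // always carve a 16-byte aligned, 16-byte wide operand out of them.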
1336 
1337 void TemplateTable::fneg() {
1338   transition(ftos, ftos);
1339   static jlong *float_signflip  = double_quadword(&float_signflip_pool[1], 0x8000000080000000, 0x8000000080000000);
1340   __ xorps(xmm0, ExternalAddress((address) float_signflip));
1341 }
1342 
1343 void TemplateTable::dneg() {
1344   transition(dtos, dtos);
1345   static jlong *double_signflip  = double_quadword(&double_signflip_pool[1], 0x8000000000000000, 0x8000000000000000);
1346   __ xorpd(xmm0, ExternalAddress((address) double_signflip));
1347 }
1348 
1349 void TemplateTable::iinc() {
1350   transition(vtos, vtos);
1351   __ load_signed_byte(rdx, at_bcp(2)); // get constant
1352   locals_index(rbx);
1353   __ addl(iaddress(rbx), rdx);
1354 }
1355 
1356 void TemplateTable::wide_iinc() {
1357   transition(vtos, vtos);
1358   __ movl(rdx, at_bcp(4)); // get constant
1359   locals_index_wide(rbx);
1360   __ bswapl(rdx); // swap bytes & sign-extend constant
1361   __ sarl(rdx, 16);
1362   __ addl(iaddress(rbx), rdx);
1363   // Note: should probably use only one movl to get both
1364   //       the index and the constant -> fix this
1365 }
1366 
1367 void TemplateTable::convert() {
1368   // Checking
1369 #ifdef ASSERT
1370   {
1371     TosState tos_in  = ilgl;
1372     TosState tos_out = ilgl;
1373     switch (bytecode()) {
1374     case Bytecodes::_i2l: // fall through
1375     case Bytecodes::_i2f: // fall through
1376     case Bytecodes::_i2d: // fall through
1377     case Bytecodes::_i2b: // fall through
1378     case Bytecodes::_i2c: // fall through
1379     case Bytecodes::_i2s: tos_in = itos; break;
1380     case Bytecodes::_l2i: // fall through
1381     case Bytecodes::_l2f: // fall through
1382     case Bytecodes::_l2d: tos_in = ltos; break;
1383     case Bytecodes::_f2i: // fall through
1384     case Bytecodes::_f2l: // fall through
1385     case Bytecodes::_f2d: tos_in = ftos; break;
1386     case Bytecodes::_d2i: // fall through
1387     case Bytecodes::_d2l: // fall through
1388     case Bytecodes::_d2f: tos_in = dtos; break;
1389     default             : ShouldNotReachHere();
1390     }
1391     switch (bytecode()) {
1392     case Bytecodes::_l2i: // fall through
1393     case Bytecodes::_f2i: // fall through
1394     case Bytecodes::_d2i: // fall through
1395     case Bytecodes::_i2b: // fall through
1396     case Bytecodes::_i2c: // fall through
1397     case Bytecodes::_i2s: tos_out = itos; break;
1398     case Bytecodes::_i2l: // fall through
1399     case Bytecodes::_f2l: // fall through
1400     case Bytecodes::_d2l: tos_out = ltos; break;
1401     case Bytecodes::_i2f: // fall through
1402     case Bytecodes::_l2f: // fall through
1403     case Bytecodes::_d2f: tos_out = ftos; break;
1404     case Bytecodes::_i2d: // fall through
1405     case Bytecodes::_l2d: // fall through
1406     case Bytecodes::_f2d: tos_out = dtos; break;
1407     default             : ShouldNotReachHere();
1408     }
1409     transition(tos_in, tos_out);
1410   }
1411 #endif // ASSERT
1412 
1413   static const int64_t is_nan = 0x8000000000000000L;
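       // 0x8000000000000000 is the "integer indefinite" value produced by
       // cvttss2siq/cvttsd2siq for NaN and out-of-range inputs; those cases
       // are redone in the runtime below.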
1414 
1415   // Conversion
1416   switch (bytecode()) {
1417   case Bytecodes::_i2l:
1418     __ movslq(rax, rax);
1419     break;
1420   case Bytecodes::_i2f:
1421     __ cvtsi2ssl(xmm0, rax);
1422     break;
1423   case Bytecodes::_i2d:
1424     __ cvtsi2sdl(xmm0, rax);
1425     break;
1426   case Bytecodes::_i2b:
1427     __ movsbl(rax, rax);
1428     break;
1429   case Bytecodes::_i2c:
1430     __ movzwl(rax, rax);
1431     break;
1432   case Bytecodes::_i2s:
1433     __ movswl(rax, rax);
1434     break;
1435   case Bytecodes::_l2i:
1436     __ movl(rax, rax);
1437     break;
1438   case Bytecodes::_l2f:
1439     __ cvtsi2ssq(xmm0, rax);
1440     break;
1441   case Bytecodes::_l2d:
1442     __ cvtsi2sdq(xmm0, rax);
1443     break;
1444   case Bytecodes::_f2i:
1445   {
1446     Label L;
1447     __ cvttss2sil(rax, xmm0);
1448     __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1449     __ jcc(Assembler::notEqual, L);
1450     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1451     __ bind(L);
1452   }
1453     break;
1454   case Bytecodes::_f2l:
1455   {
1456     Label L;
1457     __ cvttss2siq(rax, xmm0);
1458     // NaN or overflow/underflow?
1459     __ cmp64(rax, ExternalAddress((address) &is_nan));
1460     __ jcc(Assembler::notEqual, L);
1461     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1462     __ bind(L);
1463   }
1464     break;
1465   case Bytecodes::_f2d:
1466     __ cvtss2sd(xmm0, xmm0);
1467     break;
1468   case Bytecodes::_d2i:
1469   {
1470     Label L;
1471     __ cvttsd2sil(rax, xmm0);
1472     __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1473     __ jcc(Assembler::notEqual, L);
1474     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
1475     __ bind(L);
1476   }
1477     break;
1478   case Bytecodes::_d2l:
1479   {
1480     Label L;
1481     __ cvttsd2siq(rax, xmm0);
1482     // NaN or overflow/underflow?
1483     __ cmp64(rax, ExternalAddress((address) &is_nan));
1484     __ jcc(Assembler::notEqual, L);
1485     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
1486     __ bind(L);
1487   }
1488     break;
1489   case Bytecodes::_d2f:
1490     __ cvtsd2ss(xmm0, xmm0);
1491     break;
1492   default:
1493     ShouldNotReachHere();
1494   }
1495 }
1496 
1497 void TemplateTable::lcmp() {
1498   transition(ltos, itos);
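       // Result: -1 if the first operand (popped into rdx) is less than the
       // second operand (in rax), 0 if they are equal, +1 otherwise.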
1499   Label done;
1500   __ pop_l(rdx);
1501   __ cmpq(rdx, rax);
1502   __ movl(rax, -1);
1503   __ jccb(Assembler::less, done);
1504   __ setb(Assembler::notEqual, rax);
1505   __ movzbl(rax, rax);
1506   __ bind(done);
1507 }
1508 
1509 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
1510   Label done;
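       // ucomiss/ucomisd set the parity flag for an unordered compare (NaN);
       // unordered_result (-1 for fcmpl/dcmpl, +1 for fcmpg/dcmpg) selects the
       // result produced in that case.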
1511   if (is_float) {
1512     // XXX get rid of pop here, use ... reg, mem32
1513     __ pop_f(xmm1);
1514     __ ucomiss(xmm1, xmm0);
1515   } else {
1516     // XXX get rid of pop here, use ... reg, mem64
1517     __ pop_d(xmm1);
1518     __ ucomisd(xmm1, xmm0);
1519   }
1520   if (unordered_result < 0) {
1521     __ movl(rax, -1);
1522     __ jccb(Assembler::parity, done);
1523     __ jccb(Assembler::below, done);
1524     __ setb(Assembler::notEqual, rdx);
1525     __ movzbl(rax, rdx);
1526   } else {
1527     __ movl(rax, 1);
1528     __ jccb(Assembler::parity, done);
1529     __ jccb(Assembler::above, done);
1530     __ movl(rax, 0);
1531     __ jccb(Assembler::equal, done);
1532     __ decrementl(rax);
1533   }
1534   __ bind(done);
1535 }
1536 
1537 void TemplateTable::branch(bool is_jsr, bool is_wide) {
1538   __ get_method(rcx); // rcx holds method
1539   __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
1540                                      // holds bumped taken count
1541 
1542   const ByteSize be_offset = methodOopDesc::backedge_counter_offset() +
1543                              InvocationCounter::counter_offset();
1544   const ByteSize inv_offset = methodOopDesc::invocation_counter_offset() +
1545                               InvocationCounter::counter_offset();
1546   const int method_offset = frame::interpreter_frame_method_offset * wordSize;
1547 
1548   // Load up edx with the branch displacement
1549   __ movl(rdx, at_bcp(1));
1550   __ bswapl(rdx);
1551 
1552   if (!is_wide) {
1553     __ sarl(rdx, 16);
1554   }
1555   __ movl2ptr(rdx, rdx);
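       // rdx now holds the sign-extended branch displacement: 16 bits for a
       // normal branch, 32 bits for goto_w / jsr_w.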
1556 
1557   // Handle all the JSR stuff here, then exit.
1558   // It's much shorter and cleaner than intermingling with the non-JSR
1559   // normal-branch stuff occurring below.
1560   if (is_jsr) {
1561     // Pre-load the next target bytecode into rbx
1562     __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1, 0));
1563 
1564     // compute return address as bci in rax
1565     __ lea(rax, at_bcp((is_wide ? 5 : 3) -
1566                         in_bytes(constMethodOopDesc::codes_offset())));
1567     __ subptr(rax, Address(rcx, methodOopDesc::const_offset()));
1568     // Adjust the bcp in r13 by the displacement in rdx
1569     __ addptr(r13, rdx);
1570     // the jsr result is a return bci, not an oop; push it as an int
1571     __ push_i(rax);
1572     __ dispatch_only(vtos);
1573     return;
1574   }
1575 
1576   // Normal (non-jsr) branch handling
1577 
1578   // Adjust the bcp in r13 by the displacement in rdx
1579   __ addptr(r13, rdx);
1580 
1581   assert(UseLoopCounter || !UseOnStackReplacement,
1582          "on-stack-replacement requires loop counters");
1583   Label backedge_counter_overflow;
1584   Label profile_method;
1585   Label dispatch;
1586   if (UseLoopCounter) {
1587     // increment backedge counter for backward branches
1588     // rax: MDO
1589     // ebx: MDO bumped taken-count
1590     // rcx: method
1591     // rdx: target offset
1592     // r13: target bcp
1593     // r14: locals pointer
1594     __ testl(rdx, rdx);             // check if forward or backward branch
1595     __ jcc(Assembler::positive, dispatch); // count only if backward branch
1596     if (TieredCompilation) {
1597       Label no_mdo;
1598       int increment = InvocationCounter::count_increment;
1599       int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
1600       if (ProfileInterpreter) {
1601         // Are we profiling?
1602         __ movptr(rbx, Address(rcx, in_bytes(methodOopDesc::method_data_offset())));
1603         __ testptr(rbx, rbx);
1604         __ jccb(Assembler::zero, no_mdo);
1605         // Increment the MDO backedge counter
1606         const Address mdo_backedge_counter(rbx, in_bytes(methodDataOopDesc::backedge_counter_offset()) +
1607                                            in_bytes(InvocationCounter::counter_offset()));
1608         __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
1609                                    rax, false, Assembler::zero, &backedge_counter_overflow);
1610         __ jmp(dispatch);
1611       }
1612       __ bind(no_mdo);
1613       // Increment backedge counter in methodOop
1614       __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
1615                                  rax, false, Assembler::zero, &backedge_counter_overflow);
1616     } else {
1617       // increment counter
1618       __ movl(rax, Address(rcx, be_offset));        // load backedge counter
1619       __ incrementl(rax, InvocationCounter::count_increment); // increment counter
1620       __ movl(Address(rcx, be_offset), rax);        // store counter
1621 
1622       __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
1623       __ andl(rax, InvocationCounter::count_mask_value); // and the status bits
1624       __ addl(rax, Address(rcx, be_offset));        // add both counters
1625 
1626       if (ProfileInterpreter) {
1627         // Test to see if we should create a method data oop
1628         __ cmp32(rax,
1629                  ExternalAddress((address) &InvocationCounter::InterpreterProfileLimit));
1630         __ jcc(Assembler::less, dispatch);
1631 
1632         // if no method data exists, go to profile method
1633         __ test_method_data_pointer(rax, profile_method);
1634 
1635         if (UseOnStackReplacement) {
1636           // check for overflow against ebx which is the MDO taken count
1637           __ cmp32(rbx,
1638                    ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
1639           __ jcc(Assembler::below, dispatch);
1640 
1641           // When ProfileInterpreter is on, the backedge_count comes
1642           // from the methodDataOop, whose value does not get reset on
1643           // the call to frequency_counter_overflow().  To avoid
1644           // excessive calls to the overflow routine while the method is
1645           // being compiled, add a second test to make sure the overflow
1646           // function is called only once every overflow_frequency.
1647           const int overflow_frequency = 1024;
1648           __ andl(rbx, overflow_frequency - 1);
1649           __ jcc(Assembler::zero, backedge_counter_overflow);
1650 
1651         }
1652       } else {
1653         if (UseOnStackReplacement) {
1654           // check for overflow against eax, which is the sum of the
1655           // counters
1656           __ cmp32(rax,
1657                    ExternalAddress((address) &InvocationCounter::InterpreterBackwardBranchLimit));
1658           __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
1659 
1660         }
1661       }
1662     }
1663     __ bind(dispatch);
1664   }
1665 
1666   // Pre-load the next target bytecode into rbx
1667   __ load_unsigned_byte(rbx, Address(r13, 0));
1668 
1669   // continue with the bytecode @ target
1670   // eax: return bci for jsr's, unused otherwise
1671   // ebx: target bytecode
1672   // r13: target bcp
1673   __ dispatch_only(vtos);
1674 
1675   if (UseLoopCounter) {
1676     if (ProfileInterpreter) {
1677       // Out-of-line code to allocate method data oop.
1678       __ bind(profile_method);
1679       __ call_VM(noreg,
1680                  CAST_FROM_FN_PTR(address,
1681                                   InterpreterRuntime::profile_method), r13);
1682       __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
1683       __ movptr(rcx, Address(rbp, method_offset));
1684       __ movptr(rcx, Address(rcx,
1685                              in_bytes(methodOopDesc::method_data_offset())));
1686       __ movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize),
1687                 rcx);
1688       __ test_method_data_pointer(rcx, dispatch);
1689       // offset non-null mdp by MDO::data_offset() + IR::profile_method()
1690       __ addptr(rcx, in_bytes(methodDataOopDesc::data_offset()));
1691       __ addptr(rcx, rax);
1692       __ movptr(Address(rbp, frame::interpreter_frame_mdx_offset * wordSize),
1693                 rcx);
1694       __ jmp(dispatch);
1695     }
1696 
1697     if (UseOnStackReplacement) {
1698       // backedge counter overflow
1699       __ bind(backedge_counter_overflow);
1700       __ negptr(rdx);
1701       __ addptr(rdx, r13); // branch bcp
1702       // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
1703       __ call_VM(noreg,
1704                  CAST_FROM_FN_PTR(address,
1705                                   InterpreterRuntime::frequency_counter_overflow),
1706                  rdx);
1707       __ load_unsigned_byte(rbx, Address(r13, 0));  // restore target bytecode
1708 
1709       // rax: osr nmethod (osr ok) or NULL (osr not possible)
1710       // ebx: target bytecode
1711       // rdx: scratch
1712       // r14: locals pointer
1713       // r13: bcp
1714       __ testptr(rax, rax);                        // test result
1715       __ jcc(Assembler::zero, dispatch);         // no osr if null
1716       // nmethod may have been invalidated (VM may block upon call_VM return)
1717       __ movl(rcx, Address(rax, nmethod::entry_bci_offset()));
1718       __ cmpl(rcx, InvalidOSREntryBci);
1719       __ jcc(Assembler::equal, dispatch);
1720 
1721       // We have the address of an on stack replacement routine in rax.
1722       // We need to prepare to execute the OSR method. First we must
1723       // migrate the locals and monitors off of the stack.
1724 
1725       __ mov(r13, rax);                             // save the nmethod
1726 
1727       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
1728 
1729       // rax is the OSR buffer; move it to the expected parameter location
1730       __ mov(j_rarg0, rax);
1731 
1732       // We use the j_rarg definitions here so that, whatever a platform's parameter
1733       // registers happen to be, these temporaries do not collide with them while we
1734       // set up the call to the OSR nmethod. They are NOT parameters themselves.
1735 
1736       const Register retaddr = j_rarg2;
1737       const Register sender_sp = j_rarg1;
1738 
1739       // pop the interpreter frame
1740       __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
1741       __ leave();                                // remove frame anchor
1742       __ pop(retaddr);                           // get return address
1743       __ mov(rsp, sender_sp);                   // set sp to sender sp
1744       // Ensure compiled code always sees stack at proper alignment
1745       __ andptr(rsp, -(StackAlignmentInBytes));
1746 
1747       // unlike x86 we need no specialized return from compiled code
1748       // to the interpreter or the call stub.
1749 
1750       // push the return address
1751       __ push(retaddr);
1752 
1753       // and begin the OSR nmethod
1754       __ jmp(Address(r13, nmethod::osr_entry_point_offset()));
1755     }
1756   }
1757 }
1758 
1759 
1760 void TemplateTable::if_0cmp(Condition cc) {
1761   transition(itos, vtos);
1762   // assume branch is more often taken than not (loops use backward branches)
1763   Label not_taken;
1764   __ testl(rax, rax);
1765   __ jcc(j_not(cc), not_taken);
1766   branch(false, false);
1767   __ bind(not_taken);
1768   __ profile_not_taken_branch(rax);
1769 }
1770 
1771 void TemplateTable::if_icmp(Condition cc) {
1772   transition(itos, vtos);
1773   // assume branch is more often taken than not (loops use backward branches)
1774   Label not_taken;
1775   __ pop_i(rdx);
1776   __ cmpl(rdx, rax);
1777   __ jcc(j_not(cc), not_taken);
1778   branch(false, false);
1779   __ bind(not_taken);
1780   __ profile_not_taken_branch(rax);
1781 }
1782 
1783 void TemplateTable::if_nullcmp(Condition cc) {
1784   transition(atos, vtos);
1785   // assume branch is more often taken than not (loops use backward branches)
1786   Label not_taken;
1787   __ testptr(rax, rax);
1788   __ jcc(j_not(cc), not_taken);
1789   branch(false, false);
1790   __ bind(not_taken);
1791   __ profile_not_taken_branch(rax);
1792 }
1793 
1794 void TemplateTable::if_acmp(Condition cc) {
1795   transition(atos, vtos);
1796   // assume branch is more often taken than not (loops use backward branches)
1797   Label not_taken;
1798   __ pop_ptr(rdx);
1799   __ cmpptr(rdx, rax);
1800   __ jcc(j_not(cc), not_taken);
1801   branch(false, false);
1802   __ bind(not_taken);
1803   __ profile_not_taken_branch(rax);
1804 }
1805 
1806 void TemplateTable::ret() {
1807   transition(vtos, vtos);
1808   locals_index(rbx);
1809   __ movslq(rbx, iaddress(rbx)); // get return bci, compute return bcp
1810   __ profile_ret(rbx, rcx);
1811   __ get_method(rax);
1812   __ movptr(r13, Address(rax, methodOopDesc::const_offset()));
1813   __ lea(r13, Address(r13, rbx, Address::times_1,
1814                       constMethodOopDesc::codes_offset()));
1815   __ dispatch_next(vtos);
1816 }
1817 
1818 void TemplateTable::wide_ret() {
1819   transition(vtos, vtos);
1820   locals_index_wide(rbx);
1821   __ movptr(rbx, aaddress(rbx)); // get return bci, compute return bcp
1822   __ profile_ret(rbx, rcx);
1823   __ get_method(rax);
1824   __ movptr(r13, Address(rax, methodOopDesc::const_offset()));
1825   __ lea(r13, Address(r13, rbx, Address::times_1, constMethodOopDesc::codes_offset()));
1826   __ dispatch_next(vtos);
1827 }
1828 
1829 void TemplateTable::tableswitch() {
1830   Label default_case, continue_execution;
1831   transition(itos, vtos);
1832   // align r13
1833   __ lea(rbx, at_bcp(BytesPerInt));
1834   __ andptr(rbx, -BytesPerInt);
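       // The tableswitch operands (default, lo, hi, then the jump table) are padded
       // to a 4-byte boundary following the opcode; the lea/andptr pair above rounds
       // the bcp-relative address up past the opcode onto that boundary.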
1835   // load lo & hi
1836   __ movl(rcx, Address(rbx, BytesPerInt));
1837   __ movl(rdx, Address(rbx, 2 * BytesPerInt));
1838   __ bswapl(rcx);
1839   __ bswapl(rdx);
1840   // check against lo & hi
1841   __ cmpl(rax, rcx);
1842   __ jcc(Assembler::less, default_case);
1843   __ cmpl(rax, rdx);
1844   __ jcc(Assembler::greater, default_case);
1845   // lookup dispatch offset
1846   __ subl(rax, rcx);
1847   __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
1848   __ profile_switch_case(rax, rbx, rcx);
1849   // continue execution
1850   __ bind(continue_execution);
1851   __ bswapl(rdx);
1852   __ movl2ptr(rdx, rdx);
1853   __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
1854   __ addptr(r13, rdx);
1855   __ dispatch_only(vtos);
1856   // handle default
1857   __ bind(default_case);
1858   __ profile_switch_default(rax);
1859   __ movl(rdx, Address(rbx, 0));
1860   __ jmp(continue_execution);
1861 }
1862 
1863 void TemplateTable::lookupswitch() {
1864   transition(itos, itos);
1865   __ stop("lookupswitch bytecode should have been rewritten");
1866 }
1867 
1868 void TemplateTable::fast_linearswitch() {
1869   transition(itos, vtos);
1870   Label loop_entry, loop, found, continue_execution;
1871   // bswap rax so we can avoid bswapping the table entries
1872   __ bswapl(rax);
1873   // align r13
1874   __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
1875                                     // this instruction (change offsets
1876                                     // below)
1877   __ andptr(rbx, -BytesPerInt);
1878   // set counter
1879   __ movl(rcx, Address(rbx, BytesPerInt));
1880   __ bswapl(rcx);
1881   __ jmpb(loop_entry);
1882   // table search
1883   __ bind(loop);
1884   __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
1885   __ jcc(Assembler::equal, found);
1886   __ bind(loop_entry);
1887   __ decrementl(rcx);
1888   __ jcc(Assembler::greaterEqual, loop);
1889   // default case
1890   __ profile_switch_default(rax);
1891   __ movl(rdx, Address(rbx, 0));
1892   __ jmp(continue_execution);
1893   // entry found -> get offset
1894   __ bind(found);
1895   __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
1896   __ profile_switch_case(rcx, rax, rbx);
1897   // continue execution
1898   __ bind(continue_execution);
1899   __ bswapl(rdx);
1900   __ movl2ptr(rdx, rdx);
1901   __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1));
1902   __ addptr(r13, rdx);
1903   __ dispatch_only(vtos);
1904 }
1905 
1906 void TemplateTable::fast_binaryswitch() {
1907   transition(itos, vtos);
1908   // Implementation using the following core algorithm:
1909   //
1910   // int binary_search(int key, LookupswitchPair* array, int n) {
1911   //   // Binary search according to "Methodik des Programmierens" by
1912   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
1913   //   int i = 0;
1914   //   int j = n;
1915   //   while (i+1 < j) {
1916   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
1917   //     // with      Q: for all i: 0 <= i < n: key < a[i]
1918   //     // where a stands for the array and assuming that the (nonexistent)
1919   //     // element a[n] is infinitely big.
1920   //     int h = (i + j) >> 1;
1921   //     // i < h < j
1922   //     if (key < array[h].fast_match()) {
1923   //       j = h;
1924   //     } else {
1925   //       i = h;
1926   //     }
1927   //   }
1928   //   // R: a[i] <= key < a[i+1] or Q
1929   //   // (i.e., if key is within array, i is the correct index)
1930   //   return i;
1931   // }
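       // Worked example (hypothetical table): with n = 4 match values {2, 5, 9, 13}
       // and key = 9, (i, j) evolves (0,4) -> (2,4) -> (2,3); the loop exits with
       // i = 2, and the re-check after the loop confirms array[2].match == key
       // before its offset is taken.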
1932 
1933   // Register allocation
1934   const Register key   = rax; // already set (tosca)
1935   const Register array = rbx;
1936   const Register i     = rcx;
1937   const Register j     = rdx;
1938   const Register h     = rdi;
1939   const Register temp  = rsi;
1940 
1941   // Find array start
1942   __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
1943                                           // get rid of this
1944                                           // instruction (change
1945                                           // offsets below)
1946   __ andptr(array, -BytesPerInt);
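       // Each LookupswitchPair is two BytesPerInt words (match, then offset), which
       // is why the addresses below scale the index by Address::times_8 and use
       // BytesPerInt displacements to select the match or offset half of an entry.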
1947 
1948   // Initialize i & j
1949   __ xorl(i, i);                            // i = 0;
1950   __ movl(j, Address(array, -BytesPerInt)); // j = length(array);
1951 
1952   // Convert j into native byteordering
1953   __ bswapl(j);
1954 
1955   // And start
1956   Label entry;
1957   __ jmp(entry);
1958 
1959   // binary search loop
1960   {
1961     Label loop;
1962     __ bind(loop);
1963     // int h = (i + j) >> 1;
1964     __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
1965     __ sarl(h, 1);                               // h = (i + j) >> 1;
1966     // if (key < array[h].fast_match()) {
1967     //   j = h;
1968     // } else {
1969     //   i = h;
1970     // }
1971     // Convert array[h].match to native byte-ordering before compare
1972     __ movl(temp, Address(array, h, Address::times_8));
1973     __ bswapl(temp);
1974     __ cmpl(key, temp);
1975     // j = h if (key <  array[h].fast_match())
1976     __ cmovl(Assembler::less, j, h);
1977     // i = h if (key >= array[h].fast_match())
1978     __ cmovl(Assembler::greaterEqual, i, h);
1979     // while (i+1 < j)
1980     __ bind(entry);
1981     __ leal(h, Address(i, 1)); // i+1
1982     __ cmpl(h, j);             // i+1 < j
1983     __ jcc(Assembler::less, loop);
1984   }
1985 
1986   // end of binary search, result index is i (must check again!)
1987   Label default_case;
1988   // Convert array[i].match to native byte-ordering before compare
1989   __ movl(temp, Address(array, i, Address::times_8));
1990   __ bswapl(temp);
1991   __ cmpl(key, temp);
1992   __ jcc(Assembler::notEqual, default_case);
1993 
1994   // entry found -> j = offset
1995   __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
1996   __ profile_switch_case(i, key, array);
1997   __ bswapl(j);
1998   __ movl2ptr(j, j);
1999   __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
2000   __ addptr(r13, j);
2001   __ dispatch_only(vtos);
2002 
2003   // default case -> j = default offset
2004   __ bind(default_case);
2005   __ profile_switch_default(i);
2006   __ movl(j, Address(array, -2 * BytesPerInt));
2007   __ bswapl(j);
2008   __ movl2ptr(j, j);
2009   __ load_unsigned_byte(rbx, Address(r13, j, Address::times_1));
2010   __ addptr(r13, j);
2011   __ dispatch_only(vtos);
2012 }
2013 
2014 
2015 void TemplateTable::_return(TosState state) {
2016   transition(state, state);
2017   assert(_desc->calls_vm(),
2018          "inconsistent calls_vm information"); // call in remove_activation
2019 
2020   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2021     assert(state == vtos, "only valid state");
2022     __ movptr(c_rarg1, aaddress(0));
2023     __ load_klass(rdi, c_rarg1);
2024     __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
2025     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
2026     Label skip_register_finalizer;
2027     __ jcc(Assembler::zero, skip_register_finalizer);
2028 
2029     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
2030 
2031     __ bind(skip_register_finalizer);
2032   }
2033 
2034   __ remove_activation(state, r13);
2035   __ jmp(r13);
2036 }
2037 
2038 // ----------------------------------------------------------------------------
2039 // Volatile variables demand their effects be made known to all CPUs
2040 // in order.  Store buffers on most chips allow reads & writes to
2041 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2042 // without some kind of memory barrier (i.e., it's not sufficient that
2043 // the interpreter does not reorder volatile references, the hardware
2044 // also must not reorder them).
2045 //
2046 // According to the new Java Memory Model (JMM):
2047 // (1) All volatiles are serialized with respect to each other.  ALSO reads &
2048 //     writes act as acquire & release, so:
2049 // (2) A read cannot let unrelated NON-volatile memory refs that
2050 //     happen after the read float up to before the read.  It's OK for
2051 //     non-volatile memory refs that happen before the volatile read to
2052 //     float down below it.
2053 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
2054 //     memory refs that happen BEFORE the write float down to after the
2055 //     write.  It's OK for non-volatile memory refs that happen after the
2056 //     volatile write to float up before it.
2057 //
2058 // We only put in barriers around volatile refs (they are expensive),
2059 // not _between_ memory refs (that would require us to track the
2060 // flavor of the previous memory refs).  Requirements (2) and (3)
2061 // require some barriers before volatile stores and after volatile
2062 // loads.  These nearly cover requirement (1) but miss the
2063 // volatile-store-volatile-load case.  This final case is placed after
2064 // volatile-stores although it could just as well go before
2065 // volatile-loads.
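     // In this file that policy shows up as the StoreLoad|StoreStore barrier emitted
     // after volatile stores (see putfield_or_static and fast_storefield below); the
     // corresponding load-side barriers appear only as commented-out code marked
     // '[jk] not needed currently'.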
2066 void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
2067                                      order_constraint) {
2068   // Helper function to insert a memory barrier for volatile accesses
2069   if (os::is_MP()) { // Not needed on single CPU
2070     __ membar(order_constraint);
2071   }
2072 }
2073 
2074 void TemplateTable::resolve_cache_and_index(int byte_no,
2075                                             Register result,
2076                                             Register Rcache,
2077                                             Register index,
2078                                             size_t index_size) {
2079   const Register temp = rbx;
2080   assert_different_registers(result, Rcache, index, temp);
2081 
2082   Label resolved;
2083   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2084   if (byte_no == f1_oop) {
2085     // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
2086     // This kind of CP cache entry does not need to match the flags byte, because
2087     // there is a 1-1 relation between bytecode type and CP entry type.
2088     assert(result != noreg, ""); //else do cmpptr(Address(...), (int32_t) NULL_WORD)
2089     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
2090     __ testptr(result, result);
2091     __ jcc(Assembler::notEqual, resolved);
2092   } else {
2093     assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2094     assert(result == noreg, "");  //else change code for setting result
2095     const int shift_count = (1 + byte_no) * BitsPerByte;
2096     __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
2097     __ shrl(temp, shift_count);
2098     // have we resolved this bytecode?
2099     __ andl(temp, 0xFF);
2100     __ cmpl(temp, (int) bytecode());
2101     __ jcc(Assembler::equal, resolved);
2102   }
2103 
2104   // resolve first time through
2105   address entry;
2106   switch (bytecode()) {
2107   case Bytecodes::_getstatic:
2108   case Bytecodes::_putstatic:
2109   case Bytecodes::_getfield:
2110   case Bytecodes::_putfield:
2111     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
2112     break;
2113   case Bytecodes::_invokevirtual:
2114   case Bytecodes::_invokespecial:
2115   case Bytecodes::_invokestatic:
2116   case Bytecodes::_invokeinterface:
2117     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
2118     break;
2119   case Bytecodes::_invokedynamic:
2120     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
2121     break;
2122   case Bytecodes::_fast_aldc:
2123     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
2124     break;
2125   case Bytecodes::_fast_aldc_w:
2126     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
2127     break;
2128   default:
2129     ShouldNotReachHere();
2130     break;
2131   }
2132   __ movl(temp, (int) bytecode());
2133   __ call_VM(noreg, entry, temp);
2134 
2135   // Update registers with resolved info
2136   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2137   if (result != noreg)
2138     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
2139   __ bind(resolved);
2140 }
2141 
2142 // The Rcache and index registers must be set before call
2143 void TemplateTable::load_field_cp_cache_entry(Register obj,
2144                                               Register cache,
2145                                               Register index,
2146                                               Register off,
2147                                               Register flags,
2148                                               bool is_static = false) {
2149   assert_different_registers(cache, index, flags, off);
2150 
2151   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
2152   // Field offset
2153   __ movptr(off, Address(cache, index, Address::times_8,
2154                          in_bytes(cp_base_offset +
2155                                   ConstantPoolCacheEntry::f2_offset())));
2156   // Flags
2157   __ movl(flags, Address(cache, index, Address::times_8,
2158                          in_bytes(cp_base_offset +
2159                                   ConstantPoolCacheEntry::flags_offset())));
2160 
2161   // klass overwrite register
2162   if (is_static) {
2163     __ movptr(obj, Address(cache, index, Address::times_8,
2164                            in_bytes(cp_base_offset +
2165                                     ConstantPoolCacheEntry::f1_offset())));
2166   }
2167 }
2168 
2169 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2170                                                Register method,
2171                                                Register itable_index,
2172                                                Register flags,
2173                                                bool is_invokevirtual,
2174                                                bool is_invokevfinal, /*unused*/
2175                                                bool is_invokedynamic) {
2176   // setup registers
2177   const Register cache = rcx;
2178   const Register index = rdx;
2179   assert_different_registers(method, flags);
2180   assert_different_registers(method, cache, index);
2181   assert_different_registers(itable_index, flags);
2182   assert_different_registers(itable_index, cache, index);
2183   // determine constant pool cache field offsets
2184   const int method_offset = in_bytes(
2185     constantPoolCacheOopDesc::base_offset() +
2186       (is_invokevirtual
2187        ? ConstantPoolCacheEntry::f2_offset()
2188        : ConstantPoolCacheEntry::f1_offset()));
2189   const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
2190                                     ConstantPoolCacheEntry::flags_offset());
2191   // access constant pool cache fields
2192   const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
2193                                     ConstantPoolCacheEntry::f2_offset());
2194 
2195   if (byte_no == f1_oop) {
2196     // Resolved f1_oop goes directly into 'method' register.
2197     assert(is_invokedynamic, "");
2198     resolve_cache_and_index(byte_no, method, cache, index, sizeof(u4));
2199   } else {
2200     resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
2201     __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
2202   }
2203   if (itable_index != noreg) {
2204     __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
2205   }
2206   __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
2207 }
2208 
2209 
2210 // The cache and index registers are expected to be set before the call.
2211 // Correct values of the cache and index registers are preserved.
2212 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
2213                                             bool is_static, bool has_tos) {
2214   // Do the JVMTI work here to avoid disturbing the register state below.
2215   // We use the c_rarg registers here because they are the registers used
2216   // for the call into the VM.
2217   if (JvmtiExport::can_post_field_access()) {
2218     // Check to see if a field access watch has been set before we
2219     // take the time to call into the VM.
2220     Label L1;
2221     assert_different_registers(cache, index, rax);
2222     __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2223     __ testl(rax, rax);
2224     __ jcc(Assembler::zero, L1);
2225 
2226     __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
2227 
2228     // cache entry pointer
2229     __ addptr(c_rarg2, in_bytes(constantPoolCacheOopDesc::base_offset()));
2230     __ shll(c_rarg3, LogBytesPerWord);
2231     __ addptr(c_rarg2, c_rarg3);
2232     if (is_static) {
2233       __ xorl(c_rarg1, c_rarg1); // NULL object reference
2234     } else {
2235       __ movptr(c_rarg1, at_tos()); // get object pointer without popping it
2236       __ verify_oop(c_rarg1);
2237     }
2238     // c_rarg1: object pointer or NULL
2239     // c_rarg2: cache entry pointer
2240     // c_rarg3: jvalue object on the stack
2241     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2242                                        InterpreterRuntime::post_field_access),
2243                c_rarg1, c_rarg2, c_rarg3);
2244     __ get_cache_and_index_at_bcp(cache, index, 1);
2245     __ bind(L1);
2246   }
2247 }
2248 
2249 void TemplateTable::pop_and_check_object(Register r) {
2250   __ pop_ptr(r);
2251   __ null_check(r);  // for field access must check obj.
2252   __ verify_oop(r);
2253 }
2254 
2255 void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
2256   transition(vtos, vtos);
2257 
2258   const Register cache = rcx;
2259   const Register index = rdx;
2260   const Register obj   = c_rarg3;
2261   const Register off   = rbx;
2262   const Register flags = rax;
2263   const Register bc = c_rarg3; // uses same reg as obj, so don't mix them
2264 
2265   resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
2266   jvmti_post_field_access(cache, index, is_static, false);
2267   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2268 
2269   if (!is_static) {
2270     // obj is on the stack
2271     pop_and_check_object(obj);
2272   }
2273 
2274   const Address field(obj, off, Address::times_1);
2275 
2276   Label Done, notByte, notInt, notShort, notChar,
2277               notLong, notFloat, notObj, notDouble;
2278 
2279   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2280   assert(btos == 0, "change code, btos != 0");
2281 
2282   __ andl(flags, 0x0F);
2283   __ jcc(Assembler::notZero, notByte);
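       // flags now holds the field's TosState; because btos == 0 (asserted above),
       // the andl result doubles as the byte-type test used by the jcc above.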
2284   // btos
2285   __ load_signed_byte(rax, field);
2286   __ push(btos);
2287   // Rewrite bytecode to be faster
2288   if (!is_static) {
2289     patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
2290   }
2291   __ jmp(Done);
2292 
2293   __ bind(notByte);
2294   __ cmpl(flags, atos);
2295   __ jcc(Assembler::notEqual, notObj);
2296   // atos
2297   __ load_heap_oop(rax, field);
2298   __ push(atos);
2299   if (!is_static) {
2300     patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
2301   }
2302   __ jmp(Done);
2303 
2304   __ bind(notObj);
2305   __ cmpl(flags, itos);
2306   __ jcc(Assembler::notEqual, notInt);
2307   // itos
2308   __ movl(rax, field);
2309   __ push(itos);
2310   // Rewrite bytecode to be faster
2311   if (!is_static) {
2312     patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
2313   }
2314   __ jmp(Done);
2315 
2316   __ bind(notInt);
2317   __ cmpl(flags, ctos);
2318   __ jcc(Assembler::notEqual, notChar);
2319   // ctos
2320   __ load_unsigned_short(rax, field);
2321   __ push(ctos);
2322   // Rewrite bytecode to be faster
2323   if (!is_static) {
2324     patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
2325   }
2326   __ jmp(Done);
2327 
2328   __ bind(notChar);
2329   __ cmpl(flags, stos);
2330   __ jcc(Assembler::notEqual, notShort);
2331   // stos
2332   __ load_signed_short(rax, field);
2333   __ push(stos);
2334   // Rewrite bytecode to be faster
2335   if (!is_static) {
2336     patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
2337   }
2338   __ jmp(Done);
2339 
2340   __ bind(notShort);
2341   __ cmpl(flags, ltos);
2342   __ jcc(Assembler::notEqual, notLong);
2343   // ltos
2344   __ movq(rax, field);
2345   __ push(ltos);
2346   // Rewrite bytecode to be faster
2347   if (!is_static) {
2348     patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx);
2349   }
2350   __ jmp(Done);
2351 
2352   __ bind(notLong);
2353   __ cmpl(flags, ftos);
2354   __ jcc(Assembler::notEqual, notFloat);
2355   // ftos
2356   __ movflt(xmm0, field);
2357   __ push(ftos);
2358   // Rewrite bytecode to be faster
2359   if (!is_static) {
2360     patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
2361   }
2362   __ jmp(Done);
2363 
2364   __ bind(notFloat);
2365 #ifdef ASSERT
2366   __ cmpl(flags, dtos);
2367   __ jcc(Assembler::notEqual, notDouble);
2368 #endif
2369   // dtos
2370   __ movdbl(xmm0, field);
2371   __ push(dtos);
2372   // Rewrite bytecode to be faster
2373   if (!is_static) {
2374     patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
2375   }
2376 #ifdef ASSERT
2377   __ jmp(Done);
2378 
2379   __ bind(notDouble);
2380   __ stop("Bad state");
2381 #endif
2382 
2383   __ bind(Done);
2384   // [jk] not needed currently
2385   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
2386   //                                              Assembler::LoadStore));
2387 }
2388 
2389 
2390 void TemplateTable::getfield(int byte_no) {
2391   getfield_or_static(byte_no, false);
2392 }
2393 
2394 void TemplateTable::getstatic(int byte_no) {
2395   getfield_or_static(byte_no, true);
2396 }
2397 
2398 // The cache and index registers are expected to be set before the call.
2399 // The function may destroy various registers, just not the cache and index registers.
2400 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2401   transition(vtos, vtos);
2402 
2403   ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
2404 
2405   if (JvmtiExport::can_post_field_modification()) {
2406     // Check to see if a field modification watch has been set before
2407     // we take the time to call into the VM.
2408     Label L1;
2409     assert_different_registers(cache, index, rax);
2410     __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2411     __ testl(rax, rax);
2412     __ jcc(Assembler::zero, L1);
2413 
2414     __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
2415 
2416     if (is_static) {
2417       // Life is simple.  Null out the object pointer.
2418       __ xorl(c_rarg1, c_rarg1);
2419     } else {
2420       // Life is harder. The stack holds the value on top, followed by
2421       // the object.  We don't know the size of the value, though; it
2422       // could be one or two words depending on its type. As a result,
2423       // we must find the type to determine where the object is.
2424       __ movl(c_rarg3, Address(c_rarg2, rscratch1,
2425                            Address::times_8,
2426                            in_bytes(cp_base_offset +
2427                                      ConstantPoolCacheEntry::flags_offset())));
2428       __ shrl(c_rarg3, ConstantPoolCacheEntry::tosBits);
2429       // Make sure we don't need to mask c_rarg3 for tosBits after the
2430       // above shift
2431       ConstantPoolCacheEntry::verify_tosBits();
2432       __ movptr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
2433       __ cmpl(c_rarg3, ltos);
2434       __ cmovptr(Assembler::equal,
2435                  c_rarg1, at_tos_p2()); // ltos (two word jvalue)
2436       __ cmpl(c_rarg3, dtos);
2437       __ cmovptr(Assembler::equal,
2438                  c_rarg1, at_tos_p2()); // dtos (two word jvalue)
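           // For example, a long or double value occupies two expression-stack slots,
           // so the object reference sits at tos+2 rather than tos+1; the two cmov's
           // above select the matching address.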
2439     }
2440     // cache entry pointer
2441     __ addptr(c_rarg2, in_bytes(cp_base_offset));
2442     __ shll(rscratch1, LogBytesPerWord);
2443     __ addptr(c_rarg2, rscratch1);
2444     // object (tos)
2445     __ mov(c_rarg3, rsp);
2446     // c_rarg1: object pointer set up above (NULL if static)
2447     // c_rarg2: cache entry pointer
2448     // c_rarg3: jvalue object on the stack
2449     __ call_VM(noreg,
2450                CAST_FROM_FN_PTR(address,
2451                                 InterpreterRuntime::post_field_modification),
2452                c_rarg1, c_rarg2, c_rarg3);
2453     __ get_cache_and_index_at_bcp(cache, index, 1);
2454     __ bind(L1);
2455   }
2456 }
2457 
2458 void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
2459   transition(vtos, vtos);
2460 
2461   const Register cache = rcx;
2462   const Register index = rdx;
2463   const Register obj   = rcx;
2464   const Register off   = rbx;
2465   const Register flags = rax;
2466   const Register bc    = c_rarg3;
2467 
2468   resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
2469   jvmti_post_field_mod(cache, index, is_static);
2470   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2471 
2472   // [jk] not needed currently
2473   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
2474   //                                              Assembler::StoreStore));
2475 
2476   Label notVolatile, Done;
2477   __ movl(rdx, flags);
2478   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2479   __ andl(rdx, 0x1);
2480 
2481   // field address
2482   const Address field(obj, off, Address::times_1);
2483 
2484   Label notByte, notInt, notShort, notChar,
2485         notLong, notFloat, notObj, notDouble;
2486 
2487   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2488 
2489   assert(btos == 0, "change code, btos != 0");
2490   __ andl(flags, 0x0f);
2491   __ jcc(Assembler::notZero, notByte);
2492   // btos
2493   __ pop(btos);
2494   if (!is_static) pop_and_check_object(obj);
2495   __ movb(field, rax);
2496   if (!is_static) {
2497     patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx);
2498   }
2499   __ jmp(Done);
2500 
2501   __ bind(notByte);
2502   __ cmpl(flags, atos);
2503   __ jcc(Assembler::notEqual, notObj);
2504   // atos
2505   __ pop(atos);
2506   if (!is_static) pop_and_check_object(obj);
2507 
2508   // Store into the field
2509   do_oop_store(_masm, field, rax, _bs->kind(), false);
2510 
2511   if (!is_static) {
2512     patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
2513   }
2514   __ jmp(Done);
2515 
2516   __ bind(notObj);
2517   __ cmpl(flags, itos);
2518   __ jcc(Assembler::notEqual, notInt);
2519   // itos
2520   __ pop(itos);
2521   if (!is_static) pop_and_check_object(obj);
2522   __ movl(field, rax);
2523   if (!is_static) {
2524     patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx);
2525   }
2526   __ jmp(Done);
2527 
2528   __ bind(notInt);
2529   __ cmpl(flags, ctos);
2530   __ jcc(Assembler::notEqual, notChar);
2531   // ctos
2532   __ pop(ctos);
2533   if (!is_static) pop_and_check_object(obj);
2534   __ movw(field, rax);
2535   if (!is_static) {
2536     patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx);
2537   }
2538   __ jmp(Done);
2539 
2540   __ bind(notChar);
2541   __ cmpl(flags, stos);
2542   __ jcc(Assembler::notEqual, notShort);
2543   // stos
2544   __ pop(stos);
2545   if (!is_static) pop_and_check_object(obj);
2546   __ movw(field, rax);
2547   if (!is_static) {
2548     patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx);
2549   }
2550   __ jmp(Done);
2551 
2552   __ bind(notShort);
2553   __ cmpl(flags, ltos);
2554   __ jcc(Assembler::notEqual, notLong);
2555   // ltos
2556   __ pop(ltos);
2557   if (!is_static) pop_and_check_object(obj);
2558   __ movq(field, rax);
2559   if (!is_static) {
2560     patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx);
2561   }
2562   __ jmp(Done);
2563 
2564   __ bind(notLong);
2565   __ cmpl(flags, ftos);
2566   __ jcc(Assembler::notEqual, notFloat);
2567   // ftos
2568   __ pop(ftos);
2569   if (!is_static) pop_and_check_object(obj);
2570   __ movflt(field, xmm0);
2571   if (!is_static) {
2572     patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx);
2573   }
2574   __ jmp(Done);
2575 
2576   __ bind(notFloat);
2577 #ifdef ASSERT
2578   __ cmpl(flags, dtos);
2579   __ jcc(Assembler::notEqual, notDouble);
2580 #endif
2581   // dtos
2582   __ pop(dtos);
2583   if (!is_static) pop_and_check_object(obj);
2584   __ movdbl(field, xmm0);
2585   if (!is_static) {
2586     patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx);
2587   }
2588 
2589 #ifdef ASSERT
2590   __ jmp(Done);
2591 
2592   __ bind(notDouble);
2593   __ stop("Bad state");
2594 #endif
2595 
2596   __ bind(Done);
2597   // Check for volatile store
2598   __ testl(rdx, rdx);
2599   __ jcc(Assembler::zero, notVolatile);
2600   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
2601                                                Assembler::StoreStore));
2602 
2603   __ bind(notVolatile);
2604 }
2605 
2606 void TemplateTable::putfield(int byte_no) {
2607   putfield_or_static(byte_no, false);
2608 }
2609 
2610 void TemplateTable::putstatic(int byte_no) {
2611   putfield_or_static(byte_no, true);
2612 }
2613 
2614 void TemplateTable::jvmti_post_fast_field_mod() {
2615   if (JvmtiExport::can_post_field_modification()) {
2616     // Check to see if a field modification watch has been set before
2617     // we take the time to call into the VM.
2618     Label L2;
2619     __ mov32(c_rarg3, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2620     __ testl(c_rarg3, c_rarg3);
2621     __ jcc(Assembler::zero, L2);
2622     __ pop_ptr(rbx);                  // copy the object pointer from tos
2623     __ verify_oop(rbx);
2624     __ push_ptr(rbx);                 // put the object pointer back on tos
2625     __ subptr(rsp, sizeof(jvalue));  // add space for a jvalue object
2626     __ mov(c_rarg3, rsp);
2627     const Address field(c_rarg3, 0);
2628 
2629     switch (bytecode()) {          // load values into the jvalue object
2630     case Bytecodes::_fast_aputfield: __ movq(field, rax); break;
2631     case Bytecodes::_fast_lputfield: __ movq(field, rax); break;
2632     case Bytecodes::_fast_iputfield: __ movl(field, rax); break;
2633     case Bytecodes::_fast_bputfield: __ movb(field, rax); break;
2634     case Bytecodes::_fast_sputfield: // fall through
2635     case Bytecodes::_fast_cputfield: __ movw(field, rax); break;
2636     case Bytecodes::_fast_fputfield: __ movflt(field, xmm0); break;
2637     case Bytecodes::_fast_dputfield: __ movdbl(field, xmm0); break;
2638     default:
2639       ShouldNotReachHere();
2640     }
2641 
2642     // Save rax because call_VM() will clobber it, then use it for
2643     // JVMTI purposes
2644     __ push(rax);
2645     // access constant pool cache entry
2646     __ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1);
2647     __ verify_oop(rbx);
2648     // rbx: object pointer copied above
2649     // c_rarg2: cache entry pointer
2650     // c_rarg3: jvalue object on the stack
2651     __ call_VM(noreg,
2652                CAST_FROM_FN_PTR(address,
2653                                 InterpreterRuntime::post_field_modification),
2654                rbx, c_rarg2, c_rarg3);
2655     __ pop(rax);     // restore lower value
2656     __ addptr(rsp, sizeof(jvalue));  // release jvalue object space
2657     __ bind(L2);
2658   }
2659 }
2660 
2661 void TemplateTable::fast_storefield(TosState state) {
2662   transition(state, vtos);
2663 
2664   ByteSize base = constantPoolCacheOopDesc::base_offset();
2665 
2666   jvmti_post_fast_field_mod();
2667 
2668   // access constant pool cache
2669   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
2670 
2671   // test for volatile with rdx
2672   __ movl(rdx, Address(rcx, rbx, Address::times_8,
2673                        in_bytes(base +
2674                                 ConstantPoolCacheEntry::flags_offset())));
2675 
2676   // replace index with field offset from cache entry
2677   __ movptr(rbx, Address(rcx, rbx, Address::times_8,
2678                          in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
2679 
2680   // [jk] not needed currently
2681   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
2682   //                                              Assembler::StoreStore));
2683 
2684   Label notVolatile;
2685   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2686   __ andl(rdx, 0x1);
2687 
2688   // Get object from stack
2689   pop_and_check_object(rcx);
2690 
2691   // field address
2692   const Address field(rcx, rbx, Address::times_1);
2693 
2694   // access field
2695   switch (bytecode()) {
2696   case Bytecodes::_fast_aputfield:
2697     do_oop_store(_masm, field, rax, _bs->kind(), false);
2698     break;
2699   case Bytecodes::_fast_lputfield:
2700     __ movq(field, rax);
2701     break;
2702   case Bytecodes::_fast_iputfield:
2703     __ movl(field, rax);
2704     break;
2705   case Bytecodes::_fast_bputfield:
2706     __ movb(field, rax);
2707     break;
2708   case Bytecodes::_fast_sputfield:
2709     // fall through
2710   case Bytecodes::_fast_cputfield:
2711     __ movw(field, rax);
2712     break;
2713   case Bytecodes::_fast_fputfield:
2714     __ movflt(field, xmm0);
2715     break;
2716   case Bytecodes::_fast_dputfield:
2717     __ movdbl(field, xmm0);
2718     break;
2719   default:
2720     ShouldNotReachHere();
2721   }
2722 
2723   // Check for volatile store
2724   __ testl(rdx, rdx);
2725   __ jcc(Assembler::zero, notVolatile);
2726   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
2727                                                Assembler::StoreStore));
2728   __ bind(notVolatile);
2729 }
2730 
2731 
2732 void TemplateTable::fast_accessfield(TosState state) {
2733   transition(atos, state);
2734 
2735   // Do the JVMTI work here to avoid disturbing the register state below
2736   if (JvmtiExport::can_post_field_access()) {
2737     // Check to see if a field access watch has been set before we
2738     // take the time to call into the VM.
2739     Label L1;
2740     __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2741     __ testl(rcx, rcx);
2742     __ jcc(Assembler::zero, L1);
2743     // access constant pool cache entry
2744     __ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1);
2745     __ verify_oop(rax);
2746     __ mov(r12, rax);  // save object pointer before call_VM() clobbers it
2747     __ mov(c_rarg1, rax);
2748     // c_rarg1: object pointer copied above
2749     // c_rarg2: cache entry pointer
2750     __ call_VM(noreg,
2751                CAST_FROM_FN_PTR(address,
2752                                 InterpreterRuntime::post_field_access),
2753                c_rarg1, c_rarg2);
2754     __ mov(rax, r12); // restore object pointer
2755     __ reinit_heapbase();
2756     __ bind(L1);
2757   }
2758 
2759   // access constant pool cache
2760   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
2761   // replace index with field offset from cache entry
2762   // [jk] not needed currently
2763   // if (os::is_MP()) {
2764   //   __ movl(rdx, Address(rcx, rbx, Address::times_8,
2765   //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
2766   //                                 ConstantPoolCacheEntry::flags_offset())));
2767   //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2768   //   __ andl(rdx, 0x1);
2769   // }
2770   __ movptr(rbx, Address(rcx, rbx, Address::times_8,
2771                          in_bytes(constantPoolCacheOopDesc::base_offset() +
2772                                   ConstantPoolCacheEntry::f2_offset())));
2773 
2774   // rax: object
2775   __ verify_oop(rax);
2776   __ null_check(rax);
2777   Address field(rax, rbx, Address::times_1);
2778 
2779   // access field
2780   switch (bytecode()) {
2781   case Bytecodes::_fast_agetfield:
2782     __ load_heap_oop(rax, field);
2783     __ verify_oop(rax);
2784     break;
2785   case Bytecodes::_fast_lgetfield:
2786     __ movq(rax, field);
2787     break;
2788   case Bytecodes::_fast_igetfield:
2789     __ movl(rax, field);
2790     break;
2791   case Bytecodes::_fast_bgetfield:
2792     __ movsbl(rax, field);
2793     break;
2794   case Bytecodes::_fast_sgetfield:
2795     __ load_signed_short(rax, field);
2796     break;
2797   case Bytecodes::_fast_cgetfield:
2798     __ load_unsigned_short(rax, field);
2799     break;
2800   case Bytecodes::_fast_fgetfield:
2801     __ movflt(xmm0, field);
2802     break;
2803   case Bytecodes::_fast_dgetfield:
2804     __ movdbl(xmm0, field);
2805     break;
2806   default:
2807     ShouldNotReachHere();
2808   }
2809   // [jk] not needed currently
2810   // if (os::is_MP()) {
2811   //   Label notVolatile;
2812   //   __ testl(rdx, rdx);
2813   //   __ jcc(Assembler::zero, notVolatile);
2814   //   __ membar(Assembler::LoadLoad);
2815   //   __ bind(notVolatile);
2816   //};
2817 }
2818 
2819 void TemplateTable::fast_xaccess(TosState state) {
2820   transition(vtos, state);
2821 
2822   // get receiver
2823   __ movptr(rax, aaddress(0));
2824   // access constant pool cache
2825   __ get_cache_and_index_at_bcp(rcx, rdx, 2);
2826   __ movptr(rbx,
2827             Address(rcx, rdx, Address::times_8,
2828                     in_bytes(constantPoolCacheOopDesc::base_offset() +
2829                              ConstantPoolCacheEntry::f2_offset())));
2830   // make sure exception is reported in correct bcp range (getfield is
2831   // next instruction)
2832   __ increment(r13);
2833   __ null_check(rax);
2834   switch (state) {
2835   case itos:
2836     __ movl(rax, Address(rax, rbx, Address::times_1));
2837     break;
2838   case atos:
2839     __ load_heap_oop(rax, Address(rax, rbx, Address::times_1));
2840     __ verify_oop(rax);
2841     break;
2842   case ftos:
2843     __ movflt(xmm0, Address(rax, rbx, Address::times_1));
2844     break;
2845   default:
2846     ShouldNotReachHere();
2847   }
2848 
2849   // [jk] not needed currently
2850   // if (os::is_MP()) {
2851   //   Label notVolatile;
2852   //   __ movl(rdx, Address(rcx, rdx, Address::times_8,
2853   //                        in_bytes(constantPoolCacheOopDesc::base_offset() +
2854   //                                 ConstantPoolCacheEntry::flags_offset())));
2855   //   __ shrl(rdx, ConstantPoolCacheEntry::volatileField);
2856   //   __ testl(rdx, 0x1);
2857   //   __ jcc(Assembler::zero, notVolatile);
2858   //   __ membar(Assembler::LoadLoad);
2859   //   __ bind(notVolatile);
2860   // }
2861 
2862   __ decrement(r13);
2863 }
2864 
2865 
2866 
2867 //-----------------------------------------------------------------------------
2868 // Calls
2869 
2870 void TemplateTable::count_calls(Register method, Register temp) {
2871   // implemented elsewhere
2872   ShouldNotReachHere();
2873 }
2874 
2875 void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
2876   // determine flags
2877   Bytecodes::Code code = bytecode();
2878   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
2879   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
2880   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
2881   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
2882   const bool load_receiver      = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic);
2883   const bool receiver_null_check = is_invokespecial;
2884   const bool save_flags = is_invokeinterface || is_invokevirtual;
2885   // setup registers & access constant pool cache
2886   const Register recv   = rcx;
2887   const Register flags  = rdx;
2888   assert_different_registers(method, index, recv, flags);
2889 
2890   // save 'interpreter return address'
2891   __ save_bcp();
2892 
2893   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
2894 
2895   // load receiver if needed (note: no return address pushed yet)
2896   if (load_receiver) {
2897     assert(!is_invokedynamic, "");
2898     __ movl(recv, flags);
2899     __ andl(recv, 0xFF);
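         // recv now holds the call's parameter size in stack slots (the low byte of
         // flags); the receiver is the deepest of those slots, which recv_addr below
         // locates relative to rsp.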
2900     Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1));
2901     __ movptr(recv, recv_addr);
2902     __ verify_oop(recv);
2903   }
2904 
2905   // do null check if needed
2906   if (receiver_null_check) {
2907     __ null_check(recv);
2908   }
2909 
2910   if (save_flags) {
2911     __ movl(r13, flags);
2912   }
2913 
2914   // compute return type
2915   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
2916   // Make sure we don't need to mask flags for tosBits after the above shift
2917   ConstantPoolCacheEntry::verify_tosBits();
2918   // load return address
2919   {
2920     address table_addr;
2921     if (is_invokeinterface || is_invokedynamic)
2922       table_addr = (address)Interpreter::return_5_addrs_by_index_table();
2923     else
2924       table_addr = (address)Interpreter::return_3_addrs_by_index_table();
2925     ExternalAddress table(table_addr);
2926     __ lea(rscratch1, table);
2927     __ movptr(flags, Address(rscratch1, flags, Address::times_ptr));
2928   }
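       // The tables are indexed by the result TosState computed above; the _3 and _5
       // variants correspond to the 3-byte and 5-byte invoke bytecodes, so the pushed
       // 'return address' resumes interpretation at the bytecode after the call.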
2929 
2930   // push return address
2931   __ push(flags);
2932 
2933   // Restore the flags value (saved in r13 above), and restore r13, the
2934   // bytecode pointer, for later use.
2935   if (save_flags) {
2936     __ movl(flags, r13);
2937     __ restore_bcp();
2938   }
2939 }
2940 
2941 
2942 void TemplateTable::invokevirtual_helper(Register index,
2943                                          Register recv,
2944                                          Register flags) {
2945   // Uses temporary registers rax, rdx
2946   assert_different_registers(index, recv, rax, rdx);
2947 
2948   // Test for an invoke of a final method
2949   Label notFinal;
2950   __ movl(rax, flags);
2951   __ andl(rax, (1 << ConstantPoolCacheEntry::vfinalMethod));
2952   __ jcc(Assembler::zero, notFinal);
2953 
2954   const Register method = index;  // method must be rbx
2955   assert(method == rbx,
2956          "methodOop must be rbx for interpreter calling convention");
2957 
2958   // do the call - the index is actually the method to call
2959   __ verify_oop(method);
2960 
2961   // It's final, need a null check here!
2962   __ null_check(recv);
2963 
2964   // profile this call
2965   __ profile_final_call(rax);
2966 
2967   __ jump_from_interpreted(method, rax);
2968 
2969   __ bind(notFinal);
2970 
2971   // get receiver klass
2972   __ null_check(recv, oopDesc::klass_offset_in_bytes());
2973   __ load_klass(rax, recv);
2974 
2975   __ verify_oop(rax);
2976 
2977   // profile this call
2978   __ profile_virtual_call(rax, r14, rdx);
2979 
2980   // get target methodOop & entry point
2981   const int base = instanceKlass::vtable_start_offset() * wordSize;
2982   assert(vtableEntry::size() * wordSize == 8,
2983          "adjust the scaling in the code below");
2984   __ movptr(method, Address(rax, index,
2985                                  Address::times_8,
2986                                  base + vtableEntry::method_offset_in_bytes()));
2987   __ movptr(rdx, Address(method, methodOopDesc::interpreter_entry_offset()));
2988   __ jump_from_interpreted(method, rdx);
2989 }
2990 
2991 
2992 void TemplateTable::invokevirtual(int byte_no) {
2993   transition(vtos, vtos);
2994   assert(byte_no == f2_byte, "use this argument");
2995   prepare_invoke(rbx, noreg, byte_no);
2996 
2997   // rbx: index
2998   // rcx: receiver
2999   // rdx: flags
3000 
3001   invokevirtual_helper(rbx, rcx, rdx);
3002 }
3003 
3004 
3005 void TemplateTable::invokespecial(int byte_no) {
3006   transition(vtos, vtos);
3007   assert(byte_no == f1_byte, "use this argument");
3008   prepare_invoke(rbx, noreg, byte_no);
3009   // do the call
3010   __ verify_oop(rbx);
3011   __ profile_call(rax);
3012   __ jump_from_interpreted(rbx, rax);
3013 }
3014 
3015 
3016 void TemplateTable::invokestatic(int byte_no) {
3017   transition(vtos, vtos);
3018   assert(byte_no == f1_byte, "use this argument");
3019   prepare_invoke(rbx, noreg, byte_no);
3020   // do the call
3021   __ verify_oop(rbx);
3022   __ profile_call(rax);
3023   __ jump_from_interpreted(rbx, rax);
3024 }
3025 
3026 void TemplateTable::fast_invokevfinal(int byte_no) {
3027   transition(vtos, vtos);
3028   assert(byte_no == f2_byte, "use this argument");
3029   __ stop("fast_invokevfinal not used on amd64");
3030 }
3031 
3032 void TemplateTable::invokeinterface(int byte_no) {
3033   transition(vtos, vtos);
3034   assert(byte_no == f1_byte, "use this argument");
3035   prepare_invoke(rax, rbx, byte_no);
3036 
3037   // rax: Interface
3038   // rbx: index
3039   // rcx: receiver
3040   // rdx: flags
3041 
3042   // Special case of invokeinterface called for virtual method of
3043   // java.lang.Object.  See cpCacheOop.cpp for details.
3044   // This code isn't produced by javac, but could be produced by
3045   // another compliant java compiler.
3046   Label notMethod;
3047   __ movl(r14, rdx);
3048   __ andl(r14, (1 << ConstantPoolCacheEntry::methodInterface));
3049   __ jcc(Assembler::zero, notMethod);
3050 
3051   invokevirtual_helper(rbx, rcx, rdx);
3052   __ bind(notMethod);
3053 
3054   // Get receiver klass into rdx - also a null check
3055   __ restore_locals(); // restore r14
3056   __ load_klass(rdx, rcx);
3057   __ verify_oop(rdx);
3058 
3059   // profile this call
3060   __ profile_virtual_call(rdx, r13, r14);
3061 
3062   Label no_such_interface, no_such_method;
3063 
3064   __ lookup_interface_method(// inputs: rec. class, interface, itable index
3065                              rdx, rax, rbx,
3066                              // outputs: method, scan temp. reg
3067                              rbx, r13,
3068                              no_such_interface);
3069 
3070   // rbx: methodOop to call
3071   // rcx: receiver
3072   // Check for abstract method error
3073   // Note: This should be done more efficiently via a throw_abstract_method_error
3074   //       interpreter entry point and a conditional jump to it in case of a null
3075   //       method.
3076   __ testptr(rbx, rbx);
3077   __ jcc(Assembler::zero, no_such_method);
3078 
3079   // do the call
3080   // rcx: receiver
3081   // rbx: methodOop
3082   __ jump_from_interpreted(rbx, rdx);
3083   __ should_not_reach_here();
3084 
3085   // exception handling code follows...
3086   // note: must restore interpreter registers to canonical
3087   //       state for exception handling to work correctly!
3088 
3089   __ bind(no_such_method);
3090   // throw exception
3091   __ pop(rbx);           // pop return address (pushed by prepare_invoke)
3092   __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
3093   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3094   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
3095   // the call_VM checks for exception, so we should never return here.
3096   __ should_not_reach_here();
3097 
3098   __ bind(no_such_interface);
3099   // throw exception
3100   __ pop(rbx);           // pop return address (pushed by prepare_invoke)
3101   __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
3102   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3103   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3104                    InterpreterRuntime::throw_IncompatibleClassChangeError));
3105   // the call_VM checks for exception, so we should never return here.
3106   __ should_not_reach_here();
3107   return;
3108 }
3109 
3110 void TemplateTable::invokedynamic(int byte_no) {
3111   transition(vtos, vtos);
3112   assert(byte_no == f1_oop, "use this argument");
3113 
3114   if (!EnableInvokeDynamic) {
3115     // We should not encounter this bytecode if !EnableInvokeDynamic.
3116     // The verifier will stop it.  However, if we get past the verifier,
3117     // this will stop the thread in a reasonable way, without crashing the JVM.
3118     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3119                      InterpreterRuntime::throw_IncompatibleClassChangeError));
3120     // the call_VM checks for exception, so we should never return here.
3121     __ should_not_reach_here();
3122     return;
3123   }
3124 
3125   assert(byte_no == f1_oop, "use this argument");
3126   prepare_invoke(rax, rbx, byte_no);
3127 
3128   // rax: CallSite object (f1)
3129   // rbx: unused (f2)
3130   // rcx: receiver address
3131   // rdx: flags (unused)
3132 
3133   Register rax_callsite      = rax;
3134   Register rcx_method_handle = rcx;
3135 
3136   if (ProfileInterpreter) {
3137     // %%% should make a type profile for any invokedynamic that takes a ref argument
3138     // profile this call
3139     __ profile_call(r13);
3140   }
3141 
3142   __ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
3143   __ null_check(rcx_method_handle);
3144   __ prepare_to_jump_from_interpreted();
3145   __ jump_to_method_handle_entry(rcx_method_handle, rdx);
3146 }
3147 
3148 
3149 //-----------------------------------------------------------------------------
3150 // Allocation
3151 
3152 void TemplateTable::_new() {
3153   transition(vtos, atos);
3154   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3155   Label slow_case;
3156   Label done;
3157   Label initialize_header;
3158   Label initialize_object; // including clearing the fields
3159   Label allocate_shared;
3160 
3161   __ get_cpool_and_tags(rsi, rax);
3162   // Make sure the class we're about to instantiate has been resolved.
3163   // This is done before loading the instanceKlass to be consistent with the
3164   // order in which the constant pool is updated (see constantPoolOopDesc::klass_at_put).
3165   const int tags_offset = typeArrayOopDesc::header_size(T_BYTE) * wordSize;
3166   __ cmpb(Address(rax, rdx, Address::times_1, tags_offset),
3167           JVM_CONSTANT_Class);
3168   __ jcc(Assembler::notEqual, slow_case);
3169 
3170   // get instanceKlass
3171   __ movptr(rsi, Address(rsi, rdx,
3172             Address::times_8, sizeof(constantPoolOopDesc)));
3173 
3174   // make sure klass is initialized & doesn't have finalizer
3175   // make sure klass is fully initialized
3176   __ cmpl(Address(rsi,
3177                   instanceKlass::init_state_offset_in_bytes() +
3178                   sizeof(oopDesc)),
3179           instanceKlass::fully_initialized);
3180   __ jcc(Assembler::notEqual, slow_case);
3181 
3182   // get instance_size in instanceKlass (scaled to a count of bytes)
3183   __ movl(rdx,
3184           Address(rsi,
3185                   Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)));
3186   // test to see if it has a finalizer or is malformed in some way
3187   __ testl(rdx, Klass::_lh_instance_slow_path_bit);
3188   __ jcc(Assembler::notZero, slow_case);
3189 
3190   // Allocate the instance
3191   // 1) Try to allocate in the TLAB
3192   // 2) if that fails, allocate in the shared Eden (if allowed)
3193   // 3) if the above fails (or is not applicable), go to a slow case
3194   // (creates a new TLAB, etc.)
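       //
       // Illustrative sketch of the TLAB fast path below (C-like pseudocode; the
       // names are hypothetical, not actual HotSpot accessors):
       //
       //   char* obj     = thread->tlab_top;            // rax
       //   char* new_top = obj + instance_size;         // rbx = rax + rdx
       //   if (new_top > thread->tlab_end) goto allocate_shared_or_slow_case;
       //   thread->tlab_top = new_top;                  // plain store: the TLAB is thread-local
       //
       // Because the TLAB is private to the allocating thread, no atomic update is
       // needed here; the shared-Eden path below must use a locked cmpxchg instead.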
3195 
3196   const bool allow_shared_alloc =
3197     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
3198 
3199   if (UseTLAB) {
3200     __ movptr(rax, Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())));
3201     __ lea(rbx, Address(rax, rdx, Address::times_1));
3202     __ cmpptr(rbx, Address(r15_thread, in_bytes(JavaThread::tlab_end_offset())));
3203     __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
3204     __ movptr(Address(r15_thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
3205     if (ZeroTLAB) {
3206       // the fields have been already cleared
3207       __ jmp(initialize_header);
3208     } else {
3209       // initialize both the header and fields
3210       __ jmp(initialize_object);
3211     }
3212   }
3213 
3214   // Allocation in the shared Eden, if allowed.
3215   //
3216   // rdx: instance size in bytes
3217   if (allow_shared_alloc) {
3218     __ bind(allocate_shared);
3219 
3220     ExternalAddress top((address)Universe::heap()->top_addr());
3221     ExternalAddress end((address)Universe::heap()->end_addr());
3222 
3223     const Register RtopAddr = rscratch1;
3224     const Register RendAddr = rscratch2;
3225 
3226     __ lea(RtopAddr, top);
3227     __ lea(RendAddr, end);
3228     __ movptr(rax, Address(RtopAddr, 0));
3229 
3230     // For retries rax gets set by cmpxchgq
3231     Label retry;
3232     __ bind(retry);
3233     __ lea(rbx, Address(rax, rdx, Address::times_1));
3234     __ cmpptr(rbx, Address(RendAddr, 0));
3235     __ jcc(Assembler::above, slow_case);
3236 
3237     // Compare rax with the current top; if they are still equal, store the
3238     // new top (rbx) through the top pointer.  Sets ZF if they were equal and
3239     // clears it otherwise.  Use the lock prefix for atomicity on MPs.
3240     //
3241     // rax: object begin
3242     // rbx: object end
3243     // rdx: instance size in bytes
3244     if (os::is_MP()) {
3245       __ lock();
3246     }
3247     __ cmpxchgptr(rbx, Address(RtopAddr, 0));
3248 
3249     // if someone beat us on the allocation, try again, otherwise continue
3250     __ jcc(Assembler::notEqual, retry);
3251   }
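
       // The shared-Eden path above is a compare-and-swap retry loop on the global
       // top pointer.  Illustrative C-style sketch (hypothetical names, not the
       // HotSpot API):
       //
       //   char* obj = *heap_top;                                    // rax
       //   for (;;) {
       //     char* new_top = obj + instance_size;                    // rbx
       //     if (new_top > *heap_end) goto slow_case;
       //     if (compare_and_swap(heap_top, obj, new_top)) break;    // lock cmpxchg
       //     obj = *heap_top;  // on failure cmpxchg already left the observed top in rax
       //   }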
3252 
3253   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3254     // The object fields are initialized before the header.  If the size of
3255     // the field area is zero, go directly to the header initialization.
3256     __ bind(initialize_object);
3257     __ decrementl(rdx, sizeof(oopDesc));
3258     __ jcc(Assembler::zero, initialize_header);
3259 
3260     // Initialize object fields
3261     __ xorl(rcx, rcx); // use zero reg to clear memory (shorter code)
3262     __ shrl(rdx, LogBytesPerLong);  // divide by oopSize to simplify the loop
3263     {
3264       Label loop;
3265       __ bind(loop);
3266       __ movq(Address(rax, rdx, Address::times_8,
3267                       sizeof(oopDesc) - oopSize),
3268               rcx);
3269       __ decrementl(rdx);
3270       __ jcc(Assembler::notZero, loop);
3271     }
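
         // Worked example (assuming a 16-byte object header and an 8-byte oopSize):
         // a 32-byte instance leaves rdx == 16 after the subtraction above and
         // rdx == 2 after the shift, so the loop issues two 8-byte stores, at
         // offsets 24 and 16, clearing exactly the field area.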
3272 
3273     // initialize object header only.
3274     __ bind(initialize_header);
3275     if (UseBiasedLocking) {
3276       __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
3277       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1);
3278     } else {
3279       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
3280                (intptr_t) markOopDesc::prototype()); // header (address 0x1)
3281     }
3282     __ xorl(rcx, rcx); // use zero reg to clear memory (shorter code)
3283     __ store_klass_gap(rax, rcx);  // zero klass gap for compressed oops
3284     __ store_klass(rax, rsi);      // store klass last
3285 
3286     {
3287       SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
3288       // Trigger dtrace event for fastpath
3289       __ push(atos); // save the return value
3290       __ call_VM_leaf(
3291            CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
3292       __ pop(atos); // restore the return value
3293 
3294     }
3295     __ jmp(done);
3296   }
3297 
3298 
3299   // slow case
3300   __ bind(slow_case);
3301   __ get_constant_pool(c_rarg1);
3302   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3303   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
3304   __ verify_oop(rax);
3305 
3306   // continue
3307   __ bind(done);
3308 }
3309 
3310 void TemplateTable::newarray() {
3311   transition(itos, atos);
3312   __ load_unsigned_byte(c_rarg1, at_bcp(1));
3313   __ movl(c_rarg2, rax);
3314   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
3315           c_rarg1, c_rarg2);
3316 }
3317 
3318 void TemplateTable::anewarray() {
3319   transition(itos, atos);
3320   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3321   __ get_constant_pool(c_rarg1);
3322   __ movl(c_rarg3, rax);
3323   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
3324           c_rarg1, c_rarg2, c_rarg3);
3325 }
3326 
3327 void TemplateTable::arraylength() {
3328   transition(atos, itos);
3329   __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
3330   __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
3331 }
3332 
3333 void TemplateTable::checkcast() {
3334   transition(atos, atos);
3335   Label done, is_null, ok_is_subtype, quicked, resolved;
3336   __ testptr(rax, rax); // object is in rax
3337   __ jcc(Assembler::zero, is_null);
3338 
3339   // Get cpool & tags index
3340   __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
3341   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
3342   // See if the bytecode has already been quickened
3343   __ cmpb(Address(rdx, rbx,
3344                   Address::times_1,
3345                   typeArrayOopDesc::header_size(T_BYTE) * wordSize),
3346           JVM_CONSTANT_Class);
3347   __ jcc(Assembler::equal, quicked);
3348   __ push(atos); // save receiver for result, and for GC
3349   __ mov(r12, rcx); // save rcx XXX
3350   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3351   __ movq(rcx, r12); // restore rcx XXX
3352   __ reinit_heapbase();
3353   __ pop_ptr(rdx); // restore receiver
3354   __ jmpb(resolved);
3355 
3356   // Get superklass in rax and subklass in rbx
3357   __ bind(quicked);
3358   __ mov(rdx, rax); // Save object in rdx; rax needed for subtype check
3359   __ movptr(rax, Address(rcx, rbx,
3360                        Address::times_8, sizeof(constantPoolOopDesc)));
3361 
3362   __ bind(resolved);
3363   __ load_klass(rbx, rdx);
3364 
3365   // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
3366   // Superklass in rax.  Subklass in rbx.
3367   __ gen_subtype_check(rbx, ok_is_subtype);
3368 
3369   // Come here on failure
3370   __ push_ptr(rdx);
3371   // object is at TOS
3372   __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
3373 
3374   // Come here on success
3375   __ bind(ok_is_subtype);
3376   __ mov(rax, rdx); // Restore object from rdx into rax
3377 
3378   // Collect counts on whether this check-cast sees NULLs a lot or not.
3379   if (ProfileInterpreter) {
3380     __ jmp(done);
3381     __ bind(is_null);
3382     __ profile_null_seen(rcx);
3383   } else {
3384     __ bind(is_null);   // same as 'done'
3385   }
3386   __ bind(done);
3387 }
3388 
3389 void TemplateTable::instanceof() {
3390   transition(atos, itos);
3391   Label done, is_null, ok_is_subtype, quicked, resolved;
3392   __ testptr(rax, rax);
3393   __ jcc(Assembler::zero, is_null);
3394 
3395   // Get cpool & tags index
3396   __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
3397   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
3398   // See if the bytecode has already been quickened
3399   __ cmpb(Address(rdx, rbx,
3400                   Address::times_1,
3401                   typeArrayOopDesc::header_size(T_BYTE) * wordSize),
3402           JVM_CONSTANT_Class);
3403   __ jcc(Assembler::equal, quicked);
3404 
3405   __ push(atos); // save receiver for result, and for GC
3406   __ mov(r12, rcx); // save rcx
3407   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3408   __ movq(rcx, r12); // restore rcx
3409   __ reinit_heapbase();
3410   __ pop_ptr(rdx); // restore receiver
3411   __ load_klass(rdx, rdx);
3412   __ jmpb(resolved);
3413 
3414   // Get superklass in rax and subklass in rdx
3415   __ bind(quicked);
3416   __ load_klass(rdx, rax);
3417   __ movptr(rax, Address(rcx, rbx,
3418                          Address::times_8, sizeof(constantPoolOopDesc)));
3419 
3420   __ bind(resolved);
3421 
3422   // Generate subtype check.  Blows rcx, rdi
3423   // Superklass in rax.  Subklass in rdx.
3424   __ gen_subtype_check(rdx, ok_is_subtype);
3425 
3426   // Come here on failure
3427   __ xorl(rax, rax);
3428   __ jmpb(done);
3429   // Come here on success
3430   __ bind(ok_is_subtype);
3431   __ movl(rax, 1);
3432 
3433   // Collect counts on whether this test sees NULLs a lot or not.
3434   if (ProfileInterpreter) {
3435     __ jmp(done);
3436     __ bind(is_null);
3437     __ profile_null_seen(rcx);
3438   } else {
3439     __ bind(is_null);   // same as 'done'
3440   }
3441   __ bind(done);
3442   // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
3443   // rax = 1: obj != NULL and obj is     an instanceof the specified klass
3444 }
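
     // checkcast and instanceof above share the same quickening and subtype-check
     // machinery; they differ only in how a failed check is reported.  Roughly, in
     // Java terms (illustrative only):
     //
     //   checkcast:   if (obj != null && !K.isInstance(obj)) throw new ClassCastException();
     //   instanceof:  push(obj != null && K.isInstance(obj) ? 1 : 0);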
3445 
3446 //-----------------------------------------------------------------------------
3447 // Breakpoints
3448 void TemplateTable::_breakpoint() {
3449   // Note: We get here even if we are single stepping.
3450   // jbug insists on setting breakpoints at every bytecode
3451   // even if we are in single-step mode.
3452 
3453   transition(vtos, vtos);
3454 
3455   // get the unpatched byte code
3456   __ get_method(c_rarg1);
3457   __ call_VM(noreg,
3458              CAST_FROM_FN_PTR(address,
3459                               InterpreterRuntime::get_original_bytecode_at),
3460              c_rarg1, r13);
3461   __ mov(rbx, rax);
3462 
3463   // post the breakpoint event
3464   __ get_method(c_rarg1);
3465   __ call_VM(noreg,
3466              CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
3467              c_rarg1, r13);
3468 
3469   // complete the execution of original bytecode
3470   __ dispatch_only_normal(vtos);
3471 }
3472 
3473 //-----------------------------------------------------------------------------
3474 // Exceptions
3475 
3476 void TemplateTable::athrow() {
3477   transition(atos, vtos);
3478   __ null_check(rax);
3479   __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
3480 }
3481 
3482 //-----------------------------------------------------------------------------
3483 // Synchronization
3484 //
3485 // Note: monitorenter & monitorexit are symmetric routines, which is reflected
3486 //       in the assembly code structure as well
3487 //
3488 // Stack layout:
3489 //
3490 // [expressions  ] <--- rsp               = expression stack top
3491 // ..
3492 // [expressions  ]
3493 // [monitor entry] <--- monitor block top = expression stack bot
3494 // ..
3495 // [monitor entry]
3496 // [frame data   ] <--- monitor block bot
3497 // ...
3498 // [saved rbp    ] <--- rbp
3499 void TemplateTable::monitorenter() {
3500   transition(atos, vtos);
3501 
3502   // check for NULL object
3503   __ null_check(rax);
3504 
3505   const Address monitor_block_top(
3506         rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3507   const Address monitor_block_bot(
3508         rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
3509   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3510 
3511   Label allocated;
3512 
3513   // initialize entry pointer
3514   __ xorl(c_rarg1, c_rarg1); // points to free slot or NULL
3515 
3516   // find a free slot in the monitor block (result in c_rarg1)
3517   {
3518     Label entry, loop, exit;
3519     __ movptr(c_rarg3, monitor_block_top); // points to current entry,
3520                                      // starting with top-most entry
3521     __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
3522                                      // of monitor block
3523     __ jmpb(entry);
3524 
3525     __ bind(loop);
3526     // check if current entry is used
3527     __ cmpptr(Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD);
3528     // if not used then remember entry in c_rarg1
3529     __ cmov(Assembler::equal, c_rarg1, c_rarg3);
3530     // check if current entry is for same object
3531     __ cmpptr(rax, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
3532     // if same object then stop searching
3533     __ jccb(Assembler::equal, exit);
3534     // otherwise advance to next entry
3535     __ addptr(c_rarg3, entry_size);
3536     __ bind(entry);
3537     // check if bottom reached
3538     __ cmpptr(c_rarg3, c_rarg2);
3539     // if not at bottom then check this entry
3540     __ jcc(Assembler::notEqual, loop);
3541     __ bind(exit);
3542   }
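
       // In C-like pseudocode the scan above is roughly the following (illustrative
       // only; block_top, block_bot and lock_obj are hypothetical stand-ins for the
       // frame's monitor block bounds and the object in rax):
       //
       //   BasicObjectLock* free_slot = NULL;                           // c_rarg1
       //   for (BasicObjectLock* e = block_top; e != block_bot; e++) {  // c_rarg3
       //     if (e->obj() == NULL)     free_slot = e;   // remember an unused entry
       //     if (e->obj() == lock_obj) break;           // stop at an entry for this object
       //   }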
3543 
3544   __ testptr(c_rarg1, c_rarg1); // check if a slot has been found
3545   __ jcc(Assembler::notZero, allocated); // if found, continue with that one
3546 
3547   // allocate one if there's no free slot
3548   {
3549     Label entry, loop;
3550     // 1. compute new pointers             // rsp: old expression stack top
3551     __ movptr(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom
3552     __ subptr(rsp, entry_size);            // move expression stack top
3553     __ subptr(c_rarg1, entry_size);        // move expression stack bottom
3554     __ mov(c_rarg3, rsp);                  // set start value for copy loop
3555     __ movptr(monitor_block_bot, c_rarg1); // set new monitor block bottom
3556     __ jmp(entry);
3557     // 2. move expression stack contents
3558     __ bind(loop);
3559     __ movptr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
3560                                                       // word from old location
3561     __ movptr(Address(c_rarg3, 0), c_rarg2);          // and store it at new location
3562     __ addptr(c_rarg3, wordSize);                     // advance to next word
3563     __ bind(entry);
3564     __ cmpptr(c_rarg3, c_rarg1);            // check if bottom reached
3565     __ jcc(Assembler::notEqual, loop);      // if not at bottom then
3566                                             // copy next word
3567   }
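
       // In effect the block above grows the monitor area by one entry and slides
       // the expression stack down by entry_size to make room.  Illustrative sketch
       // (hypothetical names):
       //
       //   rsp         -= entry_size;                        // new expression stack top
       //   monitor_bot -= entry_size;                        // new monitor block bottom
       //   memmove(rsp, rsp + entry_size, old_stack_size);   // move the expression stack down
       //   c_rarg1 = monitor_bot;                            // the freshly allocated entry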
3568 
3569   // call run-time routine
3570   // c_rarg1: points to monitor entry
3571   __ bind(allocated);
3572 
3573   // Increment bcp to point to the next bytecode, so exception
3574   // handling for asynchronous exceptions works correctly.
3575   // The object has already been popped from the stack, so the
3576   // expression stack looks correct.
3577   __ increment(r13);
3578 
3579   // store object
3580   __ movptr(Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()), rax);
3581   __ lock_object(c_rarg1);
3582 
3583   // check to make sure this monitor doesn't cause stack overflow after locking
3584   __ save_bcp();  // in case of exception
3585   __ generate_stack_overflow_check(0);
3586 
3587   // The bcp has already been incremented. Just need to dispatch to
3588   // next instruction.
3589   __ dispatch_next(vtos);
3590 }
3591 
3592 
3593 void TemplateTable::monitorexit() {
3594   transition(atos, vtos);
3595 
3596   // check for NULL object
3597   __ null_check(rax);
3598 
3599   const Address monitor_block_top(
3600         rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3601   const Address monitor_block_bot(
3602         rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
3603   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3604 
3605   Label found;
3606 
3607   // find matching slot
3608   {
3609     Label entry, loop;
3610     __ movptr(c_rarg1, monitor_block_top); // points to current entry,
3611                                      // starting with top-most entry
3612     __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
3613                                      // of monitor block
3614     __ jmpb(entry);
3615 
3616     __ bind(loop);
3617     // check if current entry is for same object
3618     __ cmpptr(rax, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
3619     // if same object then stop searching
3620     __ jcc(Assembler::equal, found);
3621     // otherwise advance to next entry
3622     __ addptr(c_rarg1, entry_size);
3623     __ bind(entry);
3624     // check if bottom reached
3625     __ cmpptr(c_rarg1, c_rarg2);
3626     // if not at bottom then check this entry
3627     __ jcc(Assembler::notEqual, loop);
3628   }
3629 
3630   // Error handling: no matching entry was found, so the unlocking is not block-structured
3631   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3632                    InterpreterRuntime::throw_illegal_monitor_state_exception));
3633   __ should_not_reach_here();
3634 
3635   // call run-time routine
3636   // c_rarg1: points to the matching monitor entry
3637   __ bind(found);
3638   __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
3639   __ unlock_object(c_rarg1);
3640   __ pop_ptr(rax); // discard object
3641 }
3642 
3643 
3644 // Wide instructions
3645 void TemplateTable::wide() {
3646   transition(vtos, vtos);
3647   __ load_unsigned_byte(rbx, at_bcp(1));
3648   __ lea(rscratch1, ExternalAddress((address)Interpreter::_wentry_point));
3649   __ jmp(Address(rscratch1, rbx, Address::times_8));
3650   // Note: the r13 increment step is part of the individual wide
3651   // bytecode implementations
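       //
       // In effect this is an indexed indirect jump, roughly goto _wentry_point[bcp[1]],
       // where each table entry implements the wide variant of that bytecode.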
3652 }
3653 
3654 
3655 // Multi arrays
3656 void TemplateTable::multianewarray() {
3657   transition(vtos, atos);
3658   __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
3659   // last dim is on top of stack; we want address of first one:
3660   // first_addr = last_addr + (ndims - 1) * wordSize
3661   __ lea(c_rarg1, Address(rsp, rax, Address::times_8, -wordSize));
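       // Worked example: with three dimensions (rax == 3) the lea above yields
       // c_rarg1 = rsp + 3*8 - 8 = rsp + 16, i.e. the stack slot holding the first
       // dimension, two words above the last dimension at the top of the stack.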
3662   call_VM(rax,
3663           CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
3664           c_rarg1);
3665   __ load_unsigned_byte(rbx, at_bcp(3));
3666   __ lea(rsp, Address(rsp, rbx, Address::times_8));
3667 }
3668 #endif // !CC_INTERP