1 /*
   2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.hpp"
  28 #include "interpreter/interpreter.hpp"
  29 #include "interpreter/interpreterRuntime.hpp"
  30 #include "interpreter/interp_masm.hpp"
  31 #include "interpreter/templateTable.hpp"
  32 #include "memory/universe.hpp"
  33 #include "oops/methodData.hpp"
  34 #include "oops/method.hpp"
  35 #include "oops/objArrayKlass.hpp"
  36 #include "oops/oop.inline.hpp"
  37 #include "prims/methodHandles.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/stubRoutines.hpp"
  40 #include "runtime/synchronizer.hpp"
  41 
  42 #define __ _masm->
  43 
  44 // Platform-dependent initialization
  45 
  46 void TemplateTable::pd_initialize() {
  47   // No aarch64 specific initialization
  48 }
  49 
  50 // Address computation: local variables
  51 
  52 static inline Address iaddress(int n) {
  53   return Address(rlocals, Interpreter::local_offset_in_bytes(n));
  54 }
  55 
  56 static inline Address laddress(int n) {
  57   return iaddress(n + 1);
  58 }
  59 
  60 static inline Address faddress(int n) {
  61   return iaddress(n);
  62 }
  63 
  64 static inline Address daddress(int n) {
  65   return laddress(n);
  66 }
  67 
  68 static inline Address aaddress(int n) {
  69   return iaddress(n);
  70 }
  71 
  72 static inline Address iaddress(Register r) {
  73   return Address(rlocals, r, Address::lsl(3));
  74 }
  75 
  76 static inline Address laddress(Register r, Register scratch,
  77                                InterpreterMacroAssembler* _masm) {
  78   __ lea(scratch, Address(rlocals, r, Address::lsl(3)));
  79   return Address(scratch, Interpreter::local_offset_in_bytes(1));
  80 }
  81 
  82 static inline Address faddress(Register r) {
  83   return iaddress(r);
  84 }
  85 
  86 static inline Address daddress(Register r, Register scratch,
  87                                InterpreterMacroAssembler* _masm) {
  88   return laddress(r, scratch, _masm);
  89 }
  90 
  91 static inline Address aaddress(Register r) {
  92   return iaddress(r);
  93 }
  94 
  95 static inline Address at_rsp() {
  96   return Address(esp, 0);
  97 }
  98 
  99 // At top of Java expression stack, which may be different from esp().  For
 100 // category 1 values it is the same as esp().
 101 static inline Address at_tos   () {
 102   return Address(esp,  Interpreter::expr_offset_in_bytes(0));
 103 }
 104 
 105 static inline Address at_tos_p1() {
 106   return Address(esp,  Interpreter::expr_offset_in_bytes(1));
 107 }
 108 
 109 static inline Address at_tos_p2() {
 110   return Address(esp,  Interpreter::expr_offset_in_bytes(2));
 111 }
 112 
 113 static inline Address at_tos_p3() {
 114   return Address(esp,  Interpreter::expr_offset_in_bytes(3));
 115 }
 116 
 117 static inline Address at_tos_p4() {
 118   return Address(esp,  Interpreter::expr_offset_in_bytes(4));
 119 }
 120 
 121 static inline Address at_tos_p5() {
 122   return Address(esp,  Interpreter::expr_offset_in_bytes(5));
 123 }
 124 
 125 // Condition conversion
 126 static Assembler::Condition j_not(TemplateTable::Condition cc) {
 127   switch (cc) {
 128   case TemplateTable::equal        : return Assembler::NE;
 129   case TemplateTable::not_equal    : return Assembler::EQ;
 130   case TemplateTable::less         : return Assembler::GE;
 131   case TemplateTable::less_equal   : return Assembler::GT;
 132   case TemplateTable::greater      : return Assembler::LE;
 133   case TemplateTable::greater_equal: return Assembler::LT;
 134   }
 135   ShouldNotReachHere();
 136   return Assembler::EQ;
 137 }
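
// j_not() returns the condition for the "branch not taken" test: the
// if_<cond> and if_icmp<cond> templates (later in this file) evaluate the
// comparison and then branch over the taken-branch code using j_not(cc).
// For example, for if_icmplt (cc == less) the emitted branch uses GE, so
// control falls through into the taken path only when value1 < value2.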
 138 
 139 
 140 // Miscellaneous helper routines
 141 // Store an oop (or NULL) at the Address described by obj.
 142 // If val == noreg this means store a NULL
 143 static void do_oop_store(InterpreterMacroAssembler* _masm,
 144                          Address obj,
 145                          Register val,
 146                          BarrierSet::Name barrier,
 147                          bool precise) {
 148   assert(val == noreg || val == r0, "parameter is just for looks");
 149   switch (barrier) {
 150 #if INCLUDE_ALL_GCS
 151     case BarrierSet::G1BarrierSet:
 152       {
 153         // flatten object address if needed
 154         if (obj.index() == noreg && obj.offset() == 0) {
 155           if (obj.base() != r3) {
 156             __ mov(r3, obj.base());
 157           }
 158         } else {
 159           __ lea(r3, obj);
 160         }
 161         __ g1_write_barrier_pre(r3 /* obj */,
 162                                 r1 /* pre_val */,
 163                                 rthread /* thread */,
 164                                 r10  /* tmp */,
 165                                 val != noreg /* tosca_live */,
 166                                 false /* expand_call */);
 167         if (val == noreg) {
 168           __ store_heap_oop_null(Address(r3, 0));
 169         } else {
 170           // G1 barrier needs uncompressed oop for region cross check.
 171           Register new_val = val;
 172           if (UseCompressedOops) {
 173             new_val = rscratch2;
 174             __ mov(new_val, val);
 175           }
 176           __ store_heap_oop(Address(r3, 0), val);
 177           __ g1_write_barrier_post(r3 /* store_adr */,
 178                                    new_val /* new_val */,
 179                                    rthread /* thread */,
 180                                    r10 /* tmp */,
 181                                    r1 /* tmp2 */);
 182         }
 183 
 184       }
 185       break;
 186 #endif // INCLUDE_ALL_GCS
 187     case BarrierSet::CardTableBarrierSet:
 188       {
 189         if (val == noreg) {
 190           __ store_heap_oop_null(obj);
 191         } else {
 192           __ store_heap_oop(obj, val);
 193           // flatten object address if needed
 194           if (!precise || (obj.index() == noreg && obj.offset() == 0)) {
 195             __ store_check(obj.base());
 196           } else {
 197             __ lea(r3, obj);
 198             __ store_check(r3);
 199           }
 200         }
 201       }
 202       break;
 203     case BarrierSet::ModRef:
 204       if (val == noreg) {
 205         __ store_heap_oop_null(obj);
 206       } else {
 207         __ store_heap_oop(obj, val);
 208       }
 209       break;
 210     default      :
 211       ShouldNotReachHere();
 212 
 213   }
 214 }
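
// Reading aid for do_oop_store(): the G1 case emits the SATB pre-barrier
// (recording the previous value) before the store and the post-barrier
// (card marking / region-cross check) after it; the card-table case only
// needs a store_check (card mark) after the store; and the ModRef case
// stores the oop with no extra barrier work here.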
 215 
 216 Address TemplateTable::at_bcp(int offset) {
 217   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
 218   return Address(rbcp, offset);
 219 }
 220 
 221 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
 222                                    Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
 223                                    int byte_no)
 224 {
 225   if (!RewriteBytecodes)  return;
 226   Label L_patch_done;
 227 
 228   switch (bc) {
 229   case Bytecodes::_fast_aputfield:
 230   case Bytecodes::_fast_bputfield:
 231   case Bytecodes::_fast_zputfield:
 232   case Bytecodes::_fast_cputfield:
 233   case Bytecodes::_fast_dputfield:
 234   case Bytecodes::_fast_fputfield:
 235   case Bytecodes::_fast_iputfield:
 236   case Bytecodes::_fast_lputfield:
 237   case Bytecodes::_fast_sputfield:
 238     {
 239       // We skip bytecode quickening for putfield instructions when
 240       // the put_code written to the constant pool cache is zero.
 241       // This is required so that every execution of this instruction
 242       // calls out to InterpreterRuntime::resolve_get_put to do
 243       // additional, required work.
 244       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
 245       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
 246       __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
 247       __ movw(bc_reg, bc);
 248       __ cbzw(temp_reg, L_patch_done);  // don't patch
 249     }
 250     break;
 251   default:
 252     assert(byte_no == -1, "sanity");
 253     // the pair bytecodes have already done the load.
 254     if (load_bc_into_bc_reg) {
 255       __ movw(bc_reg, bc);
 256     }
 257   }
 258 
 259   if (JvmtiExport::can_post_breakpoint()) {
 260     Label L_fast_patch;
 261     // if a breakpoint is present we can't rewrite the stream directly
 262     __ load_unsigned_byte(temp_reg, at_bcp(0));
 263     __ cmpw(temp_reg, Bytecodes::_breakpoint);
 264     __ br(Assembler::NE, L_fast_patch);
 265     // Let breakpoint table handling rewrite to quicker bytecode
 266     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), rmethod, rbcp, bc_reg);
 267     __ b(L_patch_done);
 268     __ bind(L_fast_patch);
 269   }
 270 
 271 #ifdef ASSERT
 272   Label L_okay;
 273   __ load_unsigned_byte(temp_reg, at_bcp(0));
 274   __ cmpw(temp_reg, (int) Bytecodes::java_code(bc));
 275   __ br(Assembler::EQ, L_okay);
 276   __ cmpw(temp_reg, bc_reg);
 277   __ br(Assembler::EQ, L_okay);
 278   __ stop("patching the wrong bytecode");
 279   __ bind(L_okay);
 280 #endif
 281 
 282   // patch bytecode
 283   __ strb(bc_reg, at_bcp(0));
 284   __ bind(L_patch_done);
 285 }
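
// Example of the quickening performed by patch_bytecode(): once a getfield
// has been resolved, its template calls patch_bytecode() with the matching
// fast bytecode (e.g. _fast_igetfield), and the strb above overwrites the
// opcode in the bytecode stream so subsequent executions dispatch straight
// to the quickened template.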
 286 
 287 
 288 // Individual instructions
 289 
 290 void TemplateTable::nop() {
 291   transition(vtos, vtos);
 292   // nothing to do
 293 }
 294 
 295 void TemplateTable::shouldnotreachhere() {
 296   transition(vtos, vtos);
 297   __ stop("shouldnotreachhere bytecode");
 298 }
 299 
 300 void TemplateTable::aconst_null()
 301 {
 302   transition(vtos, atos);
 303   __ mov(r0, 0);
 304 }
 305 
 306 void TemplateTable::iconst(int value)
 307 {
 308   transition(vtos, itos);
 309   __ mov(r0, value);
 310 }
 311 
 312 void TemplateTable::lconst(int value)
 313 {
 314   __ mov(r0, value);
 315 }
 316 
 317 void TemplateTable::fconst(int value)
 318 {
 319   transition(vtos, ftos);
 320   switch (value) {
 321   case 0:
 322     __ fmovs(v0, zr);
 323     break;
 324   case 1:
 325     __ fmovs(v0, 1.0);
 326     break;
 327   case 2:
 328     __ fmovs(v0, 2.0);
 329     break;
 330   default:
 331     ShouldNotReachHere();
 332     break;
 333   }
 334 }
 335 
 336 void TemplateTable::dconst(int value)
 337 {
 338   transition(vtos, dtos);
 339   switch (value) {
 340   case 0:
 341     __ fmovd(v0, zr);
 342     break;
 343   case 1:
 344     __ fmovd(v0, 1.0);
 345     break;
 346   case 2:
 347     __ fmovd(v0, 2.0);
 348     break;
 349   default:
 350     ShouldNotReachHere();
 351     break;
 352   }
 353 }
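
// n.b. in fconst()/dconst() the constants 1.0 and 2.0 are encodable as
// AArch64 fmov immediates, but 0.0 is not, which is why the value == 0 cases
// materialise the constant by moving zr into the FP register instead.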
 354 
 355 void TemplateTable::bipush()
 356 {
 357   transition(vtos, itos);
 358   __ load_signed_byte32(r0, at_bcp(1));
 359 }
 360 
 361 void TemplateTable::sipush()
 362 {
 363   transition(vtos, itos);
 364   __ load_unsigned_short(r0, at_bcp(1));
 365   __ revw(r0, r0);
 366   __ asrw(r0, r0, 16);
 367 }
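
// sipush decoding, spelled out: the two operand bytes at bcp+1 are stored
// big-endian in the bytecode stream, so the halfword is loaded, byte-swapped
// into the top 16 bits by revw, and then sign-extended by the arithmetic
// shift.  E.g. operand bytes 0xFF 0xFE push the int value -2.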
 368 
 369 void TemplateTable::ldc(bool wide)
 370 {
 371   transition(vtos, vtos);
 372   Label call_ldc, notFloat, notClass, notInt, Done;
 373 
 374   if (wide) {
 375     __ get_unsigned_2_byte_index_at_bcp(r1, 1);
 376   } else {
 377     __ load_unsigned_byte(r1, at_bcp(1));
 378   }
 379   __ get_cpool_and_tags(r2, r0);
 380 
 381   const int base_offset = ConstantPool::header_size() * wordSize;
 382   const int tags_offset = Array<u1>::base_offset_in_bytes();
 383 
 384   // get type
 385   __ add(r3, r1, tags_offset);
 386   __ lea(r3, Address(r0, r3));
 387   __ ldarb(r3, r3);
 388 
 389   // unresolved class - get the resolved class
 390   __ cmp(r3, JVM_CONSTANT_UnresolvedClass);
 391   __ br(Assembler::EQ, call_ldc);
 392 
 393   // unresolved class in error state - call into runtime to throw the error
 394   // from the first resolution attempt
 395   __ cmp(r3, JVM_CONSTANT_UnresolvedClassInError);
 396   __ br(Assembler::EQ, call_ldc);
 397 
 398   // resolved class - need to call vm to get java mirror of the class
 399   __ cmp(r3, JVM_CONSTANT_Class);
 400   __ br(Assembler::NE, notClass);
 401 
 402   __ bind(call_ldc);
 403   __ mov(c_rarg1, wide);
 404   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
 405   __ push_ptr(r0);
 406   __ verify_oop(r0);
 407   __ b(Done);
 408 
 409   __ bind(notClass);
 410   __ cmp(r3, JVM_CONSTANT_Float);
 411   __ br(Assembler::NE, notFloat);
 412   // ftos
 413   __ adds(r1, r2, r1, Assembler::LSL, 3);
 414   __ ldrs(v0, Address(r1, base_offset));
 415   __ push_f();
 416   __ b(Done);
 417 
 418   __ bind(notFloat);
 419 
 420   __ cmp(r3, JVM_CONSTANT_Integer);
 421   __ br(Assembler::NE, notInt);
 422 
 423   // itos
 424   __ adds(r1, r2, r1, Assembler::LSL, 3);
 425   __ ldrw(r0, Address(r1, base_offset));
 426   __ push_i(r0);
 427   __ b(Done);
 428 
 429   __ bind(notInt);
 430   condy_helper(Done);
 431 
 432   __ bind(Done);
 433 }
 434 
 435 // Fast path for caching oop constants.
 436 void TemplateTable::fast_aldc(bool wide)
 437 {
 438   transition(vtos, atos);
 439 
 440   Register result = r0;
 441   Register tmp = r1;
 442   Register rarg = r2;
 443 
 444   int index_size = wide ? sizeof(u2) : sizeof(u1);
 445 
 446   Label resolved;
 447 
 448   // We are resolved if the resolved reference cache entry contains a
 449   // non-null object (String, MethodType, etc.)
 450   assert_different_registers(result, tmp);
 451   __ get_cache_index_at_bcp(tmp, 1, index_size);
 452   __ load_resolved_reference_at_index(result, tmp);
 453   __ cbnz(result, resolved);
 454 
 455   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 456 
 457   // first time invocation - must resolve first
 458   __ mov(rarg, (int)bytecode());
 459   __ call_VM(result, entry, rarg);
 460 
 461   __ bind(resolved);
 462 
 463   { // Check for the null sentinel.
 464     // If we just called the VM, that already did the mapping for us,
 465     // but it's harmless to retry.
 466     Label notNull;
 467 
 468     // Stash null_sentinel address to get its value later
 469     __ movptr(rarg, (uintptr_t)Universe::the_null_sentinel_addr());
 470     __ ldr(tmp, Address(rarg));
 471     __ cmp(result, tmp);
 472     __ br(Assembler::NE, notNull);
 473     __ mov(result, 0);  // NULL object reference
 474     __ bind(notNull);
 475   }
 476 
 477   if (VerifyOops) {
 478     // Safe to call with 0 result
 479     __ verify_oop(result);
 480   }
 481 }
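
// Note on the null sentinel: in the resolved-references array a NULL entry
// means "not yet resolved", so a constant that actually resolves to null is
// cached as the distinguished null-sentinel object; the block above maps
// that sentinel back to a real NULL before the reference is pushed.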
 482 
 483 void TemplateTable::ldc2_w()
 484 {
 485   transition(vtos, vtos);
 486   Label notDouble, notLong, Done;
 487   __ get_unsigned_2_byte_index_at_bcp(r0, 1);
 488 
 489   __ get_cpool_and_tags(r1, r2);
 490   const int base_offset = ConstantPool::header_size() * wordSize;
 491   const int tags_offset = Array<u1>::base_offset_in_bytes();
 492 
 493   // get type
 494   __ lea(r2, Address(r2, r0, Address::lsl(0)));
 495   __ load_unsigned_byte(r2, Address(r2, tags_offset));
 496   __ cmpw(r2, (int)JVM_CONSTANT_Double);
 497   __ br(Assembler::NE, notDouble);
 498 
 499   // dtos
 500   __ lea (r2, Address(r1, r0, Address::lsl(3)));
 501   __ ldrd(v0, Address(r2, base_offset));
 502   __ push_d();
 503   __ b(Done);
 504 
 505   __ bind(notDouble);
 506   __ cmpw(r2, (int)JVM_CONSTANT_Long);
 507   __ br(Assembler::NE, notLong);
 508 
 509   // ltos
 510   __ lea(r0, Address(r1, r0, Address::lsl(3)));
 511   __ ldr(r0, Address(r0, base_offset));
 512   __ push_l();
 513   __ b(Done);
 514 
 515   __ bind(notLong);
 516   condy_helper(Done);
 517 
 518   __ bind(Done);
 519 }
 520 
 521 void TemplateTable::condy_helper(Label& Done)
 522 {
 523   Register obj = r0;
 524   Register rarg = r1;
 525   Register flags = r2;
 526   Register off = r3;
 527 
 528   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 529 
 530   __ mov(rarg, (int) bytecode());
 531   __ call_VM(obj, entry, rarg);
 532 
 533   __ get_vm_result_2(flags, rthread);
 534 
 535   // VMr = obj = base address to find primitive value to push
 536   // VMr2 = flags = (tos, off) using format of CPCE::_flags
 537   __ mov(off, flags);
 538   __ andw(off, off, ConstantPoolCacheEntry::field_index_mask);
 539 
 540   const Address field(obj, off);
 541 
 542   // What sort of thing are we loading?
 543   // x86 uses a shift and mask, or wings it with a shift plus an assert that
 544   // the mask is not needed.  aarch64 just uses a bitfield extract.
 545   __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,
 546            ConstantPoolCacheEntry::tos_state_bits);
 547 
 548   switch (bytecode()) {
 549     case Bytecodes::_ldc:
 550     case Bytecodes::_ldc_w:
 551       {
 552         // tos in (itos, ftos, stos, btos, ctos, ztos)
 553         Label notInt, notFloat, notShort, notByte, notChar, notBool;
 554         __ cmpw(flags, itos);
 555         __ br(Assembler::NE, notInt);
 556         // itos
 557         __ ldrw(r0, field);
 558         __ push(itos);
 559         __ b(Done);
 560 
 561         __ bind(notInt);
 562         __ cmpw(flags, ftos);
 563         __ br(Assembler::NE, notFloat);
 564         // ftos
 565         __ load_float(field);
 566         __ push(ftos);
 567         __ b(Done);
 568 
 569         __ bind(notFloat);
 570         __ cmpw(flags, stos);
 571         __ br(Assembler::NE, notShort);
 572         // stos
 573         __ load_signed_short(r0, field);
 574         __ push(stos);
 575         __ b(Done);
 576 
 577         __ bind(notShort);
 578         __ cmpw(flags, btos);
 579         __ br(Assembler::NE, notByte);
 580         // btos
 581         __ load_signed_byte(r0, field);
 582         __ push(btos);
 583         __ b(Done);
 584 
 585         __ bind(notByte);
 586         __ cmpw(flags, ctos);
 587         __ br(Assembler::NE, notChar);
 588         // ctos
 589         __ load_unsigned_short(r0, field);
 590         __ push(ctos);
 591         __ b(Done);
 592 
 593         __ bind(notChar);
 594         __ cmpw(flags, ztos);
 595         __ br(Assembler::NE, notBool);
 596         // ztos
 597         __ load_signed_byte(r0, field);
 598         __ push(ztos);
 599         __ b(Done);
 600 
 601         __ bind(notBool);
 602         break;
 603       }
 604 
 605     case Bytecodes::_ldc2_w:
 606       {
 607         Label notLong, notDouble;
 608         __ cmpw(flags, ltos);
 609         __ br(Assembler::NE, notLong);
 610         // ltos
 611         __ ldr(r0, field);
 612         __ push(ltos);
 613         __ b(Done);
 614 
 615         __ bind(notLong);
 616         __ cmpw(flags, dtos);
 617         __ br(Assembler::NE, notDouble);
 618         // dtos
 619         __ load_double(field);
 620         __ push(dtos);
 621         __ b(Done);
 622 
 623        __ bind(notDouble);
 624         break;
 625       }
 626 
 627     default:
 628       ShouldNotReachHere();
 629     }
 630 
 631     __ stop("bad ldc/condy");
 632 }
 633 
 634 void TemplateTable::locals_index(Register reg, int offset)
 635 {
 636   __ ldrb(reg, at_bcp(offset));
 637   __ neg(reg, reg);
 638 }
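
// The index is negated because locals live at decreasing addresses below
// rlocals: iaddress(Register) scales the (negated) index by 8, so local #n
// is addressed as rlocals - n*wordSize.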
 639 
 640 void TemplateTable::iload() {
 641   iload_internal();
 642 }
 643 
 644 void TemplateTable::nofast_iload() {
 645   iload_internal(may_not_rewrite);
 646 }
 647 
 648 void TemplateTable::iload_internal(RewriteControl rc) {
 649   transition(vtos, itos);
 650   if (RewriteFrequentPairs && rc == may_rewrite) {
 651     Label rewrite, done;
 652     Register bc = r4;
 653 
 654     // get next bytecode
 655     __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
 656 
 657     // if _iload, wait to rewrite to _fast_iload2.  We only want to rewrite the
 658     // last two iloads in a pair.  Comparing against _fast_iload means that
 659     // the next bytecode is neither an iload nor a caload, and therefore
 660     // an iload pair.
 661     __ cmpw(r1, Bytecodes::_iload);
 662     __ br(Assembler::EQ, done);
 663 
 664     // if _fast_iload rewrite to _fast_iload2
 665     __ cmpw(r1, Bytecodes::_fast_iload);
 666     __ movw(bc, Bytecodes::_fast_iload2);
 667     __ br(Assembler::EQ, rewrite);
 668 
 669     // if _caload rewrite to _fast_icaload
 670     __ cmpw(r1, Bytecodes::_caload);
 671     __ movw(bc, Bytecodes::_fast_icaload);
 672     __ br(Assembler::EQ, rewrite);
 673 
 674     // else rewrite to _fast_iload
 675     __ movw(bc, Bytecodes::_fast_iload);
 676 
 677     // rewrite
 678     // bc: new bytecode
 679     __ bind(rewrite);
 680     patch_bytecode(Bytecodes::_iload, bc, r1, false);
 681     __ bind(done);
 682 
 683   }
 684 
 685   // do iload, get the local value into tos
 686   locals_index(r1);
 687   __ ldr(r0, iaddress(r1));
 688 
 689 }
 690 
 691 void TemplateTable::fast_iload2()
 692 {
 693   transition(vtos, itos);
 694   locals_index(r1);
 695   __ ldr(r0, iaddress(r1));
 696   __ push(itos);
 697   locals_index(r1, 3);
 698   __ ldr(r0, iaddress(r1));
 699 }
 700 
 701 void TemplateTable::fast_iload()
 702 {
 703   transition(vtos, itos);
 704   locals_index(r1);
 705   __ ldr(r0, iaddress(r1));
 706 }
 707 
 708 void TemplateTable::lload()
 709 {
 710   transition(vtos, ltos);
 711   __ ldrb(r1, at_bcp(1));
 712   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 713   __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 714 }
 715 
 716 void TemplateTable::fload()
 717 {
 718   transition(vtos, ftos);
 719   locals_index(r1);
 720   // n.b. we use ldrd here because this is a 64 bit slot
 721   // this is comparable to the iload case
 722   __ ldrd(v0, faddress(r1));
 723 }
 724 
 725 void TemplateTable::dload()
 726 {
 727   transition(vtos, dtos);
 728   __ ldrb(r1, at_bcp(1));
 729   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 730   __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 731 }
 732 
 733 void TemplateTable::aload()
 734 {
 735   transition(vtos, atos);
 736   locals_index(r1);
 737   __ ldr(r0, iaddress(r1));
 738 }
 739 
 740 void TemplateTable::locals_index_wide(Register reg) {
 741   __ ldrh(reg, at_bcp(2));
 742   __ rev16w(reg, reg);
 743   __ neg(reg, reg);
 744 }
 745 
 746 void TemplateTable::wide_iload() {
 747   transition(vtos, itos);
 748   locals_index_wide(r1);
 749   __ ldr(r0, iaddress(r1));
 750 }
 751 
 752 void TemplateTable::wide_lload()
 753 {
 754   transition(vtos, ltos);
 755   __ ldrh(r1, at_bcp(2));
 756   __ rev16w(r1, r1);
 757   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 758   __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 759 }
 760 
 761 void TemplateTable::wide_fload()
 762 {
 763   transition(vtos, ftos);
 764   locals_index_wide(r1);
 765   // n.b. we use ldrd here because this is a 64 bit slot
 766   // this is comparable to the iload case
 767   __ ldrd(v0, faddress(r1));
 768 }
 769 
 770 void TemplateTable::wide_dload()
 771 {
 772   transition(vtos, dtos);
 773   __ ldrh(r1, at_bcp(2));
 774   __ rev16w(r1, r1);
 775   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 776   __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 777 }
 778 
 779 void TemplateTable::wide_aload()
 780 {
 781   transition(vtos, atos);
 782   locals_index_wide(r1);
 783   __ ldr(r0, aaddress(r1));
 784 }
 785 
 786 void TemplateTable::index_check(Register array, Register index)
 787 {
 788   // destroys r1, rscratch1
 789   // check array
 790   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
 791   // sign extend index for use by indexed load
 792   // __ movl2ptr(index, index);
 793   // check index
 794   Register length = rscratch1;
 795   __ ldrw(length, Address(array, arrayOopDesc::length_offset_in_bytes()));
 796   __ cmpw(index, length);
 797   if (index != r1) {
 798     // ??? convention: move aberrant index into r1 for exception message
 799     assert(r1 != array, "different registers");
 800     __ mov(r1, index);
 801   }
 802   Label ok;
 803   __ br(Assembler::LO, ok);
 804   __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
 805   __ br(rscratch1);
 806   __ bind(ok);
 807 }
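
// Note that the single unsigned compare in index_check() covers both failure
// modes: a negative index looks like a huge unsigned value, so the LO
// (unsigned lower) test fails for it just as it does for index >= length,
// and control falls through to the ArrayIndexOutOfBoundsException entry.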
 808 
 809 void TemplateTable::iaload()
 810 {
 811   transition(itos, itos);
 812   __ mov(r1, r0);
 813   __ pop_ptr(r0);
 814   // r0: array
 815   // r1: index
 816   index_check(r0, r1); // leaves index in r1, kills rscratch1
 817   __ lea(r1, Address(r0, r1, Address::uxtw(2)));
 818   __ ldrw(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_INT)));
 819 }
 820 
 821 void TemplateTable::laload()
 822 {
 823   transition(itos, ltos);
 824   __ mov(r1, r0);
 825   __ pop_ptr(r0);
 826   // r0: array
 827   // r1: index
 828   index_check(r0, r1); // leaves index in r1, kills rscratch1
 829   __ lea(r1, Address(r0, r1, Address::uxtw(3)));
 830   __ ldr(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_LONG)));
 831 }
 832 
 833 void TemplateTable::faload()
 834 {
 835   transition(itos, ftos);
 836   __ mov(r1, r0);
 837   __ pop_ptr(r0);
 838   // r0: array
 839   // r1: index
 840   index_check(r0, r1); // leaves index in r1, kills rscratch1
 841   __ lea(r1,  Address(r0, r1, Address::uxtw(2)));
 842   __ ldrs(v0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 843 }
 844 
 845 void TemplateTable::daload()
 846 {
 847   transition(itos, dtos);
 848   __ mov(r1, r0);
 849   __ pop_ptr(r0);
 850   // r0: array
 851   // r1: index
 852   index_check(r0, r1); // leaves index in r1, kills rscratch1
 853   __ lea(r1,  Address(r0, r1, Address::uxtw(3)));
 854   __ ldrd(v0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 855 }
 856 
 857 void TemplateTable::aaload()
 858 {
 859   transition(itos, atos);
 860   __ mov(r1, r0);
 861   __ pop_ptr(r0);
 862   // r0: array
 863   // r1: index
 864   index_check(r0, r1); // leaves index in r1, kills rscratch1
 865   int s = (UseCompressedOops ? 2 : 3);
 866   __ lea(r1, Address(r0, r1, Address::uxtw(s)));
 867   __ load_heap_oop(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
 868 }
 869 
 870 void TemplateTable::baload()
 871 {
 872   transition(itos, itos);
 873   __ mov(r1, r0);
 874   __ pop_ptr(r0);
 875   // r0: array
 876   // r1: index
 877   index_check(r0, r1); // leaves index in r1, kills rscratch1
 878   __ lea(r1,  Address(r0, r1, Address::uxtw(0)));
 879   __ load_signed_byte(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_BYTE)));
 880 }
 881 
 882 void TemplateTable::caload()
 883 {
 884   transition(itos, itos);
 885   __ mov(r1, r0);
 886   __ pop_ptr(r0);
 887   // r0: array
 888   // r1: index
 889   index_check(r0, r1); // leaves index in r1, kills rscratch1
 890   __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
 891   __ load_unsigned_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 892 }
 893 
 894 // iload followed by caload frequent pair
 895 void TemplateTable::fast_icaload()
 896 {
 897   transition(vtos, itos);
 898   // load index out of locals
 899   locals_index(r2);
 900   __ ldr(r1, iaddress(r2));
 901 
 902   __ pop_ptr(r0);
 903 
 904   // r0: array
 905   // r1: index
 906   index_check(r0, r1); // leaves index in r1, kills rscratch1
 907   __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
 908   __ load_unsigned_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 909 }
 910 
 911 void TemplateTable::saload()
 912 {
 913   transition(itos, itos);
 914   __ mov(r1, r0);
 915   __ pop_ptr(r0);
 916   // r0: array
 917   // r1: index
 918   index_check(r0, r1); // leaves index in r1, kills rscratch1
 919   __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
 920   __ load_signed_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 921 }
 922 
 923 void TemplateTable::iload(int n)
 924 {
 925   transition(vtos, itos);
 926   __ ldr(r0, iaddress(n));
 927 }
 928 
 929 void TemplateTable::lload(int n)
 930 {
 931   transition(vtos, ltos);
 932   __ ldr(r0, laddress(n));
 933 }
 934 
 935 void TemplateTable::fload(int n)
 936 {
 937   transition(vtos, ftos);
 938   __ ldrs(v0, faddress(n));
 939 }
 940 
 941 void TemplateTable::dload(int n)
 942 {
 943   transition(vtos, dtos);
 944   __ ldrd(v0, daddress(n));
 945 }
 946 
 947 void TemplateTable::aload(int n)
 948 {
 949   transition(vtos, atos);
 950   __ ldr(r0, iaddress(n));
 951 }
 952 
 953 void TemplateTable::aload_0() {
 954   aload_0_internal();
 955 }
 956 
 957 void TemplateTable::nofast_aload_0() {
 958   aload_0_internal(may_not_rewrite);
 959 }
 960 
 961 void TemplateTable::aload_0_internal(RewriteControl rc) {
 962   // According to bytecode histograms, the pairs:
 963   //
 964   // _aload_0, _fast_igetfield
 965   // _aload_0, _fast_agetfield
 966   // _aload_0, _fast_fgetfield
 967   //
 968   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 969   // _aload_0 bytecode checks if the next bytecode is either
 970   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 971   // rewrites the current bytecode into a pair bytecode; otherwise it
 972   // rewrites the current bytecode into _fast_aload_0 that doesn't do
 973   // the pair check anymore.
 974   //
 975   // Note: If the next bytecode is _getfield, the rewrite must be
 976   //       delayed, otherwise we may miss an opportunity for a pair.
 977   //
 978   // Also rewrite frequent pairs
 979   //   aload_0, aload_1
 980   //   aload_0, iload_1
 981   // These bytecodes with a small amount of code are most profitable
 982   // to rewrite
 983   if (RewriteFrequentPairs && rc == may_rewrite) {
 984     Label rewrite, done;
 985     const Register bc = r4;
 986 
 987     // get next bytecode
 988     __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
 989 
 990     // if _getfield then wait with rewrite
 991     __ cmpw(r1, Bytecodes::_getfield);
 992     __ br(Assembler::EQ, done);
 993 
 994     // if _igetfield then rewrite to _fast_iaccess_0
 995     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 996     __ cmpw(r1, Bytecodes::_fast_igetfield);
 997     __ movw(bc, Bytecodes::_fast_iaccess_0);
 998     __ br(Assembler::EQ, rewrite);
 999 
1000     // if _agetfield then rewrite to _fast_aaccess_0
1001     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
1002     __ cmpw(r1, Bytecodes::_fast_agetfield);
1003     __ movw(bc, Bytecodes::_fast_aaccess_0);
1004     __ br(Assembler::EQ, rewrite);
1005 
1006     // if _fgetfield then rewrite to _fast_faccess_0
1007     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
1008     __ cmpw(r1, Bytecodes::_fast_fgetfield);
1009     __ movw(bc, Bytecodes::_fast_faccess_0);
1010     __ br(Assembler::EQ, rewrite);
1011 
1012     // else rewrite to _fast_aload_0
1013     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
1014     __ movw(bc, Bytecodes::_fast_aload_0);
1015 
1016     // rewrite
1017     // bc: new bytecode
1018     __ bind(rewrite);
1019     patch_bytecode(Bytecodes::_aload_0, bc, r1, false);
1020 
1021     __ bind(done);
1022   }
1023 
1024   // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
1025   aload(0);
1026 }
1027 
1028 void TemplateTable::istore()
1029 {
1030   transition(itos, vtos);
1031   locals_index(r1);
1032   // FIXME: We're being very pernickety here, storing a jint in a
1033   // local with strw, which costs an extra instruction over what we'd
1034   // be able to do with a simple str.  We should just store the whole
1035   // word.
1036   __ lea(rscratch1, iaddress(r1));
1037   __ strw(r0, Address(rscratch1));
1038 }
1039 
1040 void TemplateTable::lstore()
1041 {
1042   transition(ltos, vtos);
1043   locals_index(r1);
1044   __ str(r0, laddress(r1, rscratch1, _masm));
1045 }
1046 
1047 void TemplateTable::fstore() {
1048   transition(ftos, vtos);
1049   locals_index(r1);
1050   __ lea(rscratch1, iaddress(r1));
1051   __ strs(v0, Address(rscratch1));
1052 }
1053 
1054 void TemplateTable::dstore() {
1055   transition(dtos, vtos);
1056   locals_index(r1);
1057   __ strd(v0, daddress(r1, rscratch1, _masm));
1058 }
1059 
1060 void TemplateTable::astore()
1061 {
1062   transition(vtos, vtos);
1063   __ pop_ptr(r0);
1064   locals_index(r1);
1065   __ str(r0, aaddress(r1));
1066 }
1067 
1068 void TemplateTable::wide_istore() {
1069   transition(vtos, vtos);
1070   __ pop_i();
1071   locals_index_wide(r1);
1072   __ lea(rscratch1, iaddress(r1));
1073   __ strw(r0, Address(rscratch1));
1074 }
1075 
1076 void TemplateTable::wide_lstore() {
1077   transition(vtos, vtos);
1078   __ pop_l();
1079   locals_index_wide(r1);
1080   __ str(r0, laddress(r1, rscratch1, _masm));
1081 }
1082 
1083 void TemplateTable::wide_fstore() {
1084   transition(vtos, vtos);
1085   __ pop_f();
1086   locals_index_wide(r1);
1087   __ lea(rscratch1, faddress(r1));
1088   __ strs(v0, rscratch1);
1089 }
1090 
1091 void TemplateTable::wide_dstore() {
1092   transition(vtos, vtos);
1093   __ pop_d();
1094   locals_index_wide(r1);
1095   __ strd(v0, daddress(r1, rscratch1, _masm));
1096 }
1097 
1098 void TemplateTable::wide_astore() {
1099   transition(vtos, vtos);
1100   __ pop_ptr(r0);
1101   locals_index_wide(r1);
1102   __ str(r0, aaddress(r1));
1103 }
1104 
1105 void TemplateTable::iastore() {
1106   transition(itos, vtos);
1107   __ pop_i(r1);
1108   __ pop_ptr(r3);
1109   // r0: value
1110   // r1: index
1111   // r3: array
1112   index_check(r3, r1); // prefer index in r1
1113   __ lea(rscratch1, Address(r3, r1, Address::uxtw(2)));
1114   __ strw(r0, Address(rscratch1,
1115                       arrayOopDesc::base_offset_in_bytes(T_INT)));
1116 }
1117 
1118 void TemplateTable::lastore() {
1119   transition(ltos, vtos);
1120   __ pop_i(r1);
1121   __ pop_ptr(r3);
1122   // r0: value
1123   // r1: index
1124   // r3: array
1125   index_check(r3, r1); // prefer index in r1
1126   __ lea(rscratch1, Address(r3, r1, Address::uxtw(3)));
1127   __ str(r0, Address(rscratch1,
1128                       arrayOopDesc::base_offset_in_bytes(T_LONG)));
1129 }
1130 
1131 void TemplateTable::fastore() {
1132   transition(ftos, vtos);
1133   __ pop_i(r1);
1134   __ pop_ptr(r3);
1135   // v0: value
1136   // r1:  index
1137   // r3:  array
1138   index_check(r3, r1); // prefer index in r1
1139   __ lea(rscratch1, Address(r3, r1, Address::uxtw(2)));
1140   __ strs(v0, Address(rscratch1,
1141                       arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
1142 }
1143 
1144 void TemplateTable::dastore() {
1145   transition(dtos, vtos);
1146   __ pop_i(r1);
1147   __ pop_ptr(r3);
1148   // v0: value
1149   // r1:  index
1150   // r3:  array
1151   index_check(r3, r1); // prefer index in r1
1152   __ lea(rscratch1, Address(r3, r1, Address::uxtw(3)));
1153   __ strd(v0, Address(rscratch1,
1154                       arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
1155 }
1156 
1157 void TemplateTable::aastore() {
1158   Label is_null, ok_is_subtype, done;
1159   transition(vtos, vtos);
1160   // stack: ..., array, index, value
1161   __ ldr(r0, at_tos());    // value
1162   __ ldr(r2, at_tos_p1()); // index
1163   __ ldr(r3, at_tos_p2()); // array
1164 
1165   Address element_address(r4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1166 
1167   index_check(r3, r2);     // kills r1
1168   __ lea(r4, Address(r3, r2, Address::uxtw(UseCompressedOops? 2 : 3)));
1169 
1170   // do array store check - check for NULL value first
1171   __ cbz(r0, is_null);
1172 
1173   // Move subklass into r1
1174   __ load_klass(r1, r0);
1175   // Move superklass into r0
1176   __ load_klass(r0, r3);
1177   __ ldr(r0, Address(r0,
1178                      ObjArrayKlass::element_klass_offset()));
1179   // The element address array + index*oopSize + header was already formed into r4 above.  Frees r2.
1180 
1181   // Generate subtype check.  Blows r2, r5
1182   // Superklass in r0.  Subklass in r1.
1183   __ gen_subtype_check(r1, ok_is_subtype);
1184 
1185   // Come here on failure
1186   // object is at TOS
1187   __ b(Interpreter::_throw_ArrayStoreException_entry);
1188 
1189   // Come here on success
1190   __ bind(ok_is_subtype);
1191 
1192   // Get the value we will store
1193   __ ldr(r0, at_tos());
1194   // Now store using the appropriate barrier
1195   do_oop_store(_masm, element_address, r0, _bs->kind(), true);
1196   __ b(done);
1197 
1198   // Have a NULL in r0, r3=array, r2=index.  Store NULL at ary[idx]
1199   __ bind(is_null);
1200   __ profile_null_seen(r2);
1201 
1202   // Store a NULL
1203   do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
1204 
1205   // Pop stack arguments
1206   __ bind(done);
1207   __ add(esp, esp, 3 * Interpreter::stackElementSize);
1208 }
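
// aastore in summary: a null value skips the type check entirely (only the
// null-seen profile is updated); a non-null value is stored only if its
// klass is a subtype of the array's element klass; otherwise we jump to the
// ArrayStoreException entry with the offending object still at TOS.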
1209 
1210 void TemplateTable::bastore()
1211 {
1212   transition(itos, vtos);
1213   __ pop_i(r1);
1214   __ pop_ptr(r3);
1215   // r0: value
1216   // r1: index
1217   // r3: array
1218   index_check(r3, r1); // prefer index in r1
1219 
1220   // Need to check whether array is boolean or byte
1221   // since both types share the bastore bytecode.
1222   __ load_klass(r2, r3);
1223   __ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
1224   int diffbit_index = exact_log2(Klass::layout_helper_boolean_diffbit());
1225   Label L_skip;
1226   __ tbz(r2, diffbit_index, L_skip);
1227   __ andw(r0, r0, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1228   __ bind(L_skip);
1229 
1230   __ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
1231   __ strb(r0, Address(rscratch1,
1232                       arrayOopDesc::base_offset_in_bytes(T_BYTE)));
1233 }
1234 
1235 void TemplateTable::castore()
1236 {
1237   transition(itos, vtos);
1238   __ pop_i(r1);
1239   __ pop_ptr(r3);
1240   // r0: value
1241   // r1: index
1242   // r3: array
1243   index_check(r3, r1); // prefer index in r1
1244   __ lea(rscratch1, Address(r3, r1, Address::uxtw(1)));
1245   __ strh(r0, Address(rscratch1,
1246                       arrayOopDesc::base_offset_in_bytes(T_CHAR)));
1247 }
1248 
1249 void TemplateTable::sastore()
1250 {
1251   castore();
1252 }
1253 
1254 void TemplateTable::istore(int n)
1255 {
1256   transition(itos, vtos);
1257   __ str(r0, iaddress(n));
1258 }
1259 
1260 void TemplateTable::lstore(int n)
1261 {
1262   transition(ltos, vtos);
1263   __ str(r0, laddress(n));
1264 }
1265 
1266 void TemplateTable::fstore(int n)
1267 {
1268   transition(ftos, vtos);
1269   __ strs(v0, faddress(n));
1270 }
1271 
1272 void TemplateTable::dstore(int n)
1273 {
1274   transition(dtos, vtos);
1275   __ strd(v0, daddress(n));
1276 }
1277 
1278 void TemplateTable::astore(int n)
1279 {
1280   transition(vtos, vtos);
1281   __ pop_ptr(r0);
1282   __ str(r0, iaddress(n));
1283 }
1284 
1285 void TemplateTable::pop()
1286 {
1287   transition(vtos, vtos);
1288   __ add(esp, esp, Interpreter::stackElementSize);
1289 }
1290 
1291 void TemplateTable::pop2()
1292 {
1293   transition(vtos, vtos);
1294   __ add(esp, esp, 2 * Interpreter::stackElementSize);
1295 }
1296 
1297 void TemplateTable::dup()
1298 {
1299   transition(vtos, vtos);
1300   __ ldr(r0, Address(esp, 0));
1301   __ push(r0);
1302   // stack: ..., a, a
1303 }
1304 
1305 void TemplateTable::dup_x1()
1306 {
1307   transition(vtos, vtos);
1308   // stack: ..., a, b
1309   __ ldr(r0, at_tos());  // load b
1310   __ ldr(r2, at_tos_p1());  // load a
1311   __ str(r0, at_tos_p1());  // store b
1312   __ str(r2, at_tos());  // store a
1313   __ push(r0);                  // push b
1314   // stack: ..., b, a, b
1315 }
1316 
1317 void TemplateTable::dup_x2()
1318 {
1319   transition(vtos, vtos);
1320   // stack: ..., a, b, c
1321   __ ldr(r0, at_tos());  // load c
1322   __ ldr(r2, at_tos_p2());  // load a
1323   __ str(r0, at_tos_p2());  // store c in a
1324   __ push(r0);      // push c
1325   // stack: ..., c, b, c, c
1326   __ ldr(r0, at_tos_p2());  // load b
1327   __ str(r2, at_tos_p2());  // store a in b
1328   // stack: ..., c, a, c, c
1329   __ str(r0, at_tos_p1());  // store b in c
1330   // stack: ..., c, a, b, c
1331 }
1332 
1333 void TemplateTable::dup2()
1334 {
1335   transition(vtos, vtos);
1336   // stack: ..., a, b
1337   __ ldr(r0, at_tos_p1());  // load a
1338   __ push(r0);                  // push a
1339   __ ldr(r0, at_tos_p1());  // load b
1340   __ push(r0);                  // push b
1341   // stack: ..., a, b, a, b
1342 }
1343 
1344 void TemplateTable::dup2_x1()
1345 {
1346   transition(vtos, vtos);
1347   // stack: ..., a, b, c
1348   __ ldr(r2, at_tos());  // load c
1349   __ ldr(r0, at_tos_p1());  // load b
1350   __ push(r0);                  // push b
1351   __ push(r2);                  // push c
1352   // stack: ..., a, b, c, b, c
1353   __ str(r2, at_tos_p3());  // store c in b
1354   // stack: ..., a, c, c, b, c
1355   __ ldr(r2, at_tos_p4());  // load a
1356   __ str(r2, at_tos_p2());  // store a in 2nd c
1357   // stack: ..., a, c, a, b, c
1358   __ str(r0, at_tos_p4());  // store b in a
1359   // stack: ..., b, c, a, b, c
1360 }
1361 
1362 void TemplateTable::dup2_x2()
1363 {
1364   transition(vtos, vtos);
1365   // stack: ..., a, b, c, d
1366   __ ldr(r2, at_tos());  // load d
1367   __ ldr(r0, at_tos_p1());  // load c
1368   __ push(r0)            ;      // push c
1369   __ push(r2);                  // push d
1370   // stack: ..., a, b, c, d, c, d
1371   __ ldr(r0, at_tos_p4());  // load b
1372   __ str(r0, at_tos_p2());  // store b in d
1373   __ str(r2, at_tos_p4());  // store d in b
1374   // stack: ..., a, d, c, b, c, d
1375   __ ldr(r2, at_tos_p5());  // load a
1376   __ ldr(r0, at_tos_p3());  // load c
1377   __ str(r2, at_tos_p3());  // store a in c
1378   __ str(r0, at_tos_p5());  // store c in a
1379   // stack: ..., c, d, a, b, c, d
1380 }
1381 
1382 void TemplateTable::swap()
1383 {
1384   transition(vtos, vtos);
1385   // stack: ..., a, b
1386   __ ldr(r2, at_tos_p1());  // load a
1387   __ ldr(r0, at_tos());  // load b
1388   __ str(r2, at_tos());  // store a in b
1389   __ str(r0, at_tos_p1());  // store b in a
1390   // stack: ..., b, a
1391 }
1392 
1393 void TemplateTable::iop2(Operation op)
1394 {
1395   transition(itos, itos);
1396   // r0 <== r1 op r0
1397   __ pop_i(r1);
1398   switch (op) {
1399   case add  : __ addw(r0, r1, r0); break;
1400   case sub  : __ subw(r0, r1, r0); break;
1401   case mul  : __ mulw(r0, r1, r0); break;
1402   case _and : __ andw(r0, r1, r0); break;
1403   case _or  : __ orrw(r0, r1, r0); break;
1404   case _xor : __ eorw(r0, r1, r0); break;
1405   case shl  : __ lslvw(r0, r1, r0); break;
1406   case shr  : __ asrvw(r0, r1, r0); break;
1407   case ushr : __ lsrvw(r0, r1, r0);break;
1408   default   : ShouldNotReachHere();
1409   }
1410 }
1411 
1412 void TemplateTable::lop2(Operation op)
1413 {
1414   transition(ltos, ltos);
1415   // r0 <== r1 op r0
1416   __ pop_l(r1);
1417   switch (op) {
1418   case add  : __ add(r0, r1, r0); break;
1419   case sub  : __ sub(r0, r1, r0); break;
1420   case mul  : __ mul(r0, r1, r0); break;
1421   case _and : __ andr(r0, r1, r0); break;
1422   case _or  : __ orr(r0, r1, r0); break;
1423   case _xor : __ eor(r0, r1, r0); break;
1424   default   : ShouldNotReachHere();
1425   }
1426 }
1427 
1428 void TemplateTable::idiv()
1429 {
1430   transition(itos, itos);
1431   // explicitly check for div0
1432   Label no_div0;
1433   __ cbnzw(r0, no_div0);
1434   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1435   __ br(rscratch1);
1436   __ bind(no_div0);
1437   __ pop_i(r1);
1438   // r0 <== r1 idiv r0
1439   __ corrected_idivl(r0, r1, r0, /* want_remainder */ false);
1440 }
1441 
1442 void TemplateTable::irem()
1443 {
1444   transition(itos, itos);
1445   // explicitly check for div0
1446   Label no_div0;
1447   __ cbnzw(r0, no_div0);
1448   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1449   __ br(rscratch1);
1450   __ bind(no_div0);
1451   __ pop_i(r1);
1452   // r0 <== r1 irem r0
1453   __ corrected_idivl(r0, r1, r0, /* want_remainder */ true);
1454 }
1455 
1456 void TemplateTable::lmul()
1457 {
1458   transition(ltos, ltos);
1459   __ pop_l(r1);
1460   __ mul(r0, r0, r1);
1461 }
1462 
1463 void TemplateTable::ldiv()
1464 {
1465   transition(ltos, ltos);
1466   // explicitly check for div0
1467   Label no_div0;
1468   __ cbnz(r0, no_div0);
1469   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1470   __ br(rscratch1);
1471   __ bind(no_div0);
1472   __ pop_l(r1);
1473   // r0 <== r1 ldiv r0
1474   __ corrected_idivq(r0, r1, r0, /* want_remainder */ false);
1475 }
1476 
1477 void TemplateTable::lrem()
1478 {
1479   transition(ltos, ltos);
1480   // explicitly check for div0
1481   Label no_div0;
1482   __ cbnz(r0, no_div0);
1483   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1484   __ br(rscratch1);
1485   __ bind(no_div0);
1486   __ pop_l(r1);
1487   // r0 <== r1 lrem r0
1488   __ corrected_idivq(r0, r1, r0, /* want_remainder */ true);
1489 }
1490 
1491 void TemplateTable::lshl()
1492 {
1493   transition(itos, ltos);
1494   // shift count is in r0
1495   __ pop_l(r1);
1496   __ lslv(r0, r1, r0);
1497 }
1498 
1499 void TemplateTable::lshr()
1500 {
1501   transition(itos, ltos);
1502   // shift count is in r0
1503   __ pop_l(r1);
1504   __ asrv(r0, r1, r0);
1505 }
1506 
1507 void TemplateTable::lushr()
1508 {
1509   transition(itos, ltos);
1510   // shift count is in r0
1511   __ pop_l(r1);
1512   __ lsrv(r0, r1, r0);
1513 }
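
// For all three long shifts the JVM spec uses only the low six bits of the
// shift count; the AArch64 register-shift instructions (lslv/asrv/lsrv) take
// the count modulo the register width anyway, so no explicit "& 0x3f" is
// needed here.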
1514 
1515 void TemplateTable::fop2(Operation op)
1516 {
1517   transition(ftos, ftos);
1518   switch (op) {
1519   case add:
1520     // n.b. the operand still occupies a 64 bit expression stack slot even though it is a 32 bit float
1521     __ pop_f(v1);
1522     __ fadds(v0, v1, v0);
1523     break;
1524   case sub:
1525     __ pop_f(v1);
1526     __ fsubs(v0, v1, v0);
1527     break;
1528   case mul:
1529     __ pop_f(v1);
1530     __ fmuls(v0, v1, v0);
1531     break;
1532   case div:
1533     __ pop_f(v1);
1534     __ fdivs(v0, v1, v0);
1535     break;
1536   case rem:
1537     __ fmovs(v1, v0);
1538     __ pop_f(v0);
1539     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::frem),
1540                          0, 2, MacroAssembler::ret_type_float);
1541     break;
1542   default:
1543     ShouldNotReachHere();
1544     break;
1545   }
1546 }
1547 
1548 void TemplateTable::dop2(Operation op)
1549 {
1550   transition(dtos, dtos);
1551   switch (op) {
1552   case add:
1553     // n.b. use ldrd because this is a 64 bit slot
1554     __ pop_d(v1);
1555     __ faddd(v0, v1, v0);
1556     break;
1557   case sub:
1558     __ pop_d(v1);
1559     __ fsubd(v0, v1, v0);
1560     break;
1561   case mul:
1562     __ pop_d(v1);
1563     __ fmuld(v0, v1, v0);
1564     break;
1565   case div:
1566     __ pop_d(v1);
1567     __ fdivd(v0, v1, v0);
1568     break;
1569   case rem:
1570     __ fmovd(v1, v0);
1571     __ pop_d(v0);
1572     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::drem),
1573                          0, 2, MacroAssembler::ret_type_double);
1574     break;
1575   default:
1576     ShouldNotReachHere();
1577     break;
1578   }
1579 }
1580 
1581 void TemplateTable::ineg()
1582 {
1583   transition(itos, itos);
1584   __ negw(r0, r0);
1585 
1586 }
1587 
1588 void TemplateTable::lneg()
1589 {
1590   transition(ltos, ltos);
1591   __ neg(r0, r0);
1592 }
1593 
1594 void TemplateTable::fneg()
1595 {
1596   transition(ftos, ftos);
1597   __ fnegs(v0, v0);
1598 }
1599 
1600 void TemplateTable::dneg()
1601 {
1602   transition(dtos, dtos);
1603   __ fnegd(v0, v0);
1604 }
1605 
1606 void TemplateTable::iinc()
1607 {
1608   transition(vtos, vtos);
1609   __ load_signed_byte(r1, at_bcp(2)); // get constant
1610   locals_index(r2);
1611   __ ldr(r0, iaddress(r2));
1612   __ addw(r0, r0, r1);
1613   __ str(r0, iaddress(r2));
1614 }
1615 
1616 void TemplateTable::wide_iinc()
1617 {
1618   transition(vtos, vtos);
1619   // __ mov(r1, zr);
1620   __ ldrw(r1, at_bcp(2)); // get constant and index
1621   __ rev16(r1, r1);
1622   __ ubfx(r2, r1, 0, 16);
1623   __ neg(r2, r2);
1624   __ sbfx(r1, r1, 16, 16);
1625   __ ldr(r0, iaddress(r2));
1626   __ addw(r0, r0, r1);
1627   __ str(r0, iaddress(r2));
1628 }
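
// wide_iinc operand decoding, spelled out: the single 32-bit load at bcp+2
// picks up both big-endian operands of wide iinc (u2 local index, then s2
// increment), rev16 byte-swaps each halfword in place, ubfx extracts the
// index from the low half (negated for the downward-growing locals area) and
// sbfx sign-extends the increment from the high half.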
1629 
1630 void TemplateTable::convert()
1631 {
1632   // Checking
1633 #ifdef ASSERT
1634   {
1635     TosState tos_in  = ilgl;
1636     TosState tos_out = ilgl;
1637     switch (bytecode()) {
1638     case Bytecodes::_i2l: // fall through
1639     case Bytecodes::_i2f: // fall through
1640     case Bytecodes::_i2d: // fall through
1641     case Bytecodes::_i2b: // fall through
1642     case Bytecodes::_i2c: // fall through
1643     case Bytecodes::_i2s: tos_in = itos; break;
1644     case Bytecodes::_l2i: // fall through
1645     case Bytecodes::_l2f: // fall through
1646     case Bytecodes::_l2d: tos_in = ltos; break;
1647     case Bytecodes::_f2i: // fall through
1648     case Bytecodes::_f2l: // fall through
1649     case Bytecodes::_f2d: tos_in = ftos; break;
1650     case Bytecodes::_d2i: // fall through
1651     case Bytecodes::_d2l: // fall through
1652     case Bytecodes::_d2f: tos_in = dtos; break;
1653     default             : ShouldNotReachHere();
1654     }
1655     switch (bytecode()) {
1656     case Bytecodes::_l2i: // fall through
1657     case Bytecodes::_f2i: // fall through
1658     case Bytecodes::_d2i: // fall through
1659     case Bytecodes::_i2b: // fall through
1660     case Bytecodes::_i2c: // fall through
1661     case Bytecodes::_i2s: tos_out = itos; break;
1662     case Bytecodes::_i2l: // fall through
1663     case Bytecodes::_f2l: // fall through
1664     case Bytecodes::_d2l: tos_out = ltos; break;
1665     case Bytecodes::_i2f: // fall through
1666     case Bytecodes::_l2f: // fall through
1667     case Bytecodes::_d2f: tos_out = ftos; break;
1668     case Bytecodes::_i2d: // fall through
1669     case Bytecodes::_l2d: // fall through
1670     case Bytecodes::_f2d: tos_out = dtos; break;
1671     default             : ShouldNotReachHere();
1672     }
1673     transition(tos_in, tos_out);
1674   }
1675 #endif // ASSERT
1676   // static const int64_t is_nan = 0x8000000000000000L;
1677 
1678   // Conversion
1679   switch (bytecode()) {
1680   case Bytecodes::_i2l:
1681     __ sxtw(r0, r0);
1682     break;
1683   case Bytecodes::_i2f:
1684     __ scvtfws(v0, r0);
1685     break;
1686   case Bytecodes::_i2d:
1687     __ scvtfwd(v0, r0);
1688     break;
1689   case Bytecodes::_i2b:
1690     __ sxtbw(r0, r0);
1691     break;
1692   case Bytecodes::_i2c:
1693     __ uxthw(r0, r0);
1694     break;
1695   case Bytecodes::_i2s:
1696     __ sxthw(r0, r0);
1697     break;
1698   case Bytecodes::_l2i:
1699     __ uxtw(r0, r0);
1700     break;
1701   case Bytecodes::_l2f:
1702     __ scvtfs(v0, r0);
1703     break;
1704   case Bytecodes::_l2d:
1705     __ scvtfd(v0, r0);
1706     break;
1707   case Bytecodes::_f2i:
1708   {
1709     Label L_Okay;
1710     __ clear_fpsr();
1711     __ fcvtzsw(r0, v0);
1712     __ get_fpsr(r1);
1713     __ cbzw(r1, L_Okay);
1714     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2i),
1715                          0, 1, MacroAssembler::ret_type_integral);
1716     __ bind(L_Okay);
1717   }
1718     break;
1719   case Bytecodes::_f2l:
1720   {
1721     Label L_Okay;
1722     __ clear_fpsr();
1723     __ fcvtzs(r0, v0);
1724     __ get_fpsr(r1);
1725     __ cbzw(r1, L_Okay);
1726     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2l),
1727                          0, 1, MacroAssembler::ret_type_integral);
1728     __ bind(L_Okay);
1729   }
1730     break;
1731   case Bytecodes::_f2d:
1732     __ fcvts(v0, v0);
1733     break;
1734   case Bytecodes::_d2i:
1735   {
1736     Label L_Okay;
1737     __ clear_fpsr();
1738     __ fcvtzdw(r0, v0);
1739     __ get_fpsr(r1);
1740     __ cbzw(r1, L_Okay);
1741     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2i),
1742                          0, 1, MacroAssembler::ret_type_integral);
1743     __ bind(L_Okay);
1744   }
1745     break;
1746   case Bytecodes::_d2l:
1747   {
1748     Label L_Okay;
1749     __ clear_fpsr();
1750     __ fcvtzd(r0, v0);
1751     __ get_fpsr(r1);
1752     __ cbzw(r1, L_Okay);
1753     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2l),
1754                          0, 1, MacroAssembler::ret_type_integral);
1755     __ bind(L_Okay);
1756   }
1757     break;
1758   case Bytecodes::_d2f:
1759     __ fcvtd(v0, v0);
1760     break;
1761   default:
1762     ShouldNotReachHere();
1763   }
1764 }
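
// The four float/double-to-integer cases above share one pattern: the fcvtz*
// conversion is attempted first, and FPSR is checked afterwards.  If the
// conversion raised any exception flag (e.g. a NaN or out-of-range input),
// the result is recomputed by the corresponding SharedRuntime routine, which
// implements the Java-mandated NaN-to-zero and saturating behaviour.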
1765 
1766 void TemplateTable::lcmp()
1767 {
1768   transition(ltos, itos);
1769   Label done;
1770   __ pop_l(r1);
1771   __ cmp(r1, r0);
1772   __ mov(r0, (u_int64_t)-1L);
1773   __ br(Assembler::LT, done);
1774   // __ mov(r0, 1UL);
1775   // __ csel(r0, r0, zr, Assembler::NE);
1776   // and here is a faster way
1777   __ csinc(r0, zr, zr, Assembler::EQ);
1778   __ bind(done);
1779 }
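
// Worked example of the csinc trick in lcmp: r0 is preset to -1 and kept when
// r1 < r0 (signed); otherwise csinc(r0, zr, zr, EQ) yields zr (0) when the
// operands were equal and zr + 1 (1) when r1 > r0, matching the -1/0/1
// contract of lcmp.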
1780 
1781 void TemplateTable::float_cmp(bool is_float, int unordered_result)
1782 {
1783   Label done;
1784   if (is_float) {
1785     // XXX get rid of pop here, use ... reg, mem32
1786     __ pop_f(v1);
1787     __ fcmps(v1, v0);
1788   } else {
1789     // XXX get rid of pop here, use ... reg, mem64
1790     __ pop_d(v1);
1791     __ fcmpd(v1, v0);
1792   }
1793   if (unordered_result < 0) {
1794     // we want -1 for unordered or less than, 0 for equal and 1 for
1795     // greater than.
1796     __ mov(r0, (u_int64_t)-1L);
1797     // for FP LT tests less than or unordered
1798     __ br(Assembler::LT, done);
1799     // install 0 for EQ otherwise 1
1800     __ csinc(r0, zr, zr, Assembler::EQ);
1801   } else {
1802     // we want -1 for less than, 0 for equal and 1 for unordered or
1803     // greater than.
1804     __ mov(r0, 1L);
1805     // for FP HI tests greater than or unordered
1806     __ br(Assembler::HI, done);
1807     // install 0 for EQ otherwise ~0
1808     __ csinv(r0, zr, zr, Assembler::EQ);
1809 
1810   }
1811   __ bind(done);
1812 }
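
// The asymmetry between the two arms mirrors fcmpl/fcmpg semantics: an
// unordered compare (NaN operand) must yield -1 for fcmpl/dcmpl
// (unordered_result < 0) and +1 for fcmpg/dcmpg, hence one arm tests LT
// (less than or unordered) and the other HI (greater than or unordered).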
1813 
1814 void TemplateTable::branch(bool is_jsr, bool is_wide)
1815 {
1816   // We might be moving to a safepoint.  The thread which calls
1817   // Interpreter::notice_safepoints() will effectively flush its cache
1818   // when it makes a system call, but we need to do something to
1819   // ensure that we see the changed dispatch table.
1820   __ membar(MacroAssembler::LoadLoad);
1821 
1822   __ profile_taken_branch(r0, r1);
1823   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
1824                              InvocationCounter::counter_offset();
1825   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
1826                               InvocationCounter::counter_offset();
1827 
1828   // load branch displacement
1829   if (!is_wide) {
1830     __ ldrh(r2, at_bcp(1));
1831     __ rev16(r2, r2);
1832     // sign extend the 16 bit value in r2
1833     __ sbfm(r2, r2, 0, 15);
1834   } else {
1835     __ ldrw(r2, at_bcp(1));
1836     __ revw(r2, r2);
1837     // sign extend the 32 bit value in r2
1838     __ sbfm(r2, r2, 0, 31);
1839   }
1840 
1841   // Handle all the JSR stuff here, then exit.
1842   // It's much shorter and cleaner than intermingling with the non-JSR
1843   // normal-branch stuff occurring below.
1844 
1845   if (is_jsr) {
1846     // Pre-load the next target bytecode into rscratch1
1847     __ load_unsigned_byte(rscratch1, Address(rbcp, r2));
1848     // compute return address as bci
1849     __ ldr(rscratch2, Address(rmethod, Method::const_offset()));
1850     __ add(rscratch2, rscratch2,
1851            in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3));
1852     __ sub(r1, rbcp, rscratch2);
1853     __ push_i(r1);
1854     // Adjust the bcp by the displacement in r2 (16 bits for jsr, 32 for jsr_w)
1855     __ add(rbcp, rbcp, r2);
1856     __ dispatch_only(vtos, /*generate_poll*/true);
1857     return;
1858   }
1859 
1860   // Normal (non-jsr) branch handling
1861 
1862   // Adjust the bcp by the displacement in r2
1863   __ add(rbcp, rbcp, r2);
1864 
1865   assert(UseLoopCounter || !UseOnStackReplacement,
1866          "on-stack-replacement requires loop counters");
1867   Label backedge_counter_overflow;
1868   Label profile_method;
1869   Label dispatch;
1870   if (UseLoopCounter) {
1871     // increment backedge counter for backward branches
1872     // r0: MDO
1873     // w1: MDO bumped taken-count
1874     // r2: target offset
1875     __ cmp(r2, zr);
1876     __ br(Assembler::GT, dispatch); // count only if backward branch
1877 
1878     // ECN: FIXME: This code smells
1879     // check if MethodCounters exists
1880     Label has_counters;
1881     __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset()));
1882     __ cbnz(rscratch1, has_counters);
1883     __ push(r0);
1884     __ push(r1);
1885     __ push(r2);
1886     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
1887             InterpreterRuntime::build_method_counters), rmethod);
1888     __ pop(r2);
1889     __ pop(r1);
1890     __ pop(r0);
1891     __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset()));
1892     __ cbz(rscratch1, dispatch); // No MethodCounters allocated, OutOfMemory
1893     __ bind(has_counters);
1894 
1895     if (TieredCompilation) {
1896       Label no_mdo;
1897       int increment = InvocationCounter::count_increment;
1898       if (ProfileInterpreter) {
1899         // Are we profiling?
1900         __ ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset())));
1901         __ cbz(r1, no_mdo);
1902         // Increment the MDO backedge counter
1903         const Address mdo_backedge_counter(r1, in_bytes(MethodData::backedge_counter_offset()) +
1904                                            in_bytes(InvocationCounter::counter_offset()));
1905         const Address mask(r1, in_bytes(MethodData::backedge_mask_offset()));
1906         __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
1907                                    r0, rscratch1, false, Assembler::EQ,
1908                                    UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
1909         __ b(dispatch);
1910       }
1911       __ bind(no_mdo);
1912       // Increment backedge counter in MethodCounters*
1913       __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset()));
1914       const Address mask(rscratch1, in_bytes(MethodCounters::backedge_mask_offset()));
1915       __ increment_mask_and_jump(Address(rscratch1, be_offset), increment, mask,
1916                                  r0, rscratch2, false, Assembler::EQ,
1917                                  UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
1918     } else { // not TieredCompilation
1919       // increment counter
1920       __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset()));
1921       __ ldrw(r0, Address(rscratch2, be_offset));        // load backedge counter
1922       __ addw(rscratch1, r0, InvocationCounter::count_increment); // increment counter
1923       __ strw(rscratch1, Address(rscratch2, be_offset));        // store counter
1924 
1925       __ ldrw(r0, Address(rscratch2, inv_offset));    // load invocation counter
1926       __ andw(r0, r0, (unsigned)InvocationCounter::count_mask_value); // and the status bits
1927       __ addw(r0, r0, rscratch1);        // add both counters
1928 
1929       if (ProfileInterpreter) {
1930         // Test to see if we should create a method data oop
1931         __ ldrw(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
1932         __ cmpw(r0, rscratch1);
1933         __ br(Assembler::LT, dispatch);
1934 
1935         // if no method data exists, go to profile method
1936         __ test_method_data_pointer(r0, profile_method);
1937 
1938         if (UseOnStackReplacement) {
1939           // check for overflow against w1 which is the MDO taken count
1940           __ ldrw(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
1941           __ cmpw(r1, rscratch1);
1942           __ br(Assembler::LO, dispatch); // Intel == Assembler::below
1943 
1944           // When ProfileInterpreter is on, the backedge_count comes
1945           // from the MethodData*, which value does not get reset on
1946           // the call to frequency_counter_overflow().  To avoid
1947           // excessive calls to the overflow routine while the method is
1948           // being compiled, add a second test to make sure the overflow
1949           // function is called only once every overflow_frequency.
1950           const int overflow_frequency = 1024;
1951           __ andsw(r1, r1, overflow_frequency - 1);
1952           __ br(Assembler::EQ, backedge_counter_overflow);
1953 
1954         }
1955       } else {
1956         if (UseOnStackReplacement) {
1957           // check for overflow against w0, which is the sum of the
1958           // counters
1959           __ ldrw(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
1960           __ cmpw(r0, rscratch1);
1961           __ br(Assembler::HS, backedge_counter_overflow); // Intel == Assembler::aboveEqual
1962         }
1963       }
1964     }
1965     __ bind(dispatch);
1966   }
1967 
1968   // Pre-load the next target bytecode into rscratch1
1969   __ load_unsigned_byte(rscratch1, Address(rbcp, 0));
1970 
1971   // continue with the bytecode @ target
1972   // rscratch1: target bytecode
1973   // rbcp: target bcp
1974   __ dispatch_only(vtos, /*generate_poll*/true);
1975 
1976   if (UseLoopCounter) {
1977     if (ProfileInterpreter) {
1978       // Out-of-line code to allocate method data oop.
1979       __ bind(profile_method);
1980       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
1981       __ load_unsigned_byte(r1, Address(rbcp, 0));  // restore target bytecode
1982       __ set_method_data_pointer_for_bcp();
1983       __ b(dispatch);
1984     }
1985 
1986     if (UseOnStackReplacement) {
1987       // invocation counter overflow
1988       __ bind(backedge_counter_overflow);
1989       __ neg(r2, r2);
1990       __ add(r2, r2, rbcp);     // branch bcp
1991       // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
1992       __ call_VM(noreg,
1993                  CAST_FROM_FN_PTR(address,
1994                                   InterpreterRuntime::frequency_counter_overflow),
1995                  r2);
1996       __ load_unsigned_byte(r1, Address(rbcp, 0));  // restore target bytecode
1997 
1998       // r0: osr nmethod (osr ok) or NULL (osr not possible)
1999       // w1: target bytecode
2000       // r2: scratch
2001       __ cbz(r0, dispatch);     // test result -- no osr if null
2002       // nmethod may have been invalidated (VM may block upon call_VM return)
2003       __ ldrb(r2, Address(r0, nmethod::state_offset()));
2004       if (nmethod::in_use != 0)
2005         __ sub(r2, r2, nmethod::in_use);
2006       __ cbnz(r2, dispatch);
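           // (r2 is now zero only if the nmethod state is in_use; any other
           // state means the OSR code can no longer be entered)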
2007 
2008       // We have the address of an on stack replacement routine in r0
2009       // We need to prepare to execute the OSR method. First we must
2010       // migrate the locals and monitors off of the stack.
2011 
2012       __ mov(r19, r0);                             // save the nmethod
2013 
2014       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
2015 
2016       // r0 is OSR buffer, move it to expected parameter location
2017       __ mov(j_rarg0, r0);
2018 
2019       // remove activation
2020       // get sender esp
2021       __ ldr(esp,
2022           Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize));
2023       // remove frame anchor
2024       __ leave();
2025       // Ensure compiled code always sees stack at proper alignment
2026       __ andr(sp, esp, -16);
2027 
2028       // and begin the OSR nmethod
2029       __ ldr(rscratch1, Address(r19, nmethod::osr_entry_point_offset()));
2030       __ br(rscratch1);
2031     }
2032   }
2033 }
2034 
2035 
2036 void TemplateTable::if_0cmp(Condition cc)
2037 {
2038   transition(itos, vtos);
2039   // assume branch is more often taken than not (loops use backward branches)
2040   Label not_taken;
2041   if (cc == equal)
2042     __ cbnzw(r0, not_taken);
2043   else if (cc == not_equal)
2044     __ cbzw(r0, not_taken);
2045   else {
2046     __ andsw(zr, r0, r0);
2047     __ br(j_not(cc), not_taken);
2048   }
2049 
2050   branch(false, false);
2051   __ bind(not_taken);
2052   __ profile_not_taken_branch(r0);
2053 }
2054 
2055 void TemplateTable::if_icmp(Condition cc)
2056 {
2057   transition(itos, vtos);
2058   // assume branch is more often taken than not (loops use backward branches)
2059   Label not_taken;
2060   __ pop_i(r1);
2061   __ cmpw(r1, r0, Assembler::LSL);
2062   __ br(j_not(cc), not_taken);
2063   branch(false, false);
2064   __ bind(not_taken);
2065   __ profile_not_taken_branch(r0);
2066 }
2067 
2068 void TemplateTable::if_nullcmp(Condition cc)
2069 {
2070   transition(atos, vtos);
2071   // assume branch is more often taken than not (loops use backward branches)
2072   Label not_taken;
2073   if (cc == equal)
2074     __ cbnz(r0, not_taken);
2075   else
2076     __ cbz(r0, not_taken);
2077   branch(false, false);
2078   __ bind(not_taken);
2079   __ profile_not_taken_branch(r0);
2080 }
2081 
2082 void TemplateTable::if_acmp(Condition cc)
2083 {
2084   transition(atos, vtos);
2085   // assume branch is more often taken than not (loops use backward branches)
2086   Label not_taken;
2087   __ pop_ptr(r1);
2088   __ cmp(r1, r0);
2089   __ br(j_not(cc), not_taken);
2090   branch(false, false);
2091   __ bind(not_taken);
2092   __ profile_not_taken_branch(r0);
2093 }
2094 
2095 void TemplateTable::ret() {
2096   transition(vtos, vtos);
2097   // We might be moving to a safepoint.  The thread which calls
2098   // Interpreter::notice_safepoints() will effectively flush its cache
2099   // when it makes a system call, but we need to do something to
2100   // ensure that we see the changed dispatch table.
2101   __ membar(MacroAssembler::LoadLoad);
2102 
2103   locals_index(r1);
2104   __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
2105   __ profile_ret(r1, r2);
2106   __ ldr(rbcp, Address(rmethod, Method::const_offset()));
2107   __ lea(rbcp, Address(rbcp, r1));
2108   __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
2109   __ dispatch_next(vtos, 0, /*generate_poll*/true);
2110 }
2111 
2112 void TemplateTable::wide_ret() {
2113   transition(vtos, vtos);
2114   locals_index_wide(r1);
2115   __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
2116   __ profile_ret(r1, r2);
2117   __ ldr(rbcp, Address(rmethod, Method::const_offset()));
2118   __ lea(rbcp, Address(rbcp, r1));
2119   __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
2120   __ dispatch_next(vtos, 0, /*generate_poll*/true);
2121 }
2122 
2123 
2124 void TemplateTable::tableswitch() {
2125   Label default_case, continue_execution;
2126   transition(itos, vtos);
2127   // align rbcp
2128   __ lea(r1, at_bcp(BytesPerInt));
2129   __ andr(r1, r1, -BytesPerInt);
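       // r1 now points at the aligned default offset; lo and hi follow at
       // +BytesPerInt and +2*BytesPerInt, the jump table starts at +3*BytesPerInt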
2130   // load lo & hi
2131   __ ldrw(r2, Address(r1, BytesPerInt));
2132   __ ldrw(r3, Address(r1, 2 * BytesPerInt));
2133   __ rev32(r2, r2);
2134   __ rev32(r3, r3);
2135   // check against lo & hi
2136   __ cmpw(r0, r2);
2137   __ br(Assembler::LT, default_case);
2138   __ cmpw(r0, r3);
2139   __ br(Assembler::GT, default_case);
2140   // lookup dispatch offset
2141   __ subw(r0, r0, r2);
2142   __ lea(r3, Address(r1, r0, Address::uxtw(2)));
2143   __ ldrw(r3, Address(r3, 3 * BytesPerInt));
2144   __ profile_switch_case(r0, r1, r2);
2145   // continue execution
2146   __ bind(continue_execution);
2147   __ rev32(r3, r3);
2148   __ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0)));
2149   __ add(rbcp, rbcp, r3, ext::sxtw);
2150   __ dispatch_only(vtos, /*generate_poll*/true);
2151   // handle default
2152   __ bind(default_case);
2153   __ profile_switch_default(r0);
2154   __ ldrw(r3, Address(r1, 0));
2155   __ b(continue_execution);
2156 }
2157 
2158 void TemplateTable::lookupswitch() {
2159   transition(itos, itos);
2160   __ stop("lookupswitch bytecode should have been rewritten");
2161 }
2162 
2163 void TemplateTable::fast_linearswitch() {
2164   transition(itos, vtos);
2165   Label loop_entry, loop, found, continue_execution;
2166   // bswap r0 so we can avoid bswapping the table entries
2167   __ rev32(r0, r0);
2168   // align rbcp
2169   __ lea(r19, at_bcp(BytesPerInt)); // btw: should be able to get rid of
2170                                     // this instruction (change offsets
2171                                     // below)
2172   __ andr(r19, r19, -BytesPerInt);
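       // r19 now points at the aligned default offset; the pair count follows
       // at +BytesPerInt and the (match, offset) pairs start at +2*BytesPerInt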
2173   // set counter
2174   __ ldrw(r1, Address(r19, BytesPerInt));
2175   __ rev32(r1, r1);
2176   __ b(loop_entry);
2177   // table search
2178   __ bind(loop);
2179   __ lea(rscratch1, Address(r19, r1, Address::lsl(3)));
2180   __ ldrw(rscratch1, Address(rscratch1, 2 * BytesPerInt));
2181   __ cmpw(r0, rscratch1);
2182   __ br(Assembler::EQ, found);
2183   __ bind(loop_entry);
2184   __ subs(r1, r1, 1);
2185   __ br(Assembler::PL, loop);
2186   // default case
2187   __ profile_switch_default(r0);
2188   __ ldrw(r3, Address(r19, 0));
2189   __ b(continue_execution);
2190   // entry found -> get offset
2191   __ bind(found);
2192   __ lea(rscratch1, Address(r19, r1, Address::lsl(3)));
2193   __ ldrw(r3, Address(rscratch1, 3 * BytesPerInt));
2194   __ profile_switch_case(r1, r0, r19);
2195   // continue execution
2196   __ bind(continue_execution);
2197   __ rev32(r3, r3);
2198   __ add(rbcp, rbcp, r3, ext::sxtw);
2199   __ ldrb(rscratch1, Address(rbcp, 0));
2200   __ dispatch_only(vtos, /*generate_poll*/true);
2201 }
2202 
2203 void TemplateTable::fast_binaryswitch() {
2204   transition(itos, vtos);
2205   // Implementation using the following core algorithm:
2206   //
2207   // int binary_search(int key, LookupswitchPair* array, int n) {
2208   //   // Binary search according to "Methodik des Programmierens" by
2209   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2210   //   int i = 0;
2211   //   int j = n;
2212   //   while (i+1 < j) {
2213   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2214   //     // with      Q: for all i: 0 <= i < n: key < a[i]
2215   //     // where a stands for the array and assuming that the (nonexistent)
2216   //     // element a[n] is infinitely big.
2217   //     int h = (i + j) >> 1;
2218   //     // i < h < j
2219   //     if (key < array[h].fast_match()) {
2220   //       j = h;
2221   //     } else {
2222   //       i = h;
2223   //     }
2224   //   }
2225   //   // R: a[i] <= key < a[i+1] or Q
2226   //   // (i.e., if key is within array, i is the correct index)
2227   //   return i;
2228   // }
2229 
2230   // Register allocation
2231   const Register key   = r0; // already set (tosca)
2232   const Register array = r1;
2233   const Register i     = r2;
2234   const Register j     = r3;
2235   const Register h     = rscratch1;
2236   const Register temp  = rscratch2;
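       // Each LookupswitchPair is two BytesPerInt words (match, then offset),
       // hence the lsl(3) scaling used to index the array below.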
2237 
2238   // Find array start
2239   __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
2240                                           // get rid of this
2241                                           // instruction (change
2242                                           // offsets below)
2243   __ andr(array, array, -BytesPerInt);
2244 
2245   // Initialize i & j
2246   __ mov(i, 0);                            // i = 0;
2247   __ ldrw(j, Address(array, -BytesPerInt)); // j = length(array);
2248 
2249   // Convert j into native byteordering
2250   __ rev32(j, j);
2251 
2252   // And start
2253   Label entry;
2254   __ b(entry);
2255 
2256   // binary search loop
2257   {
2258     Label loop;
2259     __ bind(loop);
2260     // int h = (i + j) >> 1;
2261     __ addw(h, i, j);                           // h = i + j;
2262     __ lsrw(h, h, 1);                                   // h = (i + j) >> 1;
2263     // if (key < array[h].fast_match()) {
2264     //   j = h;
2265     // } else {
2266     //   i = h;
2267     // }
2268     // Convert array[h].match to native byte-ordering before compare
2269     __ ldr(temp, Address(array, h, Address::lsl(3)));
2270     __ rev32(temp, temp);
2271     __ cmpw(key, temp);
2272     // j = h if (key <  array[h].fast_match())
2273     __ csel(j, h, j, Assembler::LT);
2274     // i = h if (key >= array[h].fast_match())
2275     __ csel(i, h, i, Assembler::GE);
2276     // while (i+1 < j)
2277     __ bind(entry);
2278     __ addw(h, i, 1);          // i+1
2279     __ cmpw(h, j);             // i+1 < j
2280     __ br(Assembler::LT, loop);
2281   }
2282 
2283   // end of binary search, result index is i (must check again!)
2284   Label default_case;
2285   // Convert array[i].match to native byte-ordering before compare
2286   __ ldr(temp, Address(array, i, Address::lsl(3)));
2287   __ rev32(temp, temp);
2288   __ cmpw(key, temp);
2289   __ br(Assembler::NE, default_case);
2290 
2291   // entry found -> j = offset
2292   __ add(j, array, i, ext::uxtx, 3);
2293   __ ldrw(j, Address(j, BytesPerInt));
2294   __ profile_switch_case(i, key, array);
2295   __ rev32(j, j);
2296   __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
2297   __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
2298   __ dispatch_only(vtos, /*generate_poll*/true);
2299 
2300   // default case -> j = default offset
2301   __ bind(default_case);
2302   __ profile_switch_default(i);
2303   __ ldrw(j, Address(array, -2 * BytesPerInt));
2304   __ rev32(j, j);
2305   __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
2306   __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
2307   __ dispatch_only(vtos, /*generate_poll*/true);
2308 }
2309 
2310 
2311 void TemplateTable::_return(TosState state)
2312 {
2313   transition(state, state);
2314   assert(_desc->calls_vm(),
2315          "inconsistent calls_vm information"); // call in remove_activation
2316 
2317   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2318     assert(state == vtos, "only valid state");
2319 
2320     __ ldr(c_rarg1, aaddress(0));
2321     __ load_klass(r3, c_rarg1);
2322     __ ldrw(r3, Address(r3, Klass::access_flags_offset()));
2323     Label skip_register_finalizer;
2324     __ tbz(r3, exact_log2(JVM_ACC_HAS_FINALIZER), skip_register_finalizer);
2325 
2326     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
2327 
2328     __ bind(skip_register_finalizer);
2329   }
2330 
2331   // Issue a StoreStore barrier after all stores but before return
2332   // from any constructor for any class with a final field.  We don't
2333   // know whether this return is from such a constructor, so we always do so.
2334   if (_desc->bytecode() == Bytecodes::_return)
2335     __ membar(MacroAssembler::StoreStore);
2336 
2337   // Narrow result if state is itos but result type is smaller.
2338   // Need to narrow in the return bytecode rather than in generate_return_entry
2339   // since compiled code callers expect the result to already be narrowed.
2340   if (state == itos) {
2341     __ narrow(r0);
2342   }
2343 
2344   __ remove_activation(state);
2345   __ ret(lr);
2346 }
2347 
2348 // ----------------------------------------------------------------------------
2349 // Volatile variables demand their effects be made known to all CPUs
2350 // in order.  Store buffers on most chips allow reads & writes to
2351 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2352 // without some kind of memory barrier (i.e., it's not sufficient that
2353 // the interpreter does not reorder volatile references, the hardware
2354 // also must not reorder them).
2355 //
2356 // According to the new Java Memory Model (JMM):
2357 // (1) All volatiles are serialized with respect to each other.  ALSO reads &
2358 //     writes act as acquire & release, so:
2359 // (2) A read cannot let unrelated NON-volatile memory refs that
2360 //     happen after the read float up to before the read.  It's OK for
2361 //     non-volatile memory refs that happen before the volatile read to
2362 //     float down below it.
2363 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
2364 //     memory refs that happen BEFORE the write float down to after the
2365 //     write.  It's OK for non-volatile memory refs that happen after the
2366 //     volatile write to float up before it.
2367 //
2368 // We only put in barriers around volatile refs (they are expensive),
2369 // not _between_ memory refs (that would require us to track the
2370 // flavor of the previous memory refs).  Requirements (2) and (3)
2371 // require some barriers before volatile stores and after volatile
2372 // loads.  These nearly cover requirement (1) but miss the
2373 // volatile-store-volatile-load case.  This final case is placed after
2374 // volatile-stores although it could just as well go before
2375 // volatile-loads.
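     //
     // Concretely, in the code below volatile stores are preceded by a
     // StoreStore barrier and followed by a StoreLoad barrier, while
     // volatile loads are followed by a LoadLoad|LoadStore barrier (and,
     // when !UseBarriersForVolatile, preceded by an AnyAny barrier so that
     // they pair correctly with STLR-based stores generated elsewhere).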
2376 
2377 void TemplateTable::resolve_cache_and_index(int byte_no,
2378                                             Register Rcache,
2379                                             Register index,
2380                                             size_t index_size) {
2381   const Register temp = r19;
2382   assert_different_registers(Rcache, index, temp);
2383 
2384   Label resolved;
2385 
2386   Bytecodes::Code code = bytecode();
2387   switch (code) {
2388   case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2389   case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
2390   }
2391 
2392   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2393   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
2394   __ cmp(temp, (int) code);  // have we resolved this bytecode?
2395   __ br(Assembler::EQ, resolved);
2396 
2397   // resolve first time through
2398   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2399   __ mov(temp, (int) code);
2400   __ call_VM(noreg, entry, temp);
2401 
2402   // Update registers with resolved info
2403   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2404   // n.b. unlike x86 Rcache is now rcpool plus the indexed offset
2405   // so all clients of this method must be modified accordingly
2406   __ bind(resolved);
2407 }
2408 
2409 // The Rcache and index registers must be set before the call.
2410 // n.b. unlike x86, cache already includes the index offset
2411 void TemplateTable::load_field_cp_cache_entry(Register obj,
2412                                               Register cache,
2413                                               Register index,
2414                                               Register off,
2415                                               Register flags,
2416                                               bool is_static = false) {
2417   assert_different_registers(cache, index, flags, off);
2418 
2419   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2420   // Field offset
2421   __ ldr(off, Address(cache, in_bytes(cp_base_offset +
2422                                           ConstantPoolCacheEntry::f2_offset())));
2423   // Flags
2424   __ ldrw(flags, Address(cache, in_bytes(cp_base_offset +
2425                                            ConstantPoolCacheEntry::flags_offset())));
2426 
2427   // klass overwrite register
2428   if (is_static) {
2429     __ ldr(obj, Address(cache, in_bytes(cp_base_offset +
2430                                         ConstantPoolCacheEntry::f1_offset())));
2431     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2432     __ ldr(obj, Address(obj, mirror_offset));
2433     __ resolve_oop_handle(obj);
2434   }
2435 }
2436 
2437 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2438                                                Register method,
2439                                                Register itable_index,
2440                                                Register flags,
2441                                                bool is_invokevirtual,
2442                                                bool is_invokevfinal, /*unused*/
2443                                                bool is_invokedynamic) {
2444   // setup registers
2445   const Register cache = rscratch2;
2446   const Register index = r4;
2447   assert_different_registers(method, flags);
2448   assert_different_registers(method, cache, index);
2449   assert_different_registers(itable_index, flags);
2450   assert_different_registers(itable_index, cache, index);
2451   // determine constant pool cache field offsets
2452   assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
2453   const int method_offset = in_bytes(
2454     ConstantPoolCache::base_offset() +
2455       (is_invokevirtual
2456        ? ConstantPoolCacheEntry::f2_offset()
2457        : ConstantPoolCacheEntry::f1_offset()));
2458   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
2459                                     ConstantPoolCacheEntry::flags_offset());
2460   // access constant pool cache fields
2461   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
2462                                     ConstantPoolCacheEntry::f2_offset());
2463 
2464   size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
2465   resolve_cache_and_index(byte_no, cache, index, index_size);
2466   __ ldr(method, Address(cache, method_offset));
2467 
2468   if (itable_index != noreg) {
2469     __ ldr(itable_index, Address(cache, index_offset));
2470   }
2471   __ ldrw(flags, Address(cache, flags_offset));
2472 }
2473 
2474 
2475 // The cache and index registers are expected to be set before the call.
2476 // Correct values of the cache and index registers are preserved.
2477 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
2478                                             bool is_static, bool has_tos) {
2479   // do the JVMTI work here to avoid disturbing the register state below
2480   // We use c_rarg registers here because we want to use the register used in
2481   // the call to the VM
2482   if (JvmtiExport::can_post_field_access()) {
2483     // Check to see if a field access watch has been set before we
2484     // take the time to call into the VM.
2485     Label L1;
2486     assert_different_registers(cache, index, r0);
2487     __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2488     __ ldrw(r0, Address(rscratch1));
2489     __ cbzw(r0, L1);
2490 
2491     __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
2492     __ lea(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset())));
2493 
2494     if (is_static) {
2495       __ mov(c_rarg1, zr); // NULL object reference
2496     } else {
2497       __ ldr(c_rarg1, at_tos()); // get object pointer without popping it
2498       __ verify_oop(c_rarg1);
2499     }
2500     // c_rarg1: object pointer or NULL
2501     // c_rarg2: cache entry pointer
2502     // c_rarg3: jvalue object on the stack
2503     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2504                                        InterpreterRuntime::post_field_access),
2505                c_rarg1, c_rarg2, c_rarg3);
2506     __ get_cache_and_index_at_bcp(cache, index, 1);
2507     __ bind(L1);
2508   }
2509 }
2510 
2511 void TemplateTable::pop_and_check_object(Register r)
2512 {
2513   __ pop_ptr(r);
2514   __ null_check(r);  // for field access must check obj.
2515   __ verify_oop(r);
2516 }
2517 
2518 void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc)
2519 {
2520   const Register cache = r2;
2521   const Register index = r3;
2522   const Register obj   = r4;
2523   const Register off   = r19;
2524   const Register flags = r0;
2525   const Register raw_flags = r6;
2526   const Register bc    = r4; // uses same reg as obj, so don't mix them
2527 
2528   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2529   jvmti_post_field_access(cache, index, is_static, false);
2530   load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static);
2531 
2532   if (!is_static) {
2533     // obj is on the stack
2534     pop_and_check_object(obj);
2535   }
2536 
2537   // 8179954: We need to make sure that the code generated for
2538   // volatile accesses forms a sequentially-consistent set of
2539   // operations when combined with STLR and LDAR.  Without a leading
2540   // membar it's possible for a simple Dekker test to fail if loads
2541   // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
2542   // the stores in one method and we interpret the loads in another.
2543   if (! UseBarriersForVolatile) {
2544     Label notVolatile;
2545     __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2546     __ membar(MacroAssembler::AnyAny);
2547     __ bind(notVolatile);
2548   }
2549 
2550   const Address field(obj, off);
2551 
2552   Label Done, notByte, notBool, notInt, notShort, notChar,
2553               notLong, notFloat, notObj, notDouble;
2554 
2555   // x86 uses a shift and mask, or wings it with a shift plus an assert
2556   // that the mask is not needed; aarch64 just uses a bitfield extract
2557   __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift,
2558            ConstantPoolCacheEntry::tos_state_bits);
2559 
2560   assert(btos == 0, "change code, btos != 0");
2561   __ cbnz(flags, notByte);
2562 
2563   // Don't rewrite getstatic, only getfield
2564   if (is_static) rc = may_not_rewrite;
2565 
2566   // btos
2567   __ load_signed_byte(r0, field);
2568   __ push(btos);
2569   // Rewrite bytecode to be faster
2570   if (rc == may_rewrite) {
2571     patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
2572   }
2573   __ b(Done);
2574 
2575   __ bind(notByte);
2576   __ cmp(flags, ztos);
2577   __ br(Assembler::NE, notBool);
2578 
2579   // ztos (same code as btos)
2580   __ ldrsb(r0, field);
2581   __ push(ztos);
2582   // Rewrite bytecode to be faster
2583   if (rc == may_rewrite) {
2584     // use btos rewriting, no truncating to t/f bit is needed for getfield.
2585     patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
2586   }
2587   __ b(Done);
2588 
2589   __ bind(notBool);
2590   __ cmp(flags, atos);
2591   __ br(Assembler::NE, notObj);
2592   // atos
2593   __ load_heap_oop(r0, field);
2594   __ push(atos);
2595   if (rc == may_rewrite) {
2596     patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
2597   }
2598   __ b(Done);
2599 
2600   __ bind(notObj);
2601   __ cmp(flags, itos);
2602   __ br(Assembler::NE, notInt);
2603   // itos
2604   __ ldrw(r0, field);
2605   __ push(itos);
2606   // Rewrite bytecode to be faster
2607   if (rc == may_rewrite) {
2608     patch_bytecode(Bytecodes::_fast_igetfield, bc, r1);
2609   }
2610   __ b(Done);
2611 
2612   __ bind(notInt);
2613   __ cmp(flags, ctos);
2614   __ br(Assembler::NE, notChar);
2615   // ctos
2616   __ load_unsigned_short(r0, field);
2617   __ push(ctos);
2618   // Rewrite bytecode to be faster
2619   if (rc == may_rewrite) {
2620     patch_bytecode(Bytecodes::_fast_cgetfield, bc, r1);
2621   }
2622   __ b(Done);
2623 
2624   __ bind(notChar);
2625   __ cmp(flags, stos);
2626   __ br(Assembler::NE, notShort);
2627   // stos
2628   __ load_signed_short(r0, field);
2629   __ push(stos);
2630   // Rewrite bytecode to be faster
2631   if (rc == may_rewrite) {
2632     patch_bytecode(Bytecodes::_fast_sgetfield, bc, r1);
2633   }
2634   __ b(Done);
2635 
2636   __ bind(notShort);
2637   __ cmp(flags, ltos);
2638   __ br(Assembler::NE, notLong);
2639   // ltos
2640   __ ldr(r0, field);
2641   __ push(ltos);
2642   // Rewrite bytecode to be faster
2643   if (rc == may_rewrite) {
2644     patch_bytecode(Bytecodes::_fast_lgetfield, bc, r1);
2645   }
2646   __ b(Done);
2647 
2648   __ bind(notLong);
2649   __ cmp(flags, ftos);
2650   __ br(Assembler::NE, notFloat);
2651   // ftos
2652   __ ldrs(v0, field);
2653   __ push(ftos);
2654   // Rewrite bytecode to be faster
2655   if (rc == may_rewrite) {
2656     patch_bytecode(Bytecodes::_fast_fgetfield, bc, r1);
2657   }
2658   __ b(Done);
2659 
2660   __ bind(notFloat);
2661 #ifdef ASSERT
2662   __ cmp(flags, dtos);
2663   __ br(Assembler::NE, notDouble);
2664 #endif
2665   // dtos
2666   __ ldrd(v0, field);
2667   __ push(dtos);
2668   // Rewrite bytecode to be faster
2669   if (rc == may_rewrite) {
2670     patch_bytecode(Bytecodes::_fast_dgetfield, bc, r1);
2671   }
2672 #ifdef ASSERT
2673   __ b(Done);
2674 
2675   __ bind(notDouble);
2676   __ stop("Bad state");
2677 #endif
2678 
2679   __ bind(Done);
2680 
2681   Label notVolatile;
2682   __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2683   __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2684   __ bind(notVolatile);
2685 }
2686 
2687 
2688 void TemplateTable::getfield(int byte_no)
2689 {
2690   getfield_or_static(byte_no, false);
2691 }
2692 
2693 void TemplateTable::nofast_getfield(int byte_no) {
2694   getfield_or_static(byte_no, false, may_not_rewrite);
2695 }
2696 
2697 void TemplateTable::getstatic(int byte_no)
2698 {
2699   getfield_or_static(byte_no, true);
2700 }
2701 
2702 // The cache and index registers are expected to be set before the call.
2703 // The function may destroy various registers, just not the cache and index registers.
2704 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2705   transition(vtos, vtos);
2706 
2707   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2708 
2709   if (JvmtiExport::can_post_field_modification()) {
2710     // Check to see if a field modification watch has been set before
2711     // we take the time to call into the VM.
2712     Label L1;
2713     assert_different_registers(cache, index, r0);
2714     __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2715     __ ldrw(r0, Address(rscratch1));
2716     __ cbz(r0, L1);
2717 
2718     __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
2719 
2720     if (is_static) {
2721       // Life is simple.  Null out the object pointer.
2722       __ mov(c_rarg1, zr);
2723     } else {
2724       // Life is harder. The stack holds the value on top, followed by
2725       // the object.  We don't know the size of the value, though; it
2726       // could be one or two words depending on its type. As a result,
2727       // we must find the type to determine where the object is.
2728       __ ldrw(c_rarg3, Address(c_rarg2,
2729                                in_bytes(cp_base_offset +
2730                                         ConstantPoolCacheEntry::flags_offset())));
2731       __ lsr(c_rarg3, c_rarg3,
2732              ConstantPoolCacheEntry::tos_state_shift);
2733       ConstantPoolCacheEntry::verify_tos_state_shift();
2734       Label nope2, done, ok;
2735       __ ldr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
2736       __ cmpw(c_rarg3, ltos);
2737       __ br(Assembler::EQ, ok);
2738       __ cmpw(c_rarg3, dtos);
2739       __ br(Assembler::NE, nope2);
2740       __ bind(ok);
2741       __ ldr(c_rarg1, at_tos_p2()); // ltos (two word jvalue)
2742       __ bind(nope2);
2743     }
2744     // cache entry pointer
2745     __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset));
2746     // object (tos)
2747     __ mov(c_rarg3, esp);
2748     // c_rarg1: object pointer set up above (NULL if static)
2749     // c_rarg2: cache entry pointer
2750     // c_rarg3: jvalue object on the stack
2751     __ call_VM(noreg,
2752                CAST_FROM_FN_PTR(address,
2753                                 InterpreterRuntime::post_field_modification),
2754                c_rarg1, c_rarg2, c_rarg3);
2755     __ get_cache_and_index_at_bcp(cache, index, 1);
2756     __ bind(L1);
2757   }
2758 }
2759 
2760 void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
2761   transition(vtos, vtos);
2762 
2763   const Register cache = r2;
2764   const Register index = r3;
2765   const Register obj   = r2;
2766   const Register off   = r19;
2767   const Register flags = r0;
2768   const Register bc    = r4;
2769 
2770   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2771   jvmti_post_field_mod(cache, index, is_static);
2772   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2773 
2774   Label Done;
2775   __ mov(r5, flags);
2776 
2777   {
2778     Label notVolatile;
2779     __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2780     __ membar(MacroAssembler::StoreStore);
2781     __ bind(notVolatile);
2782   }
2783 
2784   // field address
2785   const Address field(obj, off);
2786 
2787   Label notByte, notBool, notInt, notShort, notChar,
2788         notLong, notFloat, notObj, notDouble;
2789 
2790   // x86 uses a shift and mask, or wings it with a shift plus an assert
2791   // that the mask is not needed; aarch64 just uses a bitfield extract
2792   __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
2793 
2794   assert(btos == 0, "change code, btos != 0");
2795   __ cbnz(flags, notByte);
2796 
2797   // Don't rewrite putstatic, only putfield
2798   if (is_static) rc = may_not_rewrite;
2799 
2800   // btos
2801   {
2802     __ pop(btos);
2803     if (!is_static) pop_and_check_object(obj);
2804     __ strb(r0, field);
2805     if (rc == may_rewrite) {
2806       patch_bytecode(Bytecodes::_fast_bputfield, bc, r1, true, byte_no);
2807     }
2808     __ b(Done);
2809   }
2810 
2811   __ bind(notByte);
2812   __ cmp(flags, ztos);
2813   __ br(Assembler::NE, notBool);
2814 
2815   // ztos
2816   {
2817     __ pop(ztos);
2818     if (!is_static) pop_and_check_object(obj);
2819     __ andw(r0, r0, 0x1);
2820     __ strb(r0, field);
2821     if (rc == may_rewrite) {
2822       patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
2823     }
2824     __ b(Done);
2825   }
2826 
2827   __ bind(notBool);
2828   __ cmp(flags, atos);
2829   __ br(Assembler::NE, notObj);
2830 
2831   // atos
2832   {
2833     __ pop(atos);
2834     if (!is_static) pop_and_check_object(obj);
2835     // Store into the field
2836     do_oop_store(_masm, field, r0, _bs->kind(), false);
2837     if (rc == may_rewrite) {
2838       patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
2839     }
2840     __ b(Done);
2841   }
2842 
2843   __ bind(notObj);
2844   __ cmp(flags, itos);
2845   __ br(Assembler::NE, notInt);
2846 
2847   // itos
2848   {
2849     __ pop(itos);
2850     if (!is_static) pop_and_check_object(obj);
2851     __ strw(r0, field);
2852     if (rc == may_rewrite) {
2853       patch_bytecode(Bytecodes::_fast_iputfield, bc, r1, true, byte_no);
2854     }
2855     __ b(Done);
2856   }
2857 
2858   __ bind(notInt);
2859   __ cmp(flags, ctos);
2860   __ br(Assembler::NE, notChar);
2861 
2862   // ctos
2863   {
2864     __ pop(ctos);
2865     if (!is_static) pop_and_check_object(obj);
2866     __ strh(r0, field);
2867     if (rc == may_rewrite) {
2868       patch_bytecode(Bytecodes::_fast_cputfield, bc, r1, true, byte_no);
2869     }
2870     __ b(Done);
2871   }
2872 
2873   __ bind(notChar);
2874   __ cmp(flags, stos);
2875   __ br(Assembler::NE, notShort);
2876 
2877   // stos
2878   {
2879     __ pop(stos);
2880     if (!is_static) pop_and_check_object(obj);
2881     __ strh(r0, field);
2882     if (rc == may_rewrite) {
2883       patch_bytecode(Bytecodes::_fast_sputfield, bc, r1, true, byte_no);
2884     }
2885     __ b(Done);
2886   }
2887 
2888   __ bind(notShort);
2889   __ cmp(flags, ltos);
2890   __ br(Assembler::NE, notLong);
2891 
2892   // ltos
2893   {
2894     __ pop(ltos);
2895     if (!is_static) pop_and_check_object(obj);
2896     __ str(r0, field);
2897     if (rc == may_rewrite) {
2898       patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no);
2899     }
2900     __ b(Done);
2901   }
2902 
2903   __ bind(notLong);
2904   __ cmp(flags, ftos);
2905   __ br(Assembler::NE, notFloat);
2906 
2907   // ftos
2908   {
2909     __ pop(ftos);
2910     if (!is_static) pop_and_check_object(obj);
2911     __ strs(v0, field);
2912     if (rc == may_rewrite) {
2913       patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no);
2914     }
2915     __ b(Done);
2916   }
2917 
2918   __ bind(notFloat);
2919 #ifdef ASSERT
2920   __ cmp(flags, dtos);
2921   __ br(Assembler::NE, notDouble);
2922 #endif
2923 
2924   // dtos
2925   {
2926     __ pop(dtos);
2927     if (!is_static) pop_and_check_object(obj);
2928     __ strd(v0, field);
2929     if (rc == may_rewrite) {
2930       patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no);
2931     }
2932   }
2933 
2934 #ifdef ASSERT
2935   __ b(Done);
2936 
2937   __ bind(notDouble);
2938   __ stop("Bad state");
2939 #endif
2940 
2941   __ bind(Done);
2942 
2943   {
2944     Label notVolatile;
2945     __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2946     __ membar(MacroAssembler::StoreLoad);
2947     __ bind(notVolatile);
2948   }
2949 }
2950 
2951 void TemplateTable::putfield(int byte_no)
2952 {
2953   putfield_or_static(byte_no, false);
2954 }
2955 
2956 void TemplateTable::nofast_putfield(int byte_no) {
2957   putfield_or_static(byte_no, false, may_not_rewrite);
2958 }
2959 
2960 void TemplateTable::putstatic(int byte_no) {
2961   putfield_or_static(byte_no, true);
2962 }
2963 
2964 void TemplateTable::jvmti_post_fast_field_mod()
2965 {
2966   if (JvmtiExport::can_post_field_modification()) {
2967     // Check to see if a field modification watch has been set before
2968     // we take the time to call into the VM.
2969     Label L2;
2970     __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2971     __ ldrw(c_rarg3, Address(rscratch1));
2972     __ cbzw(c_rarg3, L2);
2973     __ pop_ptr(r19);                  // copy the object pointer from tos
2974     __ verify_oop(r19);
2975     __ push_ptr(r19);                 // put the object pointer back on tos
2976     // Save tos values before call_VM() clobbers them. Since we have
2977     // to do it for every data type, we use the saved values as the
2978     // jvalue object.
2979     switch (bytecode()) {          // load values into the jvalue object
2980     case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
2981     case Bytecodes::_fast_bputfield: // fall through
2982     case Bytecodes::_fast_zputfield: // fall through
2983     case Bytecodes::_fast_sputfield: // fall through
2984     case Bytecodes::_fast_cputfield: // fall through
2985     case Bytecodes::_fast_iputfield: __ push_i(r0); break;
2986     case Bytecodes::_fast_dputfield: __ push_d(); break;
2987     case Bytecodes::_fast_fputfield: __ push_f(); break;
2988     case Bytecodes::_fast_lputfield: __ push_l(r0); break;
2989 
2990     default:
2991       ShouldNotReachHere();
2992     }
2993     __ mov(c_rarg3, esp);             // points to jvalue on the stack
2994     // access constant pool cache entry
2995     __ get_cache_entry_pointer_at_bcp(c_rarg2, r0, 1);
2996     __ verify_oop(r19);
2997     // r19: object pointer copied above
2998     // c_rarg2: cache entry pointer
2999     // c_rarg3: jvalue object on the stack
3000     __ call_VM(noreg,
3001                CAST_FROM_FN_PTR(address,
3002                                 InterpreterRuntime::post_field_modification),
3003                r19, c_rarg2, c_rarg3);
3004 
3005     switch (bytecode()) {             // restore tos values
3006     case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
3007     case Bytecodes::_fast_bputfield: // fall through
3008     case Bytecodes::_fast_zputfield: // fall through
3009     case Bytecodes::_fast_sputfield: // fall through
3010     case Bytecodes::_fast_cputfield: // fall through
3011     case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
3012     case Bytecodes::_fast_dputfield: __ pop_d(); break;
3013     case Bytecodes::_fast_fputfield: __ pop_f(); break;
3014     case Bytecodes::_fast_lputfield: __ pop_l(r0); break;
3015     }
3016     __ bind(L2);
3017   }
3018 }
3019 
3020 void TemplateTable::fast_storefield(TosState state)
3021 {
3022   transition(state, vtos);
3023 
3024   ByteSize base = ConstantPoolCache::base_offset();
3025 
3026   jvmti_post_fast_field_mod();
3027 
3028   // access constant pool cache
3029   __ get_cache_and_index_at_bcp(r2, r1, 1);
3030 
3031   // test for volatile with r3
3032   __ ldrw(r3, Address(r2, in_bytes(base +
3033                                    ConstantPoolCacheEntry::flags_offset())));
3034 
3035   // replace index with field offset from cache entry
3036   __ ldr(r1, Address(r2, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
3037 
3038   {
3039     Label notVolatile;
3040     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3041     __ membar(MacroAssembler::StoreStore);
3042     __ bind(notVolatile);
3043   }
3044 
3047   // Get object from stack
3048   pop_and_check_object(r2);
3049 
3050   // field address
3051   const Address field(r2, r1);
3052 
3053   // access field
3054   switch (bytecode()) {
3055   case Bytecodes::_fast_aputfield:
3056     do_oop_store(_masm, field, r0, _bs->kind(), false);
3057     break;
3058   case Bytecodes::_fast_lputfield:
3059     __ str(r0, field);
3060     break;
3061   case Bytecodes::_fast_iputfield:
3062     __ strw(r0, field);
3063     break;
3064   case Bytecodes::_fast_zputfield:
3065     __ andw(r0, r0, 0x1);  // boolean is true if LSB is 1
3066     // fall through to bputfield
3067   case Bytecodes::_fast_bputfield:
3068     __ strb(r0, field);
3069     break;
3070   case Bytecodes::_fast_sputfield:
3071     // fall through
3072   case Bytecodes::_fast_cputfield:
3073     __ strh(r0, field);
3074     break;
3075   case Bytecodes::_fast_fputfield:
3076     __ strs(v0, field);
3077     break;
3078   case Bytecodes::_fast_dputfield:
3079     __ strd(v0, field);
3080     break;
3081   default:
3082     ShouldNotReachHere();
3083   }
3084 
3085   {
3086     Label notVolatile;
3087     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3088     __ membar(MacroAssembler::StoreLoad);
3089     __ bind(notVolatile);
3090   }
3091 }
3092 
3093 
3094 void TemplateTable::fast_accessfield(TosState state)
3095 {
3096   transition(atos, state);
3097   // Do the JVMTI work here to avoid disturbing the register state below
3098   if (JvmtiExport::can_post_field_access()) {
3099     // Check to see if a field access watch has been set before we
3100     // take the time to call into the VM.
3101     Label L1;
3102     __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
3103     __ ldrw(r2, Address(rscratch1));
3104     __ cbzw(r2, L1);
3105     // access constant pool cache entry
3106     __ get_cache_entry_pointer_at_bcp(c_rarg2, rscratch2, 1);
3107     __ verify_oop(r0);
3108     __ push_ptr(r0);  // save object pointer before call_VM() clobbers it
3109     __ mov(c_rarg1, r0);
3110     // c_rarg1: object pointer copied above
3111     // c_rarg2: cache entry pointer
3112     __ call_VM(noreg,
3113                CAST_FROM_FN_PTR(address,
3114                                 InterpreterRuntime::post_field_access),
3115                c_rarg1, c_rarg2);
3116     __ pop_ptr(r0); // restore object pointer
3117     __ bind(L1);
3118   }
3119 
3120   // access constant pool cache
3121   __ get_cache_and_index_at_bcp(r2, r1, 1);
3122   __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3123                                   ConstantPoolCacheEntry::f2_offset())));
3124   __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3125                                    ConstantPoolCacheEntry::flags_offset())));
3126 
3127   // r0: object
3128   __ verify_oop(r0);
3129   __ null_check(r0);
3130   const Address field(r0, r1);
3131 
3132   // 8179954: We need to make sure that the code generated for
3133   // volatile accesses forms a sequentially-consistent set of
3134   // operations when combined with STLR and LDAR.  Without a leading
3135   // membar it's possible for a simple Dekker test to fail if loads
3136   // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
3137   // the stores in one method and we interpret the loads in another.
3138   if (! UseBarriersForVolatile) {
3139     Label notVolatile;
3140     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3141     __ membar(MacroAssembler::AnyAny);
3142     __ bind(notVolatile);
3143   }
3144 
3145   // access field
3146   switch (bytecode()) {
3147   case Bytecodes::_fast_agetfield:
3148     __ load_heap_oop(r0, field);
3149     __ verify_oop(r0);
3150     break;
3151   case Bytecodes::_fast_lgetfield:
3152     __ ldr(r0, field);
3153     break;
3154   case Bytecodes::_fast_igetfield:
3155     __ ldrw(r0, field);
3156     break;
3157   case Bytecodes::_fast_bgetfield:
3158     __ load_signed_byte(r0, field);
3159     break;
3160   case Bytecodes::_fast_sgetfield:
3161     __ load_signed_short(r0, field);
3162     break;
3163   case Bytecodes::_fast_cgetfield:
3164     __ load_unsigned_short(r0, field);
3165     break;
3166   case Bytecodes::_fast_fgetfield:
3167     __ ldrs(v0, field);
3168     break;
3169   case Bytecodes::_fast_dgetfield:
3170     __ ldrd(v0, field);
3171     break;
3172   default:
3173     ShouldNotReachHere();
3174   }
3175   {
3176     Label notVolatile;
3177     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3178     __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
3179     __ bind(notVolatile);
3180   }
3181 }
3182 
3183 void TemplateTable::fast_xaccess(TosState state)
3184 {
3185   transition(vtos, state);
3186 
3187   // get receiver
3188   __ ldr(r0, aaddress(0));
3189   // access constant pool cache
3190   __ get_cache_and_index_at_bcp(r2, r3, 2);
3191   __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3192                                   ConstantPoolCacheEntry::f2_offset())));
3193 
3194   // 8179954: We need to make sure that the code generated for
3195   // volatile accesses forms a sequentially-consistent set of
3196   // operations when combined with STLR and LDAR.  Without a leading
3197   // membar it's possible for a simple Dekker test to fail if loads
3198   // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
3199   // the stores in one method and we interpret the loads in another.
3200   if (! UseBarriersForVolatile) {
3201     Label notVolatile;
3202     __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3203                                      ConstantPoolCacheEntry::flags_offset())));
3204     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3205     __ membar(MacroAssembler::AnyAny);
3206     __ bind(notVolatile);
3207   }
3208 
3209   // make sure exception is reported in correct bcp range (getfield is
3210   // next instruction)
3211   __ increment(rbcp);
3212   __ null_check(r0);
3213   switch (state) {
3214   case itos:
3215     __ ldrw(r0, Address(r0, r1, Address::lsl(0)));
3216     break;
3217   case atos:
3218     __ load_heap_oop(r0, Address(r0, r1, Address::lsl(0)));
3219     __ verify_oop(r0);
3220     break;
3221   case ftos:
3222     __ ldrs(v0, Address(r0, r1, Address::lsl(0)));
3223     break;
3224   default:
3225     ShouldNotReachHere();
3226   }
3227 
3228   {
3229     Label notVolatile;
3230     __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3231                                      ConstantPoolCacheEntry::flags_offset())));
3232     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3233     __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
3234     __ bind(notVolatile);
3235   }
3236 
3237   __ decrement(rbcp);
3238 }
3239 
3240 
3241 
3242 //-----------------------------------------------------------------------------
3243 // Calls
3244 
3245 void TemplateTable::count_calls(Register method, Register temp)
3246 {
3247   __ call_Unimplemented();
3248 }
3249 
3250 void TemplateTable::prepare_invoke(int byte_no,
3251                                    Register method, // linked method (or i-klass)
3252                                    Register index,  // itable index, MethodType, etc.
3253                                    Register recv,   // if caller wants to see it
3254                                    Register flags   // if caller wants to test it
3255                                    ) {
3256   // determine flags
3257   Bytecodes::Code code = bytecode();
3258   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
3259   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
3260   const bool is_invokehandle     = code == Bytecodes::_invokehandle;
3261   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
3262   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
3263   const bool load_receiver       = (recv  != noreg);
3264   const bool save_flags          = (flags != noreg);
3265   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3266   assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
3267   assert(flags == noreg || flags == r3, "");
3268   assert(recv  == noreg || recv  == r2, "");
3269 
3270   // setup registers & access constant pool cache
3271   if (recv  == noreg)  recv  = r2;
3272   if (flags == noreg)  flags = r3;
3273   assert_different_registers(method, index, recv, flags);
3274 
3275   // save 'interpreter return address'
3276   __ save_bcp();
3277 
3278   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
3279 
3280   // maybe push appendix to arguments (just before return address)
3281   if (is_invokedynamic || is_invokehandle) {
3282     Label L_no_push;
3283     __ tbz(flags, ConstantPoolCacheEntry::has_appendix_shift, L_no_push);
3284     // Push the appendix as a trailing parameter.
3285     // This must be done before we get the receiver,
3286     // since the parameter_size includes it.
3287     __ push(r19);
3288     __ mov(r19, index);
3289     assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
3290     __ load_resolved_reference_at_index(index, r19);
3291     __ pop(r19);
3292     __ push(index);  // push appendix (MethodType, CallSite, etc.)
3293     __ bind(L_no_push);
3294   }
3295 
3296   // load receiver if needed (note: no return address pushed yet)
3297   if (load_receiver) {
3298     __ andw(recv, flags, ConstantPoolCacheEntry::parameter_size_mask);
3299     // FIXME -- is this actually correct? looks like it should be 2
3300     // const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
3301     // const int receiver_is_at_end      = -1;  // back off one slot to get receiver
3302     // Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
3303     // __ movptr(recv, recv_addr);
3304     __ add(rscratch1, esp, recv, ext::uxtx, 3); // FIXME: uxtb here?
3305     __ ldr(recv, Address(rscratch1, -Interpreter::expr_offset_in_bytes(1)));
3306     __ verify_oop(recv);
3307   }
3308 
3309   // compute return type
3310   // x86 uses a shift and mask, or wings it with a shift plus an assert
3311   // that the mask is not needed; aarch64 just uses a bitfield extract
3312   __ ubfxw(rscratch2, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
3313   // load return address
3314   {
3315     const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
3316     __ mov(rscratch1, table_addr);
3317     __ ldr(lr, Address(rscratch1, rscratch2, Address::lsl(3)));
3318   }
3319 }
3320 
3321 
3322 void TemplateTable::invokevirtual_helper(Register index,
3323                                          Register recv,
3324                                          Register flags)
3325 {
3326   // Uses temporary registers r0, r3
3327   assert_different_registers(index, recv, r0, r3);
3328   // Test for an invoke of a final method
3329   Label notFinal;
3330   __ tbz(flags, ConstantPoolCacheEntry::is_vfinal_shift, notFinal);
3331 
3332   const Register method = index;  // method must be rmethod
3333   assert(method == rmethod,
3334          "methodOop must be rmethod for interpreter calling convention");
3335 
3336   // do the call - the index is actually the method to call
3337   // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
3338 
3339   // It's final, need a null check here!
3340   __ null_check(recv);
3341 
3342   // profile this call
3343   __ profile_final_call(r0);
3344   __ profile_arguments_type(r0, method, r4, true);
3345 
3346   __ jump_from_interpreted(method, r0);
3347 
3348   __ bind(notFinal);
3349 
3350   // get receiver klass
3351   __ null_check(recv, oopDesc::klass_offset_in_bytes());
3352   __ load_klass(r0, recv);
3353 
3354   // profile this call
3355   __ profile_virtual_call(r0, rlocals, r3);
3356 
3357   // get target methodOop & entry point
3358   __ lookup_virtual_method(r0, index, method);
3359   __ profile_arguments_type(r3, method, r4, true);
3360   // FIXME -- this looks completely redundant. is it?
3361   // __ ldr(r3, Address(method, Method::interpreter_entry_offset()));
3362   __ jump_from_interpreted(method, r3);
3363 }
3364 
3365 void TemplateTable::invokevirtual(int byte_no)
3366 {
3367   transition(vtos, vtos);
3368   assert(byte_no == f2_byte, "use this argument");
3369 
3370   prepare_invoke(byte_no, rmethod, noreg, r2, r3);
3371 
3372   // rmethod: index (actually a Method*)
3373   // r2: receiver
3374   // r3: flags
3375 
3376   invokevirtual_helper(rmethod, r2, r3);
3377 }
3378 
3379 void TemplateTable::invokespecial(int byte_no)
3380 {
3381   transition(vtos, vtos);
3382   assert(byte_no == f1_byte, "use this argument");
3383 
3384   prepare_invoke(byte_no, rmethod, noreg,  // get f1 Method*
3385                  r2);  // get receiver also for null check
3386   __ verify_oop(r2);
3387   __ null_check(r2);
3388   // do the call
3389   __ profile_call(r0);
3390   __ profile_arguments_type(r0, rmethod, rbcp, false);
3391   __ jump_from_interpreted(rmethod, r0);
3392 }
3393 
3394 void TemplateTable::invokestatic(int byte_no)
3395 {
3396   transition(vtos, vtos);
3397   assert(byte_no == f1_byte, "use this argument");
3398 
3399   prepare_invoke(byte_no, rmethod);  // get f1 Method*
3400   // do the call
3401   __ profile_call(r0);
3402   __ profile_arguments_type(r0, rmethod, r4, false);
3403   __ jump_from_interpreted(rmethod, r0);
3404 }
3405 
3406 void TemplateTable::fast_invokevfinal(int byte_no)
3407 {
3408   __ call_Unimplemented();
3409 }
3410 
3411 void TemplateTable::invokeinterface(int byte_no) {
3412   transition(vtos, vtos);
3413   assert(byte_no == f1_byte, "use this argument");
3414 
3415   prepare_invoke(byte_no, r0, rmethod,  // get f1 Klass*, f2 Method*
3416                  r2, r3); // recv, flags
3417 
3418   // r0: interface klass (from f1)
3419   // rmethod: method (from f2)
3420   // r2: receiver
3421   // r3: flags
3422 
3423   // Special case of invokeinterface called for virtual method of
3424   // java.lang.Object.  See cpCacheOop.cpp for details.
3425   // This code isn't produced by javac, but could be produced by
3426   // another compliant Java compiler.
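       // For illustration only (hypothetical bytecode, not emitted by javac):
       // another compiler might compile `cmpRef.hashCode()`, where cmpRef has
       // static type java.lang.Comparable, to
       //   invokeinterface java/lang/Comparable.hashCode()I
       // Since hashCode() is a virtual method of java.lang.Object, the cache
       // entry is flagged as forced-virtual and the call is dispatched through
       // invokevirtual_helper below.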
3427   Label notMethod;
3428   __ tbz(r3, ConstantPoolCacheEntry::is_forced_virtual_shift, notMethod);
3429 
3430   invokevirtual_helper(rmethod, r2, r3);
3431   __ bind(notMethod);
3432 
3433   // Get receiver klass into r3 - also a null check
3434   __ restore_locals();
3435   __ null_check(r2, oopDesc::klass_offset_in_bytes());
3436   __ load_klass(r3, r2);
3437 
3438   Label no_such_interface, no_such_method;
3439 
3440   // Preserve method for throw_AbstractMethodErrorVerbose.
3441   __ mov(r16, rmethod);
3442   // Receiver subtype check against REFC.
3443   // Superklass in r0. Subklass in r3. Blows rscratch2, r13
3444   __ lookup_interface_method(// inputs: rec. class, interface, itable index
3445                              r3, r0, noreg,
3446                              // outputs: scan temp. reg, scan temp. reg
3447                              rscratch2, r13,
3448                              no_such_interface,
3449                              /*return_method=*/false);
3450 
3451   // profile this call
3452   __ profile_virtual_call(r3, r13, r19);
3453 
3454   // Get declaring interface class from method, and itable index
3455   __ ldr(r0, Address(rmethod, Method::const_offset()));
3456   __ ldr(r0, Address(r0, ConstMethod::constants_offset()));
3457   __ ldr(r0, Address(r0, ConstantPool::pool_holder_offset_in_bytes()));
3458   __ ldrw(rmethod, Address(rmethod, Method::itable_index_offset()));
3459   __ subw(rmethod, rmethod, Method::itable_index_max);
3460   __ negw(rmethod, rmethod);
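       // (the Method* stores itable_index_max - itable_index, so the subtract
       // and negate above recover the real itable index)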
3461 
3462   // Preserve recvKlass for throw_AbstractMethodErrorVerbose.
3463   __ mov(rlocals, r3);
3464   __ lookup_interface_method(// inputs: rec. class, interface, itable index
3465                              rlocals, r0, rmethod,
3466                              // outputs: method, scan temp. reg
3467                              rmethod, r13,
3468                              no_such_interface);
3469 
3470   // rmethod: Method* to call
3471   // r2: receiver
3472   // Check for abstract method error
3473   // Note: This should be done more efficiently via a throw_abstract_method_error
3474   //       interpreter entry point and a conditional jump to it in case of a null
3475   //       method.
3476   __ cbz(rmethod, no_such_method);
3477 
3478   __ profile_arguments_type(r3, rmethod, r13, true);
3479 
3480   // do the call
3481   // r2: receiver
3482   // rmethod: Method*
3483   __ jump_from_interpreted(rmethod, r3);
3484   __ should_not_reach_here();
3485 
3486   // exception handling code follows...
3487   // note: must restore interpreter registers to canonical
3488   //       state for exception handling to work correctly!
3489 
3490   __ bind(no_such_method);
3491   // throw exception
3492   __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
3493   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3494   // Pass arguments for generating a verbose error message.
3495   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), r3, r16);
3496   // the call_VM checks for exception, so we should never return here.
3497   __ should_not_reach_here();
3498 
3499   __ bind(no_such_interface);
3500   // throw exception
3501   __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
3502   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3503   // Pass arguments for generating a verbose error message.
3504   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3505                    InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), r3, r0);
3506   // the call_VM checks for exception, so we should never return here.
3507   __ should_not_reach_here();
3508   return;
3509 }
3510 
3511 void TemplateTable::invokehandle(int byte_no) {
3512   transition(vtos, vtos);
3513   assert(byte_no == f1_byte, "use this argument");
3514 
3515   prepare_invoke(byte_no, rmethod, r0, r2);
3516   __ verify_method_ptr(r2);
3517   __ verify_oop(r2);
3518   __ null_check(r2);
3519 
3520   // FIXME: profile the LambdaForm also
3521 
3522   // r13 is safe to use here as a scratch reg because it is about to
3523   // be clobbered by jump_from_interpreted().
3524   __ profile_final_call(r13);
3525   __ profile_arguments_type(r13, rmethod, r4, true);
3526 
3527   __ jump_from_interpreted(rmethod, r0);
3528 }
3529 
3530 void TemplateTable::invokedynamic(int byte_no) {
3531   transition(vtos, vtos);
3532   assert(byte_no == f1_byte, "use this argument");
3533 
3534   prepare_invoke(byte_no, rmethod, r0);
3535 
3536   // r0: CallSite object (from cpool->resolved_references[])
3537   // rmethod: MH.linkToCallSite method (from f2)
3538 
3539   // Note:  the CallSite object in r0 has already been pushed by prepare_invoke
3540 
3541   // %%% should make a type profile for any invokedynamic that takes a ref argument
3542   // profile this call
3543   __ profile_call(rbcp);
3544   __ profile_arguments_type(r3, rmethod, r13, false);
3545 
3546   __ verify_oop(r0);
3547 
3548   __ jump_from_interpreted(rmethod, r0);
3549 }
3550 
3551 
3552 //-----------------------------------------------------------------------------
3553 // Allocation
3554 
3555 void TemplateTable::_new() {
3556   transition(vtos, atos);
3557 
3558   __ get_unsigned_2_byte_index_at_bcp(r3, 1);
3559   Label slow_case;
3560   Label done;
3561   Label initialize_header;
3562   Label initialize_object; // including clearing the fields
3563 
3564   __ get_cpool_and_tags(r4, r0);
3565   // Make sure the class we're about to instantiate has been resolved.
3566   // This is done before loading the InstanceKlass to be consistent with the
3567   // order in which the constant pool is updated (see ConstantPool::klass_at_put)
3568   const int tags_offset = Array<u1>::base_offset_in_bytes();
3569   __ lea(rscratch1, Address(r0, r3, Address::lsl(0)));
3570   __ lea(rscratch1, Address(rscratch1, tags_offset));
3571   __ ldarb(rscratch1, rscratch1);
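       // ldarb: load the tag with acquire semantics so the resolved klass read
       // below cannot be observed before the tag that published it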
3572   __ cmp(rscratch1, JVM_CONSTANT_Class);
3573   __ br(Assembler::NE, slow_case);
3574 
3575   // get InstanceKlass
3576   __ load_resolved_klass_at_offset(r4, r3, r4, rscratch1);
3577 
3578   // make sure klass is initialized & doesn't have a finalizer:
3579   // first, check that it is fully initialized
3580   __ ldrb(rscratch1, Address(r4, InstanceKlass::init_state_offset()));
3581   __ cmp(rscratch1, InstanceKlass::fully_initialized);
3582   __ br(Assembler::NE, slow_case);
3583 
3584   // get instance_size in InstanceKlass (scaled to a count of bytes)
3585   __ ldrw(r3,
3586           Address(r4,
3587                   Klass::layout_helper_offset()));
3588   // test to see if it has a finalizer or is malformed in some way
3589   __ tbnz(r3, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case);
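       // (for instance klasses the layout helper is the instance size in bytes,
       // with the low bit set when allocation must take the slow path, e.g. for
       // finalizable classes)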
3590 
3591   // Allocate the instance:
3592   //  If TLAB is enabled:
3593   //    Try to allocate in the TLAB.
3594   //    If fails, go to the slow path.
3595   //  Else If inline contiguous allocations are enabled:
3596   //    Try to allocate in eden.
3597   //    If fails due to heap end, go to slow path.
3598   //
3599   //  If TLAB is enabled OR inline contiguous is enabled:
3600   //    Initialize the allocation.
3601   //    Exit.
3602   //
3603   //  Go to slow path.
3604   const bool allow_shared_alloc =
3605     Universe::heap()->supports_inline_contig_alloc();
3606 
3607   if (UseTLAB) {
3608     __ tlab_allocate(r0, r3, 0, noreg, r1, slow_case);
3609 
3610     if (ZeroTLAB) {
3611       // the fields have already been cleared
3612       __ b(initialize_header);
3613     } else {
3614       // initialize both the header and fields
3615       __ b(initialize_object);
3616     }
3617   } else {
3618     // Allocation in the shared Eden, if allowed.
3619     //
3620     // r3: instance size in bytes
3621     if (allow_shared_alloc) {
3622       __ eden_allocate(r0, r3, 0, r10, slow_case);
3623       __ incr_allocated_bytes(rthread, r3, 0, rscratch1);
3624     }
3625   }
3626 
3627   // If UseTLAB or allow_shared_alloc is true, the object has been created
3628   // above and needs to be initialized. Otherwise, skip to the slow path.
3629   if (UseTLAB || allow_shared_alloc) {
3630     // The object's fields are initialized before its header.  If the object
3631     // size is zero, go directly to the header initialization.
3632     __ bind(initialize_object);
3633     __ sub(r3, r3, sizeof(oopDesc));
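         // r3 = number of field bytes to zero (instance size minus the header)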
3634     __ cbz(r3, initialize_header);
3635 
3636     // Initialize object fields
3637     {
3638       __ add(r2, r0, sizeof(oopDesc));
3639       Label loop;
3640       __ bind(loop);
3641       __ str(zr, Address(__ post(r2, BytesPerLong)));
3642       __ sub(r3, r3, BytesPerLong);
3643       __ cbnz(r3, loop);
3644     }
3645 
3646     // initialize object header only.
3647     __ bind(initialize_header);
3648     if (UseBiasedLocking) {
3649       __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset()));
3650     } else {
3651       __ mov(rscratch1, (intptr_t)markOopDesc::prototype());
3652     }
3653     __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes()));
3654     __ store_klass_gap(r0, zr);  // zero klass gap for compressed oops
3655     __ store_klass(r0, r4);      // store klass last
3656 
3657     {
3658       SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
3659       // Trigger dtrace event for fastpath
3660       __ push(atos); // save the return value
3661       __ call_VM_leaf(
3662            CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), r0);
3663       __ pop(atos); // restore the return value
3664 
3665     }
3666     __ b(done);
3667   }
3668 
3669   // slow case
3670   __ bind(slow_case);
3671   __ get_constant_pool(c_rarg1);
3672   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3673   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
3674   __ verify_oop(r0);
3675 
3676   // continue
3677   __ bind(done);
3678   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3679   __ membar(Assembler::StoreStore);
3680 }
3681 
3682 void TemplateTable::newarray() {
3683   transition(itos, atos);
3684   __ load_unsigned_byte(c_rarg1, at_bcp(1));
3685   __ mov(c_rarg2, r0);
3686   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
3687           c_rarg1, c_rarg2);
3688   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3689   __ membar(Assembler::StoreStore);
3690 }
3691 
3692 void TemplateTable::anewarray() {
3693   transition(itos, atos);
3694   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3695   __ get_constant_pool(c_rarg1);
3696   __ mov(c_rarg3, r0);
3697   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
3698           c_rarg1, c_rarg2, c_rarg3);
3699   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3700   __ membar(Assembler::StoreStore);
3701 }
3702 
3703 void TemplateTable::arraylength() {
3704   transition(atos, itos);
3705   __ null_check(r0, arrayOopDesc::length_offset_in_bytes());
3706   __ ldrw(r0, Address(r0, arrayOopDesc::length_offset_in_bytes()));
3707 }
3708 
3709 void TemplateTable::checkcast()
3710 {
3711   transition(atos, atos);
3712   Label done, is_null, ok_is_subtype, quicked, resolved;
3713   __ cbz(r0, is_null);
3714 
3715   // Get cpool & tags index
3716   __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
3717   __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
3718   // See if bytecode has already been quicked
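       // ("quicked" means the referenced class has already been resolved, in
       // which case its constant pool tag is JVM_CONSTANT_Class)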
3719   __ add(rscratch1, r3, Array<u1>::base_offset_in_bytes());
3720   __ lea(r1, Address(rscratch1, r19));
3721   __ ldarb(r1, r1);
3722   __ cmp(r1, JVM_CONSTANT_Class);
3723   __ br(Assembler::EQ, quicked);
3724 
3725   __ push(atos); // save receiver for result, and for GC
3726   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3727   // vm_result_2 has metadata result
3728   __ get_vm_result_2(r0, rthread);
3729   __ pop(r3); // restore receiver
3730   __ b(resolved);
3731 
3732   // Get superklass in r0 and subklass in r3
3733   __ bind(quicked);
3734   __ mov(r3, r0); // Save object in r3; r0 needed for subtype check
3735   __ load_resolved_klass_at_offset(r2, r19, r0, rscratch1); // r0 = klass
3736 
3737   __ bind(resolved);
3738   __ load_klass(r19, r3);
3739 
3740   // Generate subtype check.  Blows r2, r5.  Object in r3.
3741   // Superklass in r0.  Subklass in r19.
3742   __ gen_subtype_check(r19, ok_is_subtype);
3743 
3744   // Come here on failure
3745   __ push(r3);
3746   // object is at TOS
3747   __ b(Interpreter::_throw_ClassCastException_entry);
3748 
3749   // Come here on success
3750   __ bind(ok_is_subtype);
3751   __ mov(r0, r3); // Restore the object (saved in r3) into r0
3752 
3753   // Collect counts on whether this test sees NULLs a lot or not.
3754   if (ProfileInterpreter) {
3755     __ b(done);
3756     __ bind(is_null);
3757     __ profile_null_seen(r2);
3758   } else {
3759     __ bind(is_null);   // same as 'done'
3760   }
3761   __ bind(done);
3762 }
3763 
3764 void TemplateTable::instanceof() {
3765   transition(atos, itos);
3766   Label done, is_null, ok_is_subtype, quicked, resolved;
3767   __ cbz(r0, is_null);
3768 
3769   // Get cpool & tags index
3770   __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
3771   __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
3772   // See if bytecode has already been quicked
3773   __ add(rscratch1, r3, Array<u1>::base_offset_in_bytes());
3774   __ lea(r1, Address(rscratch1, r19));
3775   __ ldarb(r1, r1);
3776   __ cmp(r1, JVM_CONSTANT_Class);
3777   __ br(Assembler::EQ, quicked);
3778 
3779   __ push(atos); // save receiver for result, and for GC
3780   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3781   // vm_result_2 has metadata result
3782   __ get_vm_result_2(r0, rthread);
3783   __ pop(r3); // restore receiver
3784   __ verify_oop(r3);
3785   __ load_klass(r3, r3);
3786   __ b(resolved);
3787 
3788   // Get superklass in r0 and subklass in r3
3789   __ bind(quicked);
3790   __ load_klass(r3, r0);
3791   __ load_resolved_klass_at_offset(r2, r19, r0, rscratch1);
3792 
3793   __ bind(resolved);
3794 
3795   // Generate subtype check.  Blows r2, r5
3796   // Superklass in r0.  Subklass in r3.
3797   __ gen_subtype_check(r3, ok_is_subtype);
3798 
3799   // Come here on failure
3800   __ mov(r0, 0);
3801   __ b(done);
3802   // Come here on success
3803   __ bind(ok_is_subtype);
3804   __ mov(r0, 1);
3805 
3806   // Collect counts on whether this test sees NULLs a lot or not.
3807   if (ProfileInterpreter) {
3808     __ b(done);
3809     __ bind(is_null);
3810     __ profile_null_seen(r2);
3811   } else {
3812     __ bind(is_null);   // same as 'done'
3813   }
3814   __ bind(done);
3815   // r0 = 0: obj == NULL or  obj is not an instanceof the specified klass
3816   // r0 = 1: obj != NULL and obj is     an instanceof the specified klass
3817 }
3818 
3819 //-----------------------------------------------------------------------------
3820 // Breakpoints
3821 void TemplateTable::_breakpoint() {
3822   // Note: We get here even if we are single stepping.
3823   // jbug insists on setting breakpoints at every bytecode
3824   // even if we are in single step mode.
3825 
3826   transition(vtos, vtos);
3827 
3828   // get the unpatched byte code
3829   __ get_method(c_rarg1);
3830   __ call_VM(noreg,
3831              CAST_FROM_FN_PTR(address,
3832                               InterpreterRuntime::get_original_bytecode_at),
3833              c_rarg1, rbcp);
3834   __ mov(r19, r0);
3835 
3836   // post the breakpoint event
3837   __ call_VM(noreg,
3838              CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
3839              rmethod, rbcp);
3840 
3841   // complete the execution of original bytecode
3842   __ mov(rscratch1, r19);
3843   __ dispatch_only_normal(vtos);
3844 }
3845 
3846 //-----------------------------------------------------------------------------
3847 // Exceptions
3848 
3849 void TemplateTable::athrow() {
3850   transition(atos, vtos);
3851   __ null_check(r0);
3852   __ b(Interpreter::throw_exception_entry());
3853 }
3854 
3855 //-----------------------------------------------------------------------------
3856 // Synchronization
3857 //
3858 // Note: monitorenter & exit are symmetric routines, which is reflected
3859 //       in the assembly code structure as well
3860 //
3861 // Stack layout:
3862 //
3863 // [expressions  ] <--- esp               = expression stack top
3864 // ..
3865 // [expressions  ]
3866 // [monitor entry] <--- monitor block top = expression stack bot
3867 // ..
3868 // [monitor entry]
3869 // [frame data   ] <--- monitor block bot
3870 // ...
3871 // [saved rfp    ] <--- rfp
3872 void TemplateTable::monitorenter()
3873 {
3874   transition(atos, vtos);
3875 
3876   // check for NULL object
3877   __ null_check(r0);
3878 
3879   const Address monitor_block_top(
3880         rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3881   const Address monitor_block_bot(
3882         rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
3883   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3884 
3885   Label allocated;
3886 
3887   // initialize entry pointer
3888   __ mov(c_rarg1, zr); // points to free slot or NULL
3889 
3890   // find a free slot in the monitor block (result in c_rarg1)
3891   {
3892     Label entry, loop, exit;
3893     __ ldr(c_rarg3, monitor_block_top); // points to current entry,
3894                                         // starting with top-most entry
3895     __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
3896 
3897     __ b(entry);
3898 
3899     __ bind(loop);
3900     // check if current entry is used
3901     // if not used then remember entry in c_rarg1
3902     __ ldr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
3903     __ cmp(zr, rscratch1);
3904     __ csel(c_rarg1, c_rarg3, c_rarg1, Assembler::EQ);
3905     // check if current entry is for same object
3906     __ cmp(r0, rscratch1);
3907     // if same object then stop searching
3908     __ br(Assembler::EQ, exit);
3909     // otherwise advance to next entry
3910     __ add(c_rarg3, c_rarg3, entry_size);
3911     __ bind(entry);
3912     // check if bottom reached
3913     __ cmp(c_rarg3, c_rarg2);
3914     // if not at bottom then check this entry
3915     __ br(Assembler::NE, loop);
3916     __ bind(exit);
3917   }
3918 
3919   __ cbnz(c_rarg1, allocated); // check if a slot has been found and
3920                             // if found, continue with that one
3921 
3922   // allocate one if there's no free slot
3923   {
3924     Label entry, loop;
3925     // 1. compute new pointers            // esp: old expression stack top
3926     __ ldr(c_rarg1, monitor_block_bot);   // c_rarg1: old expression stack bottom
3927     __ sub(esp, esp, entry_size);         // move expression stack top
3928     __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom
3929     __ mov(c_rarg3, esp);                 // set start value for copy loop
3930     __ str(c_rarg1, monitor_block_bot);   // set new monitor block bottom
3931 
3932     __ sub(sp, sp, entry_size);           // make room for the monitor
3933 
3934     __ b(entry);
3935     // 2. move expression stack contents
3936     __ bind(loop);
3937     __ ldr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
3938                                                    // word from old location
3939     __ str(c_rarg2, Address(c_rarg3, 0));          // and store it at new location
3940     __ add(c_rarg3, c_rarg3, wordSize);            // advance to next word
3941     __ bind(entry);
3942     __ cmp(c_rarg3, c_rarg1);        // check if bottom reached
3943     __ br(Assembler::NE, loop);      // if not at bottom then
3944                                      // copy next word
3945   }
3946 
3947   // call run-time routine
3948   // c_rarg1: points to monitor entry
3949   __ bind(allocated);
3950 
3951   // Increment bcp to point to the next bytecode, so exception
3952   // handling for async. exceptions works correctly.
3953   // The object has already been popped from the stack, so the
3954   // expression stack looks correct.
3955   __ increment(rbcp);
3956 
3957   // store object
3958   __ str(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
3959   __ lock_object(c_rarg1);
3960 
3961   // check to make sure this monitor doesn't cause stack overflow after locking
3962   __ save_bcp();  // in case of exception
3963   __ generate_stack_overflow_check(0);
3964 
3965   // The bcp has already been incremented. Just need to dispatch to
3966   // next instruction.
3967   __ dispatch_next(vtos);
3968 }
3969 
3970 
3971 void TemplateTable::monitorexit()
3972 {
3973   transition(atos, vtos);
3974 
3975   // check for NULL object
3976   __ null_check(r0);
3977 
3978   const Address monitor_block_top(
3979         rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3980   const Address monitor_block_bot(
3981         rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
3982   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3983 
3984   Label found;
3985 
3986   // find matching slot
3987   {
3988     Label entry, loop;
3989     __ ldr(c_rarg1, monitor_block_top); // points to current entry,
3990                                         // starting with top-most entry
3991     __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
3992                                         // of monitor block
3993     __ b(entry);
3994 
3995     __ bind(loop);
3996     // check if current entry is for same object
3997     __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
3998     __ cmp(r0, rscratch1);
3999     // if same object then stop searching
4000     __ br(Assembler::EQ, found);
4001     // otherwise advance to next entry
4002     __ add(c_rarg1, c_rarg1, entry_size);
4003     __ bind(entry);
4004     // check if bottom reached
4005     __ cmp(c_rarg1, c_rarg2);
4006     // if not at bottom then check this entry
4007     __ br(Assembler::NE, loop);
4008   }
4009 
4010   // error handling. Unlocking was not block-structured
4011   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4012                    InterpreterRuntime::throw_illegal_monitor_state_exception));
4013   __ should_not_reach_here();
4014 
4015   // call run-time routine
4016   __ bind(found);
4017   __ push_ptr(r0); // make sure object is on stack (contract with oopMaps)
4018   __ unlock_object(c_rarg1);
4019   __ pop_ptr(r0); // discard object
4020 }
4021 
4022 
4023 // Wide instructions
4024 void TemplateTable::wide()
4025 {
4026   __ load_unsigned_byte(r19, at_bcp(1));
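       // dispatch through the wide-variant entry point table, indexed by the
       // bytecode that follows the wide prefix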
4027   __ mov(rscratch1, (address)Interpreter::_wentry_point);
4028   __ ldr(rscratch1, Address(rscratch1, r19, Address::uxtw(3)));
4029   __ br(rscratch1);
4030 }
4031 
4032 
4033 // Multi arrays
4034 void TemplateTable::multianewarray() {
4035   transition(vtos, atos);
4036   __ load_unsigned_byte(r0, at_bcp(3)); // get number of dimensions
4037   // last dim is on top of stack; we want address of first one:
4038   // first_addr = last_addr + (ndims - 1) * wordSize
4039   __ lea(c_rarg1, Address(esp, r0, Address::uxtw(3)));
4040   __ sub(c_rarg1, c_rarg1, wordSize);
4041   call_VM(r0,
4042           CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
4043           c_rarg1);
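       // pop the dimension words off the expression stack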
4044   __ load_unsigned_byte(r1, at_bcp(3));
4045   __ lea(esp, Address(esp, r1, Address::uxtw(3)));
4046 }