1 /*
   2  * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.hpp"
  28 #include "gc/shared/barrierSetCodeGen.hpp"
  29 #include "interpreter/interpreter.hpp"
  30 #include "interpreter/interpreterRuntime.hpp"
  31 #include "interpreter/interp_masm.hpp"
  32 #include "interpreter/templateTable.hpp"
  33 #include "memory/universe.inline.hpp"
  34 #include "oops/methodData.hpp"
  35 #include "oops/method.hpp"
  36 #include "oops/objArrayKlass.hpp"
  37 #include "oops/oop.inline.hpp"
  38 #include "prims/methodHandles.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "runtime/synchronizer.hpp"
  42 
  43 #define __ _masm->
  44 
  45 // Platform-dependent initialization
  46 
  47 void TemplateTable::pd_initialize() {
  48   // No aarch64 specific initialization
  49 }
  50 
  51 // Address computation: local variables
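     //
     // Locals are laid out at decreasing addresses from rlocals: locals_index()
     // hands these helpers a negated index, so iaddress(Register) computes
     // rlocals + (-index << 3), i.e. slot n lives at rlocals - n*wordSize.
     // A two-slot value (long/double) occupying slots n and n+1 is accessed
     // through the lower-addressed slot n+1, which is why laddress(n) is
     // iaddress(n + 1) and laddress(Register) adds local_offset_in_bytes(1).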
  52 
  53 static inline Address iaddress(int n) {
  54   return Address(rlocals, Interpreter::local_offset_in_bytes(n));
  55 }
  56 
  57 static inline Address laddress(int n) {
  58   return iaddress(n + 1);
  59 }
  60 
  61 static inline Address faddress(int n) {
  62   return iaddress(n);
  63 }
  64 
  65 static inline Address daddress(int n) {
  66   return laddress(n);
  67 }
  68 
  69 static inline Address aaddress(int n) {
  70   return iaddress(n);
  71 }
  72 
  73 static inline Address iaddress(Register r) {
  74   return Address(rlocals, r, Address::lsl(3));
  75 }
  76 
  77 static inline Address laddress(Register r, Register scratch,
  78                                InterpreterMacroAssembler* _masm) {
  79   __ lea(scratch, Address(rlocals, r, Address::lsl(3)));
  80   return Address(scratch, Interpreter::local_offset_in_bytes(1));
  81 }
  82 
  83 static inline Address faddress(Register r) {
  84   return iaddress(r);
  85 }
  86 
  87 static inline Address daddress(Register r, Register scratch,
  88                                InterpreterMacroAssembler* _masm) {
  89   return laddress(r, scratch, _masm);
  90 }
  91 
  92 static inline Address aaddress(Register r) {
  93   return iaddress(r);
  94 }
  95 
  96 static inline Address at_rsp() {
  97   return Address(esp, 0);
  98 }
  99 
 100 // At the top of the Java expression stack, which may be different from esp().
 101 // It isn't different for category 1 values.
 102 static inline Address at_tos   () {
 103   return Address(esp,  Interpreter::expr_offset_in_bytes(0));
 104 }
 105 
 106 static inline Address at_tos_p1() {
 107   return Address(esp,  Interpreter::expr_offset_in_bytes(1));
 108 }
 109 
 110 static inline Address at_tos_p2() {
 111   return Address(esp,  Interpreter::expr_offset_in_bytes(2));
 112 }
 113 
 114 static inline Address at_tos_p3() {
 115   return Address(esp,  Interpreter::expr_offset_in_bytes(3));
 116 }
 117 
 118 static inline Address at_tos_p4() {
 119   return Address(esp,  Interpreter::expr_offset_in_bytes(4));
 120 }
 121 
 122 static inline Address at_tos_p5() {
 123   return Address(esp,  Interpreter::expr_offset_in_bytes(5));
 124 }
 125 
 126 // Condition conversion
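     // (j_not(cc) returns the condition under which the branch is NOT taken,
     // so templates can branch straight to the not-taken path.)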
 127 static Assembler::Condition j_not(TemplateTable::Condition cc) {
 128   switch (cc) {
 129   case TemplateTable::equal        : return Assembler::NE;
 130   case TemplateTable::not_equal    : return Assembler::EQ;
 131   case TemplateTable::less         : return Assembler::GE;
 132   case TemplateTable::less_equal   : return Assembler::GT;
 133   case TemplateTable::greater      : return Assembler::LE;
 134   case TemplateTable::greater_equal: return Assembler::LT;
 135   }
 136   ShouldNotReachHere();
 137   return Assembler::EQ;
 138 }
 139 
 140 
 141 // Miscellaneous helper routines
 142 // Store an oop (or NULL) at the Address described by dst.
 143 // If val == noreg this means store a NULL
 144 static void do_oop_store(InterpreterMacroAssembler* _masm,
 145                          Address dst,
 146                          Register val,
 147                          DecoratorSet decorators) {
 148   assert(val == noreg || val == r0, "parameter is just for looks");
 149   BarrierSetCodeGen *code_gen = Universe::heap()->barrier_set()->code_gen();
 150   code_gen->store_at(_masm, decorators, T_OBJECT, dst, val, /*tmp1*/ r10, /*tmp2*/ r1);
 151 }
 152 
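     // Load an oop from the Address src into dst, letting the collector's
     // barrier-set code generator emit whatever load barrier it requires;
     // r10 and r1 are passed down as temporaries.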
 153 static void do_oop_load(InterpreterMacroAssembler* _masm,
 154                         Address src,
 155                         Register dst,
 156                         DecoratorSet decorators) {
 157   BarrierSetCodeGen *code_gen = Universe::heap()->barrier_set()->code_gen();
 158   code_gen->load_at(_masm, decorators, T_OBJECT, dst, src, /*tmp1*/ r10, /*tmp_thread*/ r1);
 159 }
 160 
 161 Address TemplateTable::at_bcp(int offset) {
 162   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
 163   return Address(rbcp, offset);
 164 }
 165 
 166 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
 167                                    Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
 168                                    int byte_no)
 169 {
 170   if (!RewriteBytecodes)  return;
 171   Label L_patch_done;
 172 
 173   switch (bc) {
 174   case Bytecodes::_fast_aputfield:
 175   case Bytecodes::_fast_bputfield:
 176   case Bytecodes::_fast_zputfield:
 177   case Bytecodes::_fast_cputfield:
 178   case Bytecodes::_fast_dputfield:
 179   case Bytecodes::_fast_fputfield:
 180   case Bytecodes::_fast_iputfield:
 181   case Bytecodes::_fast_lputfield:
 182   case Bytecodes::_fast_sputfield:
 183     {
 184       // We skip bytecode quickening for putfield instructions when
 185       // the put_code written to the constant pool cache is zero.
 186       // This is required so that every execution of this instruction
 187       // calls out to InterpreterRuntime::resolve_get_put to do
 188       // additional, required work.
 189       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
 190       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
 191       __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
 192       __ movw(bc_reg, bc);
 193       __ cbzw(temp_reg, L_patch_done);  // don't patch
 194     }
 195     break;
 196   default:
 197     assert(byte_no == -1, "sanity");
 198     // the pair bytecodes have already done the load.
 199     if (load_bc_into_bc_reg) {
 200       __ movw(bc_reg, bc);
 201     }
 202   }
 203 
 204   if (JvmtiExport::can_post_breakpoint()) {
 205     Label L_fast_patch;
 206     // if a breakpoint is present we can't rewrite the stream directly
 207     __ load_unsigned_byte(temp_reg, at_bcp(0));
 208     __ cmpw(temp_reg, Bytecodes::_breakpoint);
 209     __ br(Assembler::NE, L_fast_patch);
 210     // Let breakpoint table handling rewrite to quicker bytecode
 211     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), rmethod, rbcp, bc_reg);
 212     __ b(L_patch_done);
 213     __ bind(L_fast_patch);
 214   }
 215 
 216 #ifdef ASSERT
 217   Label L_okay;
 218   __ load_unsigned_byte(temp_reg, at_bcp(0));
 219   __ cmpw(temp_reg, (int) Bytecodes::java_code(bc));
 220   __ br(Assembler::EQ, L_okay);
 221   __ cmpw(temp_reg, bc_reg);
 222   __ br(Assembler::EQ, L_okay);
 223   __ stop("patching the wrong bytecode");
 224   __ bind(L_okay);
 225 #endif
 226 
 227   // patch bytecode
 228   __ strb(bc_reg, at_bcp(0));
 229   __ bind(L_patch_done);
 230 }
 231 
 232 
 233 // Individual instructions
 234 
 235 void TemplateTable::nop() {
 236   transition(vtos, vtos);
 237   // nothing to do
 238 }
 239 
 240 void TemplateTable::shouldnotreachhere() {
 241   transition(vtos, vtos);
 242   __ stop("shouldnotreachhere bytecode");
 243 }
 244 
 245 void TemplateTable::aconst_null()
 246 {
 247   transition(vtos, atos);
 248   __ mov(r0, 0);
 249 }
 250 
 251 void TemplateTable::iconst(int value)
 252 {
 253   transition(vtos, itos);
 254   __ mov(r0, value);
 255 }
 256 
 257 void TemplateTable::lconst(int value)
 258 {
 259   __ mov(r0, value);
 260 }
 261 
 262 void TemplateTable::fconst(int value)
 263 {
 264   transition(vtos, ftos);
 265   switch (value) {
 266   case 0:
 267     __ fmovs(v0, zr);
 268     break;
 269   case 1:
 270     __ fmovs(v0, 1.0);
 271     break;
 272   case 2:
 273     __ fmovs(v0, 2.0);
 274     break;
 275   default:
 276     ShouldNotReachHere();
 277     break;
 278   }
 279 }
 280 
 281 void TemplateTable::dconst(int value)
 282 {
 283   transition(vtos, dtos);
 284   switch (value) {
 285   case 0:
 286     __ fmovd(v0, zr);
 287     break;
 288   case 1:
 289     __ fmovd(v0, 1.0);
 290     break;
 291   case 2:
 292     __ fmovd(v0, 2.0);
 293     break;
 294   default:
 295     ShouldNotReachHere();
 296     break;
 297   }
 298 }
 299 
 300 void TemplateTable::bipush()
 301 {
 302   transition(vtos, itos);
 303   __ load_signed_byte32(r0, at_bcp(1));
 304 }
 305 
 306 void TemplateTable::sipush()
 307 {
 308   transition(vtos, itos);
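       // the 16-bit operand is big-endian in the bytecode stream: byte-reverse
       // the loaded word (the operand ends up in bits 16..31) and arithmetic
       // shift right to bring it back down sign-extended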
 309   __ load_unsigned_short(r0, at_bcp(1));
 310   __ revw(r0, r0);
 311   __ asrw(r0, r0, 16);
 312 }
 313 
 314 void TemplateTable::ldc(bool wide)
 315 {
 316   transition(vtos, vtos);
 317   Label call_ldc, notFloat, notClass, Done;
 318 
 319   if (wide) {
 320     __ get_unsigned_2_byte_index_at_bcp(r1, 1);
 321   } else {
 322     __ load_unsigned_byte(r1, at_bcp(1));
 323   }
 324   __ get_cpool_and_tags(r2, r0);
 325 
 326   const int base_offset = ConstantPool::header_size() * wordSize;
 327   const int tags_offset = Array<u1>::base_offset_in_bytes();
 328 
 329   // get type
 330   __ add(r3, r1, tags_offset);
 331   __ lea(r3, Address(r0, r3));
 332   __ ldarb(r3, r3);
 333 
 334   // unresolved class - get the resolved class
 335   __ cmp(r3, JVM_CONSTANT_UnresolvedClass);
 336   __ br(Assembler::EQ, call_ldc);
 337 
 338   // unresolved class in error state - call into runtime to throw the error
 339   // from the first resolution attempt
 340   __ cmp(r3, JVM_CONSTANT_UnresolvedClassInError);
 341   __ br(Assembler::EQ, call_ldc);
 342 
 343   // resolved class - need to call vm to get java mirror of the class
 344   __ cmp(r3, JVM_CONSTANT_Class);
 345   __ br(Assembler::NE, notClass);
 346 
 347   __ bind(call_ldc);
 348   __ mov(c_rarg1, wide);
 349   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
 350   __ push_ptr(r0);
 351   __ verify_oop(r0);
 352   __ b(Done);
 353 
 354   __ bind(notClass);
 355   __ cmp(r3, JVM_CONSTANT_Float);
 356   __ br(Assembler::NE, notFloat);
 357   // ftos
 358   __ adds(r1, r2, r1, Assembler::LSL, 3);
 359   __ ldrs(v0, Address(r1, base_offset));
 360   __ push_f();
 361   __ b(Done);
 362 
 363   __ bind(notFloat);
 364 #ifdef ASSERT
 365   {
 366     Label L;
 367     __ cmp(r3, JVM_CONSTANT_Integer);
 368     __ br(Assembler::EQ, L);
 369     // String and Object are rewritten to fast_aldc
 370     __ stop("unexpected tag type in ldc");
 371     __ bind(L);
 372   }
 373 #endif
 374   // itos JVM_CONSTANT_Integer only
 375   __ adds(r1, r2, r1, Assembler::LSL, 3);
 376   __ ldrw(r0, Address(r1, base_offset));
 377   __ push_i(r0);
 378   __ bind(Done);
 379 }
 380 
 381 // Fast path for caching oop constants.
 382 void TemplateTable::fast_aldc(bool wide)
 383 {
 384   transition(vtos, atos);
 385 
 386   Register result = r0;
 387   Register tmp = r1;
 388   int index_size = wide ? sizeof(u2) : sizeof(u1);
 389 
 390   Label resolved;
 391 
 392   // We are resolved if the resolved reference cache entry contains a
 393   // non-null object (String, MethodType, etc.)
 394   assert_different_registers(result, tmp);
 395   __ get_cache_index_at_bcp(tmp, 1, index_size);
 396   __ load_resolved_reference_at_index(result, tmp);
 397   __ cbnz(result, resolved);
 398 
 399   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 400 
 401   // first time invocation - must resolve first
 402   __ mov(tmp, (int)bytecode());
 403   __ call_VM(result, entry, tmp);
 404 
 405   __ bind(resolved);
 406 
 407   if (VerifyOops) {
 408     __ verify_oop(result);
 409   }
 410 }
 411 
 412 void TemplateTable::ldc2_w()
 413 {
 414   transition(vtos, vtos);
 415   Label Long, Done;
 416   __ get_unsigned_2_byte_index_at_bcp(r0, 1);
 417 
 418   __ get_cpool_and_tags(r1, r2);
 419   const int base_offset = ConstantPool::header_size() * wordSize;
 420   const int tags_offset = Array<u1>::base_offset_in_bytes();
 421 
 422   // get type
 423   __ lea(r2, Address(r2, r0, Address::lsl(0)));
 424   __ load_unsigned_byte(r2, Address(r2, tags_offset));
 425   __ cmpw(r2, (int)JVM_CONSTANT_Double);
 426   __ br(Assembler::NE, Long);
 427   // dtos
 428   __ lea (r2, Address(r1, r0, Address::lsl(3)));
 429   __ ldrd(v0, Address(r2, base_offset));
 430   __ push_d();
 431   __ b(Done);
 432 
 433   __ bind(Long);
 434   // ltos
 435   __ lea(r0, Address(r1, r0, Address::lsl(3)));
 436   __ ldr(r0, Address(r0, base_offset));
 437   __ push_l();
 438 
 439   __ bind(Done);
 440 }
 441 
 442 void TemplateTable::locals_index(Register reg, int offset)
 443 {
 444   __ ldrb(reg, at_bcp(offset));
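       // negate: locals are addressed downwards from rlocals (see iaddress(Register))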
 445   __ neg(reg, reg);
 446 }
 447 
 448 void TemplateTable::iload() {
 449   iload_internal();
 450 }
 451 
 452 void TemplateTable::nofast_iload() {
 453   iload_internal(may_not_rewrite);
 454 }
 455 
 456 void TemplateTable::iload_internal(RewriteControl rc) {
 457   transition(vtos, itos);
 458   if (RewriteFrequentPairs && rc == may_rewrite) {
 459     Label rewrite, done;
 460     Register bc = r4;
 461 
 462     // get next bytecode
 463     __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
 464 
 465     // if the next bytecode is _iload, wait to rewrite to _fast_iload2: we only
 466     // want to rewrite the last two iloads in a run.  Seeing _fast_iload instead
 467     // means the next bytecode has already been rewritten, i.e. it is not followed
 468     // by another iload or a caload, so the current pair is safe to fuse.
 469     __ cmpw(r1, Bytecodes::_iload);
 470     __ br(Assembler::EQ, done);
 471 
 472     // if _fast_iload rewrite to _fast_iload2
 473     __ cmpw(r1, Bytecodes::_fast_iload);
 474     __ movw(bc, Bytecodes::_fast_iload2);
 475     __ br(Assembler::EQ, rewrite);
 476 
 477     // if _caload rewrite to _fast_icaload
 478     __ cmpw(r1, Bytecodes::_caload);
 479     __ movw(bc, Bytecodes::_fast_icaload);
 480     __ br(Assembler::EQ, rewrite);
 481 
 482     // else rewrite to _fast_iload
 483     __ movw(bc, Bytecodes::_fast_iload);
 484 
 485     // rewrite
 486     // bc: new bytecode
 487     __ bind(rewrite);
 488     patch_bytecode(Bytecodes::_iload, bc, r1, false);
 489     __ bind(done);
 490 
 491   }
 492 
 493   // do iload, get the local value into tos
 494   locals_index(r1);
 495   __ ldr(r0, iaddress(r1));
 496 
 497 }
 498 
 499 void TemplateTable::fast_iload2()
 500 {
 501   transition(vtos, itos);
 502   locals_index(r1);
 503   __ ldr(r0, iaddress(r1));
 504   __ push(itos);
 505   locals_index(r1, 3);
 506   __ ldr(r0, iaddress(r1));
 507 }
 508 
 509 void TemplateTable::fast_iload()
 510 {
 511   transition(vtos, itos);
 512   locals_index(r1);
 513   __ ldr(r0, iaddress(r1));
 514 }
 515 
 516 void TemplateTable::lload()
 517 {
 518   transition(vtos, ltos);
 519   __ ldrb(r1, at_bcp(1));
 520   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 521   __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 522 }
 523 
 524 void TemplateTable::fload()
 525 {
 526   transition(vtos, ftos);
 527   locals_index(r1);
 528   // n.b. we use ldrd here because this is a 64 bit slot
 529   // this is comparable to the iload case
 530   __ ldrd(v0, faddress(r1));
 531 }
 532 
 533 void TemplateTable::dload()
 534 {
 535   transition(vtos, dtos);
 536   __ ldrb(r1, at_bcp(1));
 537   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 538   __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 539 }
 540 
 541 void TemplateTable::aload()
 542 {
 543   transition(vtos, atos);
 544   locals_index(r1);
 545   __ ldr(r0, iaddress(r1));
 546 }
 547 
 548 void TemplateTable::locals_index_wide(Register reg) {
 549   __ ldrh(reg, at_bcp(2));
 550   __ rev16w(reg, reg);
 551   __ neg(reg, reg);
 552 }
 553 
 554 void TemplateTable::wide_iload() {
 555   transition(vtos, itos);
 556   locals_index_wide(r1);
 557   __ ldr(r0, iaddress(r1));
 558 }
 559 
 560 void TemplateTable::wide_lload()
 561 {
 562   transition(vtos, ltos);
 563   __ ldrh(r1, at_bcp(2));
 564   __ rev16w(r1, r1);
 565   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 566   __ ldr(r0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 567 }
 568 
 569 void TemplateTable::wide_fload()
 570 {
 571   transition(vtos, ftos);
 572   locals_index_wide(r1);
 573   // n.b. we use ldrd here because this is a 64 bit slot
 574   // this is comparable to the iload case
 575   __ ldrd(v0, faddress(r1));
 576 }
 577 
 578 void TemplateTable::wide_dload()
 579 {
 580   transition(vtos, dtos);
 581   __ ldrh(r1, at_bcp(2));
 582   __ rev16w(r1, r1);
 583   __ sub(r1, rlocals, r1, ext::uxtw, LogBytesPerWord);
 584   __ ldrd(v0, Address(r1, Interpreter::local_offset_in_bytes(1)));
 585 }
 586 
 587 void TemplateTable::wide_aload()
 588 {
 589   transition(vtos, atos);
 590   locals_index_wide(r1);
 591   __ ldr(r0, aaddress(r1));
 592 }
 593 
 594 void TemplateTable::index_check(Register array, Register index)
 595 {
 596   // destroys r1, rscratch1
 597   // check array
 598   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
 599   // sign extend index for use by indexed load
 600   // __ movl2ptr(index, index);
 601   // check index
 602   Register length = rscratch1;
 603   __ ldrw(length, Address(array, arrayOopDesc::length_offset_in_bytes()));
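       // unsigned compare below: a negative index, viewed as unsigned, is larger
       // than any valid length and so falls through to the throw just like
       // index >= length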
 604   __ cmpw(index, length);
 605   if (index != r1) {
 606     // ??? convention: move aberrant index into r1 for exception message
 607     assert(r1 != array, "different registers");
 608     __ mov(r1, index);
 609   }
 610   Label ok;
 611   __ br(Assembler::LO, ok);
 612   __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
 613   __ br(rscratch1);
 614   __ bind(ok);
 615 }
 616 
 617 void TemplateTable::iaload()
 618 {
 619   transition(itos, itos);
 620   __ mov(r1, r0);
 621   __ pop_ptr(r0);
 622   // r0: array
 623   // r1: index
 624   index_check(r0, r1); // leaves index in r1, kills rscratch1
 625   __ lea(r1, Address(r0, r1, Address::uxtw(2)));
 626   __ ldrw(r0, Address(r1, arrayOopDesc::base_offset_in_bytes(T_INT)));
 627 }
 628 
 629 void TemplateTable::laload()
 630 {
 631   transition(itos, ltos);
 632   __ mov(r1, r0);
 633   __ pop_ptr(r0);
 634   // r0: array
 635   // r1: index
 636   index_check(r0, r1); // leaves index in r1, kills rscratch1
 637   __ lea(r1, Address(r0, r1, Address::uxtw(3)));
 638   __ ldr(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_LONG)));
 639 }
 640 
 641 void TemplateTable::faload()
 642 {
 643   transition(itos, ftos);
 644   __ mov(r1, r0);
 645   __ pop_ptr(r0);
 646   // r0: array
 647   // r1: index
 648   index_check(r0, r1); // leaves index in r1, kills rscratch1
 649   __ lea(r1,  Address(r0, r1, Address::uxtw(2)));
 650   __ ldrs(v0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 651 }
 652 
 653 void TemplateTable::daload()
 654 {
 655   transition(itos, dtos);
 656   __ mov(r1, r0);
 657   __ pop_ptr(r0);
 658   // r0: array
 659   // r1: index
 660   index_check(r0, r1); // leaves index in r1, kills rscratch1
 661   __ lea(r1,  Address(r0, r1, Address::uxtw(3)));
 662   __ ldrd(v0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 663 }
 664 
 665 void TemplateTable::aaload()
 666 {
 667   transition(itos, atos);
 668   __ mov(r1, r0);
 669   __ pop_ptr(r0);
 670   // r0: array
 671   // r1: index
 672   index_check(r0, r1); // leaves index in r1, kills rscratch1
 673   int s = (UseCompressedOops ? 2 : 3);
 674   __ lea(r1, Address(r0, r1, Address::uxtw(s)));
 675   do_oop_load(_masm,
 676               Address(r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
 677               r0,
 678               ACCESS_IN_HEAP | ACCESS_IN_HEAP_ARRAY);
 679 }
 680 
 681 void TemplateTable::baload()
 682 {
 683   transition(itos, itos);
 684   __ mov(r1, r0);
 685   __ pop_ptr(r0);
 686   // r0: array
 687   // r1: index
 688   index_check(r0, r1); // leaves index in r1, kills rscratch1
 689   __ lea(r1,  Address(r0, r1, Address::uxtw(0)));
 690   __ load_signed_byte(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_BYTE)));
 691 }
 692 
 693 void TemplateTable::caload()
 694 {
 695   transition(itos, itos);
 696   __ mov(r1, r0);
 697   __ pop_ptr(r0);
 698   // r0: array
 699   // r1: index
 700   index_check(r0, r1); // leaves index in r1, kills rscratch1
 701   __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
 702   __ load_unsigned_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 703 }
 704 
 705 // iload followed by caload frequent pair
 706 void TemplateTable::fast_icaload()
 707 {
 708   transition(vtos, itos);
 709   // load index out of locals
 710   locals_index(r2);
 711   __ ldr(r1, iaddress(r2));
 712 
 713   __ pop_ptr(r0);
 714 
 715   // r0: array
 716   // r1: index
 717   index_check(r0, r1); // leaves index in r1, kills rscratch1
 718   __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
 719   __ load_unsigned_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 720 }
 721 
 722 void TemplateTable::saload()
 723 {
 724   transition(itos, itos);
 725   __ mov(r1, r0);
 726   __ pop_ptr(r0);
 727   // r0: array
 728   // r1: index
 729   index_check(r0, r1); // leaves index in r1, kills rscratch1
 730   __ lea(r1,  Address(r0, r1, Address::uxtw(1)));
 731   __ load_signed_short(r0, Address(r1,  arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 732 }
 733 
 734 void TemplateTable::iload(int n)
 735 {
 736   transition(vtos, itos);
 737   __ ldr(r0, iaddress(n));
 738 }
 739 
 740 void TemplateTable::lload(int n)
 741 {
 742   transition(vtos, ltos);
 743   __ ldr(r0, laddress(n));
 744 }
 745 
 746 void TemplateTable::fload(int n)
 747 {
 748   transition(vtos, ftos);
 749   __ ldrs(v0, faddress(n));
 750 }
 751 
 752 void TemplateTable::dload(int n)
 753 {
 754   transition(vtos, dtos);
 755   __ ldrd(v0, daddress(n));
 756 }
 757 
 758 void TemplateTable::aload(int n)
 759 {
 760   transition(vtos, atos);
 761   __ ldr(r0, iaddress(n));
 762 }
 763 
 764 void TemplateTable::aload_0() {
 765   aload_0_internal();
 766 }
 767 
 768 void TemplateTable::nofast_aload_0() {
 769   aload_0_internal(may_not_rewrite);
 770 }
 771 
 772 void TemplateTable::aload_0_internal(RewriteControl rc) {
 773   // According to bytecode histograms, the pairs:
 774   //
 775   // _aload_0, _fast_igetfield
 776   // _aload_0, _fast_agetfield
 777   // _aload_0, _fast_fgetfield
 778   //
 779   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 780   // _aload_0 bytecode checks if the next bytecode is either
 781   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 782   // rewrites the current bytecode into a pair bytecode; otherwise it
 783   // rewrites the current bytecode into _fast_aload_0 that doesn't do
 784   // the pair check anymore.
 785   //
 786   // Note: If the next bytecode is _getfield, the rewrite must be
 787   //       delayed, otherwise we may miss an opportunity for a pair.
 788   //
 789   // Also rewrite frequent pairs
 790   //   aload_0, aload_1
 791   //   aload_0, iload_1
 792   // These bytecodes with a small amount of code are most profitable
 793   // to rewrite
 794   if (RewriteFrequentPairs && rc == may_rewrite) {
 795     Label rewrite, done;
 796     const Register bc = r4;
 797 
 798     // get next bytecode
 799     __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
 800 
 801     // if _getfield then wait with rewrite
 802     __ cmpw(r1, Bytecodes::_getfield);
 803     __ br(Assembler::EQ, done);
 804 
 805     // if _igetfield then rewrite to _fast_iaccess_0
 806     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 807     __ cmpw(r1, Bytecodes::_fast_igetfield);
 808     __ movw(bc, Bytecodes::_fast_iaccess_0);
 809     __ br(Assembler::EQ, rewrite);
 810 
 811     // if _agetfield then rewrite to _fast_aaccess_0
 812     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 813     __ cmpw(r1, Bytecodes::_fast_agetfield);
 814     __ movw(bc, Bytecodes::_fast_aaccess_0);
 815     __ br(Assembler::EQ, rewrite);
 816 
 817     // if _fgetfield then rewrite to _fast_faccess_0
 818     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 819     __ cmpw(r1, Bytecodes::_fast_fgetfield);
 820     __ movw(bc, Bytecodes::_fast_faccess_0);
 821     __ br(Assembler::EQ, rewrite);
 822 
 823     // else rewrite to _fast_aload_0
 824     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
 825     __ movw(bc, Bytecodes::_fast_aload_0);
 826 
 827     // rewrite
 828     // bc: new bytecode
 829     __ bind(rewrite);
 830     patch_bytecode(Bytecodes::_aload_0, bc, r1, false);
 831 
 832     __ bind(done);
 833   }
 834 
 835   // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
 836   aload(0);
 837 }
 838 
 839 void TemplateTable::istore()
 840 {
 841   transition(itos, vtos);
 842   locals_index(r1);
 843   // FIXME: We're being very pernickety here storing a jint in a
 844   // local with strw, which costs an extra instruction over what we'd
 845   // be able to do with a simple str.  We should just store the whole
 846   // word.
 847   __ lea(rscratch1, iaddress(r1));
 848   __ strw(r0, Address(rscratch1));
 849 }
 850 
 851 void TemplateTable::lstore()
 852 {
 853   transition(ltos, vtos);
 854   locals_index(r1);
 855   __ str(r0, laddress(r1, rscratch1, _masm));
 856 }
 857 
 858 void TemplateTable::fstore() {
 859   transition(ftos, vtos);
 860   locals_index(r1);
 861   __ lea(rscratch1, iaddress(r1));
 862   __ strs(v0, Address(rscratch1));
 863 }
 864 
 865 void TemplateTable::dstore() {
 866   transition(dtos, vtos);
 867   locals_index(r1);
 868   __ strd(v0, daddress(r1, rscratch1, _masm));
 869 }
 870 
 871 void TemplateTable::astore()
 872 {
 873   transition(vtos, vtos);
 874   __ pop_ptr(r0);
 875   locals_index(r1);
 876   __ str(r0, aaddress(r1));
 877 }
 878 
 879 void TemplateTable::wide_istore() {
 880   transition(vtos, vtos);
 881   __ pop_i();
 882   locals_index_wide(r1);
 883   __ lea(rscratch1, iaddress(r1));
 884   __ strw(r0, Address(rscratch1));
 885 }
 886 
 887 void TemplateTable::wide_lstore() {
 888   transition(vtos, vtos);
 889   __ pop_l();
 890   locals_index_wide(r1);
 891   __ str(r0, laddress(r1, rscratch1, _masm));
 892 }
 893 
 894 void TemplateTable::wide_fstore() {
 895   transition(vtos, vtos);
 896   __ pop_f();
 897   locals_index_wide(r1);
 898   __ lea(rscratch1, faddress(r1));
 899   __ strs(v0, rscratch1);
 900 }
 901 
 902 void TemplateTable::wide_dstore() {
 903   transition(vtos, vtos);
 904   __ pop_d();
 905   locals_index_wide(r1);
 906   __ strd(v0, daddress(r1, rscratch1, _masm));
 907 }
 908 
 909 void TemplateTable::wide_astore() {
 910   transition(vtos, vtos);
 911   __ pop_ptr(r0);
 912   locals_index_wide(r1);
 913   __ str(r0, aaddress(r1));
 914 }
 915 
 916 void TemplateTable::iastore() {
 917   transition(itos, vtos);
 918   __ pop_i(r1);
 919   __ pop_ptr(r3);
 920   // r0: value
 921   // r1: index
 922   // r3: array
 923   index_check(r3, r1); // prefer index in r1
 924   __ lea(rscratch1, Address(r3, r1, Address::uxtw(2)));
 925   __ strw(r0, Address(rscratch1,
 926                       arrayOopDesc::base_offset_in_bytes(T_INT)));
 927 }
 928 
 929 void TemplateTable::lastore() {
 930   transition(ltos, vtos);
 931   __ pop_i(r1);
 932   __ pop_ptr(r3);
 933   // r0: value
 934   // r1: index
 935   // r3: array
 936   index_check(r3, r1); // prefer index in r1
 937   __ lea(rscratch1, Address(r3, r1, Address::uxtw(3)));
 938   __ str(r0, Address(rscratch1,
 939                       arrayOopDesc::base_offset_in_bytes(T_LONG)));
 940 }
 941 
 942 void TemplateTable::fastore() {
 943   transition(ftos, vtos);
 944   __ pop_i(r1);
 945   __ pop_ptr(r3);
 946   // v0: value
 947   // r1:  index
 948   // r3:  array
 949   index_check(r3, r1); // prefer index in r1
 950   __ lea(rscratch1, Address(r3, r1, Address::uxtw(2)));
 951   __ strs(v0, Address(rscratch1,
 952                       arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
 953 }
 954 
 955 void TemplateTable::dastore() {
 956   transition(dtos, vtos);
 957   __ pop_i(r1);
 958   __ pop_ptr(r3);
 959   // v0: value
 960   // r1:  index
 961   // r3:  array
 962   index_check(r3, r1); // prefer index in r1
 963   __ lea(rscratch1, Address(r3, r1, Address::uxtw(3)));
 964   __ strd(v0, Address(rscratch1,
 965                       arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
 966 }
 967 
 968 void TemplateTable::aastore() {
 969   Label is_null, ok_is_subtype, done;
 970   transition(vtos, vtos);
 971   // stack: ..., array, index, value
 972   __ ldr(r0, at_tos());    // value
 973   __ ldr(r2, at_tos_p1()); // index
 974   __ ldr(r3, at_tos_p2()); // array
 975 
 976   Address element_address(r4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
 977 
 978   index_check(r3, r2);     // kills r1
 979   __ lea(r4, Address(r3, r2, Address::uxtw(UseCompressedOops? 2 : 3)));
 980 
 981   // do array store check - check for NULL value first
 982   __ cbz(r0, is_null);
 983 
 984   // Move subklass into r1
 985   __ load_klass(r1, r0);
 986   // Move superklass into r0
 987   __ load_klass(r0, r3);
 988   __ ldr(r0, Address(r0,
 989                      ObjArrayKlass::element_klass_offset()));
 990   // The element address (array + index*oopSize + header) was already computed into r4 above, so r2 is free.
 991 
 992   // Generate subtype check.  Blows r2, r5
 993   // Superklass in r0.  Subklass in r1.
 994   __ gen_subtype_check(r1, ok_is_subtype);
 995 
 996   // Come here on failure
 997   // object is at TOS
 998   __ b(Interpreter::_throw_ArrayStoreException_entry);
 999 
1000   // Come here on success
1001   __ bind(ok_is_subtype);
1002 
1003   // Get the value we will store
1004   __ ldr(r0, at_tos());
1005   // Now store using the appropriate barrier
1006   do_oop_store(_masm, element_address, r0, ACCESS_IN_HEAP);
1007   __ b(done);
1008 
1009   // Have a NULL in r0, r3=array, r2=index.  Store NULL at ary[idx]
1010   __ bind(is_null);
1011   __ profile_null_seen(r2);
1012 
1013   // Store a NULL
1014   do_oop_store(_masm, element_address, noreg, ACCESS_IN_HEAP);
1015 
1016   // Pop stack arguments
1017   __ bind(done);
1018   __ add(esp, esp, 3 * Interpreter::stackElementSize);
1019 }
1020 
1021 void TemplateTable::bastore()
1022 {
1023   transition(itos, vtos);
1024   __ pop_i(r1);
1025   __ pop_ptr(r3);
1026   // r0: value
1027   // r1: index
1028   // r3: array
1029   index_check(r3, r1); // prefer index in r1
1030 
1031   // Need to check whether array is boolean or byte
1032   // since both types share the bastore bytecode.
1033   __ load_klass(r2, r3);
1034   __ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
1035   int diffbit_index = exact_log2(Klass::layout_helper_boolean_diffbit());
1036   Label L_skip;
1037   __ tbz(r2, diffbit_index, L_skip);
1038   __ andw(r0, r0, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1039   __ bind(L_skip);
1040 
1041   __ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
1042   __ strb(r0, Address(rscratch1,
1043                       arrayOopDesc::base_offset_in_bytes(T_BYTE)));
1044 }
1045 
1046 void TemplateTable::castore()
1047 {
1048   transition(itos, vtos);
1049   __ pop_i(r1);
1050   __ pop_ptr(r3);
1051   // r0: value
1052   // r1: index
1053   // r3: array
1054   index_check(r3, r1); // prefer index in r1
1055   __ lea(rscratch1, Address(r3, r1, Address::uxtw(1)));
1056   __ strh(r0, Address(rscratch1,
1057                       arrayOopDesc::base_offset_in_bytes(T_CHAR)));
1058 }
1059 
1060 void TemplateTable::sastore()
1061 {
1062   castore();
1063 }
1064 
1065 void TemplateTable::istore(int n)
1066 {
1067   transition(itos, vtos);
1068   __ str(r0, iaddress(n));
1069 }
1070 
1071 void TemplateTable::lstore(int n)
1072 {
1073   transition(ltos, vtos);
1074   __ str(r0, laddress(n));
1075 }
1076 
1077 void TemplateTable::fstore(int n)
1078 {
1079   transition(ftos, vtos);
1080   __ strs(v0, faddress(n));
1081 }
1082 
1083 void TemplateTable::dstore(int n)
1084 {
1085   transition(dtos, vtos);
1086   __ strd(v0, daddress(n));
1087 }
1088 
1089 void TemplateTable::astore(int n)
1090 {
1091   transition(vtos, vtos);
1092   __ pop_ptr(r0);
1093   __ str(r0, iaddress(n));
1094 }
1095 
1096 void TemplateTable::pop()
1097 {
1098   transition(vtos, vtos);
1099   __ add(esp, esp, Interpreter::stackElementSize);
1100 }
1101 
1102 void TemplateTable::pop2()
1103 {
1104   transition(vtos, vtos);
1105   __ add(esp, esp, 2 * Interpreter::stackElementSize);
1106 }
1107 
1108 void TemplateTable::dup()
1109 {
1110   transition(vtos, vtos);
1111   __ ldr(r0, Address(esp, 0));
1112   __ push(r0);
1113   // stack: ..., a, a
1114 }
1115 
1116 void TemplateTable::dup_x1()
1117 {
1118   transition(vtos, vtos);
1119   // stack: ..., a, b
1120   __ ldr(r0, at_tos());  // load b
1121   __ ldr(r2, at_tos_p1());  // load a
1122   __ str(r0, at_tos_p1());  // store b
1123   __ str(r2, at_tos());  // store a
1124   __ push(r0);                  // push b
1125   // stack: ..., b, a, b
1126 }
1127 
1128 void TemplateTable::dup_x2()
1129 {
1130   transition(vtos, vtos);
1131   // stack: ..., a, b, c
1132   __ ldr(r0, at_tos());  // load c
1133   __ ldr(r2, at_tos_p2());  // load a
1134   __ str(r0, at_tos_p2());  // store c in a
1135   __ push(r0);      // push c
1136   // stack: ..., c, b, c, c
1137   __ ldr(r0, at_tos_p2());  // load b
1138   __ str(r2, at_tos_p2());  // store a in b
1139   // stack: ..., c, a, c, c
1140   __ str(r0, at_tos_p1());  // store b in c
1141   // stack: ..., c, a, b, c
1142 }
1143 
1144 void TemplateTable::dup2()
1145 {
1146   transition(vtos, vtos);
1147   // stack: ..., a, b
1148   __ ldr(r0, at_tos_p1());  // load a
1149   __ push(r0);                  // push a
1150   __ ldr(r0, at_tos_p1());  // load b
1151   __ push(r0);                  // push b
1152   // stack: ..., a, b, a, b
1153 }
1154 
1155 void TemplateTable::dup2_x1()
1156 {
1157   transition(vtos, vtos);
1158   // stack: ..., a, b, c
1159   __ ldr(r2, at_tos());  // load c
1160   __ ldr(r0, at_tos_p1());  // load b
1161   __ push(r0);                  // push b
1162   __ push(r2);                  // push c
1163   // stack: ..., a, b, c, b, c
1164   __ str(r2, at_tos_p3());  // store c in b
1165   // stack: ..., a, c, c, b, c
1166   __ ldr(r2, at_tos_p4());  // load a
1167   __ str(r2, at_tos_p2());  // store a in 2nd c
1168   // stack: ..., a, c, a, b, c
1169   __ str(r0, at_tos_p4());  // store b in a
1170   // stack: ..., b, c, a, b, c
1171 }
1172 
1173 void TemplateTable::dup2_x2()
1174 {
1175   transition(vtos, vtos);
1176   // stack: ..., a, b, c, d
1177   __ ldr(r2, at_tos());  // load d
1178   __ ldr(r0, at_tos_p1());  // load c
1179   __ push(r0);                  // push c
1180   __ push(r2);                  // push d
1181   // stack: ..., a, b, c, d, c, d
1182   __ ldr(r0, at_tos_p4());  // load b
1183   __ str(r0, at_tos_p2());  // store b in d
1184   __ str(r2, at_tos_p4());  // store d in b
1185   // stack: ..., a, d, c, b, c, d
1186   __ ldr(r2, at_tos_p5());  // load a
1187   __ ldr(r0, at_tos_p3());  // load c
1188   __ str(r2, at_tos_p3());  // store a in c
1189   __ str(r0, at_tos_p5());  // store c in a
1190   // stack: ..., c, d, a, b, c, d
1191 }
1192 
1193 void TemplateTable::swap()
1194 {
1195   transition(vtos, vtos);
1196   // stack: ..., a, b
1197   __ ldr(r2, at_tos_p1());  // load a
1198   __ ldr(r0, at_tos());  // load b
1199   __ str(r2, at_tos());  // store a in b
1200   __ str(r0, at_tos_p1());  // store b in a
1201   // stack: ..., b, a
1202 }
1203 
1204 void TemplateTable::iop2(Operation op)
1205 {
1206   transition(itos, itos);
1207   // r0 <== r1 op r0
1208   __ pop_i(r1);
1209   switch (op) {
1210   case add  : __ addw(r0, r1, r0); break;
1211   case sub  : __ subw(r0, r1, r0); break;
1212   case mul  : __ mulw(r0, r1, r0); break;
1213   case _and : __ andw(r0, r1, r0); break;
1214   case _or  : __ orrw(r0, r1, r0); break;
1215   case _xor : __ eorw(r0, r1, r0); break;
1216   case shl  : __ lslvw(r0, r1, r0); break;
1217   case shr  : __ asrvw(r0, r1, r0); break;
1218   case ushr : __ lsrvw(r0, r1, r0);break;
1219   default   : ShouldNotReachHere();
1220   }
1221 }
1222 
1223 void TemplateTable::lop2(Operation op)
1224 {
1225   transition(ltos, ltos);
1226   // r0 <== r1 op r0
1227   __ pop_l(r1);
1228   switch (op) {
1229   case add  : __ add(r0, r1, r0); break;
1230   case sub  : __ sub(r0, r1, r0); break;
1231   case mul  : __ mul(r0, r1, r0); break;
1232   case _and : __ andr(r0, r1, r0); break;
1233   case _or  : __ orr(r0, r1, r0); break;
1234   case _xor : __ eor(r0, r1, r0); break;
1235   default   : ShouldNotReachHere();
1236   }
1237 }
1238 
1239 void TemplateTable::idiv()
1240 {
1241   transition(itos, itos);
1242   // explicitly check for div0
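       // (AArch64 sdiv does not trap on a zero divisor - it simply yields 0 -
       // so the ArithmeticException must be raised explicitly)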
1243   Label no_div0;
1244   __ cbnzw(r0, no_div0);
1245   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1246   __ br(rscratch1);
1247   __ bind(no_div0);
1248   __ pop_i(r1);
1249   // r0 <== r1 idiv r0
1250   __ corrected_idivl(r0, r1, r0, /* want_remainder */ false);
1251 }
1252 
1253 void TemplateTable::irem()
1254 {
1255   transition(itos, itos);
1256   // explicitly check for div0
1257   Label no_div0;
1258   __ cbnzw(r0, no_div0);
1259   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1260   __ br(rscratch1);
1261   __ bind(no_div0);
1262   __ pop_i(r1);
1263   // r0 <== r1 irem r0
1264   __ corrected_idivl(r0, r1, r0, /* want_remainder */ true);
1265 }
1266 
1267 void TemplateTable::lmul()
1268 {
1269   transition(ltos, ltos);
1270   __ pop_l(r1);
1271   __ mul(r0, r0, r1);
1272 }
1273 
1274 void TemplateTable::ldiv()
1275 {
1276   transition(ltos, ltos);
1277   // explicitly check for div0
1278   Label no_div0;
1279   __ cbnz(r0, no_div0);
1280   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1281   __ br(rscratch1);
1282   __ bind(no_div0);
1283   __ pop_l(r1);
1284   // r0 <== r1 ldiv r0
1285   __ corrected_idivq(r0, r1, r0, /* want_remainder */ false);
1286 }
1287 
1288 void TemplateTable::lrem()
1289 {
1290   transition(ltos, ltos);
1291   // explicitly check for div0
1292   Label no_div0;
1293   __ cbnz(r0, no_div0);
1294   __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry);
1295   __ br(rscratch1);
1296   __ bind(no_div0);
1297   __ pop_l(r1);
1298   // r0 <== r1 lrem r0
1299   __ corrected_idivq(r0, r1, r0, /* want_remainder */ true);
1300 }
1301 
1302 void TemplateTable::lshl()
1303 {
1304   transition(itos, ltos);
1305   // shift count is in r0
1306   __ pop_l(r1);
1307   __ lslv(r0, r1, r0);
1308 }
1309 
1310 void TemplateTable::lshr()
1311 {
1312   transition(itos, ltos);
1313   // shift count is in r0
1314   __ pop_l(r1);
1315   __ asrv(r0, r1, r0);
1316 }
1317 
1318 void TemplateTable::lushr()
1319 {
1320   transition(itos, ltos);
1321   // shift count is in r0
1322   __ pop_l(r1);
1323   __ lsrv(r0, r1, r0);
1324 }
1325 
1326 void TemplateTable::fop2(Operation op)
1327 {
1328   transition(ftos, ftos);
1329   switch (op) {
1330   case add:
1331     // n.b. use ldrd because this is a 64 bit slot
1332     __ pop_f(v1);
1333     __ fadds(v0, v1, v0);
1334     break;
1335   case sub:
1336     __ pop_f(v1);
1337     __ fsubs(v0, v1, v0);
1338     break;
1339   case mul:
1340     __ pop_f(v1);
1341     __ fmuls(v0, v1, v0);
1342     break;
1343   case div:
1344     __ pop_f(v1);
1345     __ fdivs(v0, v1, v0);
1346     break;
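       // AArch64 has no floating-point remainder instruction, so for frem the
       // divisor is moved to v1, the dividend popped into v0, and the work is
       // handed off to SharedRuntime::frem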
1347   case rem:
1348     __ fmovs(v1, v0);
1349     __ pop_f(v0);
1350     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::frem),
1351                          0, 2, MacroAssembler::ret_type_float);
1352     break;
1353   default:
1354     ShouldNotReachHere();
1355     break;
1356   }
1357 }
1358 
1359 void TemplateTable::dop2(Operation op)
1360 {
1361   transition(dtos, dtos);
1362   switch (op) {
1363   case add:
1364     // n.b. use ldrd because this is a 64 bit slot
1365     __ pop_d(v1);
1366     __ faddd(v0, v1, v0);
1367     break;
1368   case sub:
1369     __ pop_d(v1);
1370     __ fsubd(v0, v1, v0);
1371     break;
1372   case mul:
1373     __ pop_d(v1);
1374     __ fmuld(v0, v1, v0);
1375     break;
1376   case div:
1377     __ pop_d(v1);
1378     __ fdivd(v0, v1, v0);
1379     break;
1380   case rem:
1381     __ fmovd(v1, v0);
1382     __ pop_d(v0);
1383     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::drem),
1384                          0, 2, MacroAssembler::ret_type_double);
1385     break;
1386   default:
1387     ShouldNotReachHere();
1388     break;
1389   }
1390 }
1391 
1392 void TemplateTable::ineg()
1393 {
1394   transition(itos, itos);
1395   __ negw(r0, r0);
1396 
1397 }
1398 
1399 void TemplateTable::lneg()
1400 {
1401   transition(ltos, ltos);
1402   __ neg(r0, r0);
1403 }
1404 
1405 void TemplateTable::fneg()
1406 {
1407   transition(ftos, ftos);
1408   __ fnegs(v0, v0);
1409 }
1410 
1411 void TemplateTable::dneg()
1412 {
1413   transition(dtos, dtos);
1414   __ fnegd(v0, v0);
1415 }
1416 
1417 void TemplateTable::iinc()
1418 {
1419   transition(vtos, vtos);
1420   __ load_signed_byte(r1, at_bcp(2)); // get constant
1421   locals_index(r2);
1422   __ ldr(r0, iaddress(r2));
1423   __ addw(r0, r0, r1);
1424   __ str(r0, iaddress(r2));
1425 }
1426 
1427 void TemplateTable::wide_iinc()
1428 {
1429   transition(vtos, vtos);
1430   // __ mov(r1, zr);
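       // at bcp+2 the stream holds the 16-bit local index followed by the 16-bit
       // signed increment, both big-endian; rev16 fixes the byte order within each
       // halfword, then ubfx/sbfx extract the index (negated for locals addressing)
       // and the sign-extended constant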
1431   __ ldrw(r1, at_bcp(2)); // get constant and index
1432   __ rev16(r1, r1);
1433   __ ubfx(r2, r1, 0, 16);
1434   __ neg(r2, r2);
1435   __ sbfx(r1, r1, 16, 16);
1436   __ ldr(r0, iaddress(r2));
1437   __ addw(r0, r0, r1);
1438   __ str(r0, iaddress(r2));
1439 }
1440 
1441 void TemplateTable::convert()
1442 {
1443   // Checking
1444 #ifdef ASSERT
1445   {
1446     TosState tos_in  = ilgl;
1447     TosState tos_out = ilgl;
1448     switch (bytecode()) {
1449     case Bytecodes::_i2l: // fall through
1450     case Bytecodes::_i2f: // fall through
1451     case Bytecodes::_i2d: // fall through
1452     case Bytecodes::_i2b: // fall through
1453     case Bytecodes::_i2c: // fall through
1454     case Bytecodes::_i2s: tos_in = itos; break;
1455     case Bytecodes::_l2i: // fall through
1456     case Bytecodes::_l2f: // fall through
1457     case Bytecodes::_l2d: tos_in = ltos; break;
1458     case Bytecodes::_f2i: // fall through
1459     case Bytecodes::_f2l: // fall through
1460     case Bytecodes::_f2d: tos_in = ftos; break;
1461     case Bytecodes::_d2i: // fall through
1462     case Bytecodes::_d2l: // fall through
1463     case Bytecodes::_d2f: tos_in = dtos; break;
1464     default             : ShouldNotReachHere();
1465     }
1466     switch (bytecode()) {
1467     case Bytecodes::_l2i: // fall through
1468     case Bytecodes::_f2i: // fall through
1469     case Bytecodes::_d2i: // fall through
1470     case Bytecodes::_i2b: // fall through
1471     case Bytecodes::_i2c: // fall through
1472     case Bytecodes::_i2s: tos_out = itos; break;
1473     case Bytecodes::_i2l: // fall through
1474     case Bytecodes::_f2l: // fall through
1475     case Bytecodes::_d2l: tos_out = ltos; break;
1476     case Bytecodes::_i2f: // fall through
1477     case Bytecodes::_l2f: // fall through
1478     case Bytecodes::_d2f: tos_out = ftos; break;
1479     case Bytecodes::_i2d: // fall through
1480     case Bytecodes::_l2d: // fall through
1481     case Bytecodes::_f2d: tos_out = dtos; break;
1482     default             : ShouldNotReachHere();
1483     }
1484     transition(tos_in, tos_out);
1485   }
1486 #endif // ASSERT
1487   // static const int64_t is_nan = 0x8000000000000000L;
1488 
1489   // Conversion
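       // For the f2i/f2l/d2i/d2l cases below, the fcvtz* result is only kept when
       // the conversion leaves the FPSR exception flags clear; otherwise (e.g. a NaN
       // or out-of-range input raising the invalid-operation flag) the value is
       // recomputed by the matching SharedRuntime routine so the Java-specified
       // result is produced.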
1490   switch (bytecode()) {
1491   case Bytecodes::_i2l:
1492     __ sxtw(r0, r0);
1493     break;
1494   case Bytecodes::_i2f:
1495     __ scvtfws(v0, r0);
1496     break;
1497   case Bytecodes::_i2d:
1498     __ scvtfwd(v0, r0);
1499     break;
1500   case Bytecodes::_i2b:
1501     __ sxtbw(r0, r0);
1502     break;
1503   case Bytecodes::_i2c:
1504     __ uxthw(r0, r0);
1505     break;
1506   case Bytecodes::_i2s:
1507     __ sxthw(r0, r0);
1508     break;
1509   case Bytecodes::_l2i:
1510     __ uxtw(r0, r0);
1511     break;
1512   case Bytecodes::_l2f:
1513     __ scvtfs(v0, r0);
1514     break;
1515   case Bytecodes::_l2d:
1516     __ scvtfd(v0, r0);
1517     break;
1518   case Bytecodes::_f2i:
1519   {
1520     Label L_Okay;
1521     __ clear_fpsr();
1522     __ fcvtzsw(r0, v0);
1523     __ get_fpsr(r1);
1524     __ cbzw(r1, L_Okay);
1525     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2i),
1526                          0, 1, MacroAssembler::ret_type_integral);
1527     __ bind(L_Okay);
1528   }
1529     break;
1530   case Bytecodes::_f2l:
1531   {
1532     Label L_Okay;
1533     __ clear_fpsr();
1534     __ fcvtzs(r0, v0);
1535     __ get_fpsr(r1);
1536     __ cbzw(r1, L_Okay);
1537     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::f2l),
1538                          0, 1, MacroAssembler::ret_type_integral);
1539     __ bind(L_Okay);
1540   }
1541     break;
1542   case Bytecodes::_f2d:
1543     __ fcvts(v0, v0);
1544     break;
1545   case Bytecodes::_d2i:
1546   {
1547     Label L_Okay;
1548     __ clear_fpsr();
1549     __ fcvtzdw(r0, v0);
1550     __ get_fpsr(r1);
1551     __ cbzw(r1, L_Okay);
1552     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2i),
1553                          0, 1, MacroAssembler::ret_type_integral);
1554     __ bind(L_Okay);
1555   }
1556     break;
1557   case Bytecodes::_d2l:
1558   {
1559     Label L_Okay;
1560     __ clear_fpsr();
1561     __ fcvtzd(r0, v0);
1562     __ get_fpsr(r1);
1563     __ cbzw(r1, L_Okay);
1564     __ call_VM_leaf_base1(CAST_FROM_FN_PTR(address, SharedRuntime::d2l),
1565                          0, 1, MacroAssembler::ret_type_integral);
1566     __ bind(L_Okay);
1567   }
1568     break;
1569   case Bytecodes::_d2f:
1570     __ fcvtd(v0, v0);
1571     break;
1572   default:
1573     ShouldNotReachHere();
1574   }
1575 }
1576 
1577 void TemplateTable::lcmp()
1578 {
1579   transition(ltos, itos);
1580   Label done;
1581   __ pop_l(r1);
1582   __ cmp(r1, r0);
1583   __ mov(r0, (u_int64_t)-1L);
1584   __ br(Assembler::LT, done);
1585   // __ mov(r0, 1UL);
1586   // __ csel(r0, r0, zr, Assembler::NE);
1587   // and here is a faster way
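       // csinc: r0 = 0 if the operands compared equal, 1 otherwise
       // (the -1 result for "less than" already branched to done above)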
1588   __ csinc(r0, zr, zr, Assembler::EQ);
1589   __ bind(done);
1590 }
1591 
1592 void TemplateTable::float_cmp(bool is_float, int unordered_result)
1593 {
1594   Label done;
1595   if (is_float) {
1596     // XXX get rid of pop here, use ... reg, mem32
1597     __ pop_f(v1);
1598     __ fcmps(v1, v0);
1599   } else {
1600     // XXX get rid of pop here, use ... reg, mem64
1601     __ pop_d(v1);
1602     __ fcmpd(v1, v0);
1603   }
1604   if (unordered_result < 0) {
1605     // we want -1 for unordered or less than, 0 for equal and 1 for
1606     // greater than.
1607     __ mov(r0, (u_int64_t)-1L);
1608     // for FP LT tests less than or unordered
1609     __ br(Assembler::LT, done);
1610     // install 0 for EQ otherwise 1
1611     __ csinc(r0, zr, zr, Assembler::EQ);
1612   } else {
1613     // we want -1 for less than, 0 for equal and 1 for unordered or
1614     // greater than.
1615     __ mov(r0, 1L);
1616     // for FP HI tests greater than or unordered
1617     __ br(Assembler::HI, done);
1618     // install 0 for EQ otherwise ~0
1619     __ csinv(r0, zr, zr, Assembler::EQ);
1620 
1621   }
1622   __ bind(done);
1623 }
1624 
1625 void TemplateTable::branch(bool is_jsr, bool is_wide)
1626 {
1627   // We might be moving to a safepoint.  The thread which calls
1628   // Interpreter::notice_safepoints() will effectively flush its cache
1629   // when it makes a system call, but we need to do something to
1630   // ensure that we see the changed dispatch table.
1631   __ membar(MacroAssembler::LoadLoad);
1632 
1633   __ profile_taken_branch(r0, r1);
1634   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
1635                              InvocationCounter::counter_offset();
1636   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
1637                               InvocationCounter::counter_offset();
1638 
1639   // load branch displacement
1640   if (!is_wide) {
1641     __ ldrh(r2, at_bcp(1));
1642     __ rev16(r2, r2);
1643     // sign extend the 16 bit value in r2
1644     __ sbfm(r2, r2, 0, 15);
1645   } else {
1646     __ ldrw(r2, at_bcp(1));
1647     __ revw(r2, r2);
1648     // sign extend the 32 bit value in r2
1649     __ sbfm(r2, r2, 0, 31);
1650   }
1651 
1652   // Handle all the JSR stuff here, then exit.
1653   // It's much shorter and cleaner than intermingling with the non-JSR
1654   // normal-branch stuff occurring below.
1655 
1656   if (is_jsr) {
1657     // Pre-load the next target bytecode into rscratch1
1658     __ load_unsigned_byte(rscratch1, Address(rbcp, r2));
1659     // compute return address as bci
1660     __ ldr(rscratch2, Address(rmethod, Method::const_offset()));
1661     __ add(rscratch2, rscratch2,
1662            in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3));
1663     __ sub(r1, rbcp, rscratch2);
1664     __ push_i(r1);
1665     // Adjust the bcp by the displacement in r2 (16 bits for jsr, 32 bits for jsr_w)
1666     __ add(rbcp, rbcp, r2);
1667     __ dispatch_only(vtos);
1668     return;
1669   }
1670 
1671   // Normal (non-jsr) branch handling
1672 
1673   // Adjust the bcp by the displacement in r2
1674   __ add(rbcp, rbcp, r2);
1675 
1676   assert(UseLoopCounter || !UseOnStackReplacement,
1677          "on-stack-replacement requires loop counters");
1678   Label backedge_counter_overflow;
1679   Label profile_method;
1680   Label dispatch;
1681   if (UseLoopCounter) {
1682     // increment backedge counter for backward branches
1683     // r0: MDO
1684     // w1: MDO bumped taken-count
1685     // r2: target offset
1686     __ cmp(r2, zr);
1687     __ br(Assembler::GT, dispatch); // count only if backward branch
1688 
1689     // ECN: FIXME: This code smells
1690     // check if MethodCounters exists
1691     Label has_counters;
1692     __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset()));
1693     __ cbnz(rscratch1, has_counters);
1694     __ push(r0);
1695     __ push(r1);
1696     __ push(r2);
1697     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
1698             InterpreterRuntime::build_method_counters), rmethod);
1699     __ pop(r2);
1700     __ pop(r1);
1701     __ pop(r0);
1702     __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset()));
1703     __ cbz(rscratch1, dispatch); // No MethodCounters allocated, OutOfMemory
1704     __ bind(has_counters);
1705 
1706     if (TieredCompilation) {
1707       Label no_mdo;
1708       int increment = InvocationCounter::count_increment;
1709       if (ProfileInterpreter) {
1710         // Are we profiling?
1711         __ ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset())));
1712         __ cbz(r1, no_mdo);
1713         // Increment the MDO backedge counter
1714         const Address mdo_backedge_counter(r1, in_bytes(MethodData::backedge_counter_offset()) +
1715                                            in_bytes(InvocationCounter::counter_offset()));
1716         const Address mask(r1, in_bytes(MethodData::backedge_mask_offset()));
1717         __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
1718                                    r0, rscratch1, false, Assembler::EQ, &backedge_counter_overflow);
1719         __ b(dispatch);
1720       }
1721       __ bind(no_mdo);
1722       // Increment backedge counter in MethodCounters*
1723       __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset()));
1724       const Address mask(rscratch1, in_bytes(MethodCounters::backedge_mask_offset()));
1725       __ increment_mask_and_jump(Address(rscratch1, be_offset), increment, mask,
1726                                  r0, rscratch2, false, Assembler::EQ, &backedge_counter_overflow);
1727     } else { // not TieredCompilation
1728       // increment counter
1729       __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset()));
1730       __ ldrw(r0, Address(rscratch2, be_offset));        // load backedge counter
1731       __ addw(rscratch1, r0, InvocationCounter::count_increment); // increment counter
1732       __ strw(rscratch1, Address(rscratch2, be_offset));        // store counter
1733 
1734       __ ldrw(r0, Address(rscratch2, inv_offset));    // load invocation counter
1735       __ andw(r0, r0, (unsigned)InvocationCounter::count_mask_value); // and the status bits
1736       __ addw(r0, r0, rscratch1);        // add both counters
1737 
1738       if (ProfileInterpreter) {
1739         // Test to see if we should create a method data oop
1740         __ ldrw(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
1741         __ cmpw(r0, rscratch1);
1742         __ br(Assembler::LT, dispatch);
1743 
1744         // if no method data exists, go to profile method
1745         __ test_method_data_pointer(r0, profile_method);
1746 
1747         if (UseOnStackReplacement) {
1748           // check for overflow against w1 which is the MDO taken count
1749           __ ldrw(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
1750           __ cmpw(r1, rscratch1);
1751           __ br(Assembler::LO, dispatch); // Intel == Assembler::below
1752 
1753           // When ProfileInterpreter is on, the backedge_count comes
1754           // from the MethodData*, which value does not get reset on
1755           // the call to frequency_counter_overflow().  To avoid
1756           // excessive calls to the overflow routine while the method is
1757           // being compiled, add a second test to make sure the overflow
1758           // function is called only once every overflow_frequency.
1759           const int overflow_frequency = 1024;
1760           __ andsw(r1, r1, overflow_frequency - 1);
1761           __ br(Assembler::EQ, backedge_counter_overflow);
1762 
1763         }
1764       } else {
1765         if (UseOnStackReplacement) {
1766           // check for overflow against w0, which is the sum of the
1767           // counters
1768           __ ldrw(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
1769           __ cmpw(r0, rscratch1);
1770           __ br(Assembler::HS, backedge_counter_overflow); // Intel == Assembler::aboveEqual
1771         }
1772       }
1773     }
1774   }
1775   __ bind(dispatch);
1776 
1777   // Pre-load the next target bytecode into rscratch1
1778   __ load_unsigned_byte(rscratch1, Address(rbcp, 0));
1779 
1780   // continue with the bytecode @ target
1781   // rscratch1: target bytecode
1782   // rbcp: target bcp
1783   __ dispatch_only(vtos);
1784 
1785   if (UseLoopCounter) {
1786     if (ProfileInterpreter) {
1787       // Out-of-line code to allocate method data oop.
1788       __ bind(profile_method);
1789       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
1790       __ load_unsigned_byte(r1, Address(rbcp, 0));  // restore target bytecode
1791       __ set_method_data_pointer_for_bcp();
1792       __ b(dispatch);
1793     }
1794 
1795     if (TieredCompilation || UseOnStackReplacement) {
1796       // invocation counter overflow
1797       __ bind(backedge_counter_overflow);
1798       __ neg(r2, r2);
1799       __ add(r2, r2, rbcp);     // branch bcp
1800       // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
1801       __ call_VM(noreg,
1802                  CAST_FROM_FN_PTR(address,
1803                                   InterpreterRuntime::frequency_counter_overflow),
1804                  r2);
1805       if (!UseOnStackReplacement)
1806         __ b(dispatch);
1807     }
1808 
1809     if (UseOnStackReplacement) {
1810       __ load_unsigned_byte(r1, Address(rbcp, 0));  // restore target bytecode
1811 
1812       // r0: osr nmethod (osr ok) or NULL (osr not possible)
1813       // w1: target bytecode
1814       // r2: scratch
1815       __ cbz(r0, dispatch);     // test result -- no osr if null
1816       // nmethod may have been invalidated (VM may block upon call_VM return)
1817       __ ldrb(r2, Address(r0, nmethod::state_offset()));
1818       if (nmethod::in_use != 0)
1819         __ sub(r2, r2, nmethod::in_use);
1820       __ cbnz(r2, dispatch);
1821 
1822       // We have the address of an on stack replacement routine in r0
1823       // We need to prepare to execute the OSR method. First we must
1824       // migrate the locals and monitors off of the stack.
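           // The hand-off, in outline (a sketch of the code that follows):
           //   buf = SharedRuntime::OSR_migration_begin();  // copies locals & monitors
           //   j_rarg0 = buf;                               // OSR buffer argument
           //   esp = saved sender sp; leave();              // pop the interpreter frame
           //   jump to the nmethod's osr_entry_point()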
1825 
1826       __ mov(r19, r0);                             // save the nmethod
1827 
1828       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
1829 
1830       // r0 is OSR buffer, move it to expected parameter location
1831       __ mov(j_rarg0, r0);
1832 
1833       // remove activation
1834       // get sender esp
1835       __ ldr(esp,
1836           Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize));
1837       // remove frame anchor
1838       __ leave();
1839       // Ensure compiled code always sees stack at proper alignment
1840       __ andr(sp, esp, -16);
1841 
1842       // and begin the OSR nmethod
1843       __ ldr(rscratch1, Address(r19, nmethod::osr_entry_point_offset()));
1844       __ br(rscratch1);
1845     }
1846   }
1847 }
1848 
1849 
1850 void TemplateTable::if_0cmp(Condition cc)
1851 {
1852   transition(itos, vtos);
1853   // assume branch is more often taken than not (loops use backward branches)
1854   Label not_taken;
1855   if (cc == equal)
1856     __ cbnzw(r0, not_taken);
1857   else if (cc == not_equal)
1858     __ cbzw(r0, not_taken);
1859   else {
1860     __ andsw(zr, r0, r0);
1861     __ br(j_not(cc), not_taken);
1862   }
1863 
1864   branch(false, false);
1865   __ bind(not_taken);
1866   __ profile_not_taken_branch(r0);
1867 }
1868 
1869 void TemplateTable::if_icmp(Condition cc)
1870 {
1871   transition(itos, vtos);
1872   // assume branch is more often taken than not (loops use backward branches)
1873   Label not_taken;
1874   __ pop_i(r1);
1875   __ cmpw(r1, r0, Assembler::LSL);
1876   __ br(j_not(cc), not_taken);
1877   branch(false, false);
1878   __ bind(not_taken);
1879   __ profile_not_taken_branch(r0);
1880 }
1881 
1882 void TemplateTable::if_nullcmp(Condition cc)
1883 {
1884   transition(atos, vtos);
1885   // assume branch is more often taken than not (loops use backward branches)
1886   Label not_taken;
1887   if (cc == equal)
1888     __ cbnz(r0, not_taken);
1889   else
1890     __ cbz(r0, not_taken);
1891   branch(false, false);
1892   __ bind(not_taken);
1893   __ profile_not_taken_branch(r0);
1894 }
1895 
1896 void TemplateTable::if_acmp(Condition cc)
1897 {
1898   transition(atos, vtos);
1899   // assume branch is more often taken than not (loops use backward branches)
1900   Label not_taken;
1901   __ pop_ptr(r1);
1902   __ cmp(r1, r0);
1903   __ br(j_not(cc), not_taken);
1904   branch(false, false);
1905   __ bind(not_taken);
1906   __ profile_not_taken_branch(r0);
1907 }
1908 
1909 void TemplateTable::ret() {
1910   transition(vtos, vtos);
1911   // We might be moving to a safepoint.  The thread which calls
1912   // Interpreter::notice_safepoints() will effectively flush its cache
1913   // when it makes a system call, but we need to do something to
1914   // ensure that we see the changed dispatch table.
1915   __ membar(MacroAssembler::LoadLoad);
1916 
1917   locals_index(r1);
1918   __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
1919   __ profile_ret(r1, r2);
1920   __ ldr(rbcp, Address(rmethod, Method::const_offset()));
1921   __ lea(rbcp, Address(rbcp, r1));
1922   __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
1923   __ dispatch_next(vtos);
1924 }
1925 
1926 void TemplateTable::wide_ret() {
1927   transition(vtos, vtos);
1928   locals_index_wide(r1);
1929   __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp
1930   __ profile_ret(r1, r2);
1931   __ ldr(rbcp, Address(rmethod, Method::const_offset()));
1932   __ lea(rbcp, Address(rbcp, r1));
1933   __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
1934   __ dispatch_next(vtos);
1935 }
1936 
1937 
1938 void TemplateTable::tableswitch() {
1939   Label default_case, continue_execution;
1940   transition(itos, vtos);
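       // Operand layout of tableswitch in the bytecode stream (per the JVM
       // spec): after padding to a 4-byte boundary there is a 4-byte default
       // offset, then low, then high, then (high - low + 1) 4-byte jump
       // offsets, all big-endian -- hence the rev32 before each value is used.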
1941   // align rbcp
1942   __ lea(r1, at_bcp(BytesPerInt));
1943   __ andr(r1, r1, -BytesPerInt);
1944   // load lo & hi
1945   __ ldrw(r2, Address(r1, BytesPerInt));
1946   __ ldrw(r3, Address(r1, 2 * BytesPerInt));
1947   __ rev32(r2, r2);
1948   __ rev32(r3, r3);
1949   // check against lo & hi
1950   __ cmpw(r0, r2);
1951   __ br(Assembler::LT, default_case);
1952   __ cmpw(r0, r3);
1953   __ br(Assembler::GT, default_case);
1954   // lookup dispatch offset
1955   __ subw(r0, r0, r2);
1956   __ lea(r3, Address(r1, r0, Address::uxtw(2)));
1957   __ ldrw(r3, Address(r3, 3 * BytesPerInt));
1958   __ profile_switch_case(r0, r1, r2);
1959   // continue execution
1960   __ bind(continue_execution);
1961   __ rev32(r3, r3);
1962   __ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0)));
1963   __ add(rbcp, rbcp, r3, ext::sxtw);
1964   __ dispatch_only(vtos);
1965   // handle default
1966   __ bind(default_case);
1967   __ profile_switch_default(r0);
1968   __ ldrw(r3, Address(r1, 0));
1969   __ b(continue_execution);
1970 }
1971 
1972 void TemplateTable::lookupswitch() {
1973   transition(itos, itos);
1974   __ stop("lookupswitch bytecode should have been rewritten");
1975 }
1976 
1977 void TemplateTable::fast_linearswitch() {
1978   transition(itos, vtos);
1979   Label loop_entry, loop, found, continue_execution;
1980   // bswap r0 so we can avoid bswapping the table entries
1981   __ rev32(r0, r0);
1982   // align rbcp
1983   __ lea(r19, at_bcp(BytesPerInt)); // btw: should be able to get rid of
1984                                     // this instruction (change offsets
1985                                     // below)
1986   __ andr(r19, r19, -BytesPerInt);
1987   // set counter
1988   __ ldrw(r1, Address(r19, BytesPerInt));
1989   __ rev32(r1, r1);
1990   __ b(loop_entry);
1991   // table search
1992   __ bind(loop);
1993   __ lea(rscratch1, Address(r19, r1, Address::lsl(3)));
1994   __ ldrw(rscratch1, Address(rscratch1, 2 * BytesPerInt));
1995   __ cmpw(r0, rscratch1);
1996   __ br(Assembler::EQ, found);
1997   __ bind(loop_entry);
1998   __ subs(r1, r1, 1);
1999   __ br(Assembler::PL, loop);
2000   // default case
2001   __ profile_switch_default(r0);
2002   __ ldrw(r3, Address(r19, 0));
2003   __ b(continue_execution);
2004   // entry found -> get offset
2005   __ bind(found);
2006   __ lea(rscratch1, Address(r19, r1, Address::lsl(3)));
2007   __ ldrw(r3, Address(rscratch1, 3 * BytesPerInt));
2008   __ profile_switch_case(r1, r0, r19);
2009   // continue execution
2010   __ bind(continue_execution);
2011   __ rev32(r3, r3);
2012   __ add(rbcp, rbcp, r3, ext::sxtw);
2013   __ ldrb(rscratch1, Address(rbcp, 0));
2014   __ dispatch_only(vtos);
2015 }
2016 
2017 void TemplateTable::fast_binaryswitch() {
2018   transition(itos, vtos);
2019   // Implementation using the following core algorithm:
2020   //
2021   // int binary_search(int key, LookupswitchPair* array, int n) {
2022   //   // Binary search according to "Methodik des Programmierens" by
2023   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2024   //   int i = 0;
2025   //   int j = n;
2026   //   while (i+1 < j) {
2027   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2028   //     // with      Q: for all i: 0 <= i < n: key < a[i]
2029   //     // where a stands for the array and assuming that the (non-existent)
2030   //     // element a[n] is infinitely big.
2031   //     int h = (i + j) >> 1;
2032   //     // i < h < j
2033   //     if (key < array[h].fast_match()) {
2034   //       j = h;
2035   //     } else {
2036   //       i = h;
2037   //     }
2038   //   }
2039   //   // R: a[i] <= key < a[i+1] or Q
2040   //   // (i.e., if key is within array, i is the correct index)
2041   //   return i;
2042   // }
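       //
       // Each LookupswitchPair in the bytecode stream is a pair of big-endian
       // 4-byte words, (match, offset); that is why the loads below scale the
       // index by 8 (Address::lsl(3)) and rev32 each value before using it.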
2043 
2044   // Register allocation
2045   const Register key   = r0; // already set (tosca)
2046   const Register array = r1;
2047   const Register i     = r2;
2048   const Register j     = r3;
2049   const Register h     = rscratch1;
2050   const Register temp  = rscratch2;
2051 
2052   // Find array start
2053   __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
2054                                           // get rid of this
2055                                           // instruction (change
2056                                           // offsets below)
2057   __ andr(array, array, -BytesPerInt);
2058 
2059   // Initialize i & j
2060   __ mov(i, 0);                            // i = 0;
2061   __ ldrw(j, Address(array, -BytesPerInt)); // j = length(array);
2062 
2063   // Convert j into native byte ordering
2064   __ rev32(j, j);
2065 
2066   // And start
2067   Label entry;
2068   __ b(entry);
2069 
2070   // binary search loop
2071   {
2072     Label loop;
2073     __ bind(loop);
2074     // int h = (i + j) >> 1;
2075     __ addw(h, i, j);                           // h = i + j;
2076     __ lsrw(h, h, 1);                           // h = (i + j) >> 1;
2077     // if (key < array[h].fast_match()) {
2078     //   j = h;
2079     // } else {
2080     //   i = h;
2081     // }
2082     // Convert array[h].match to native byte-ordering before compare
2083     __ ldr(temp, Address(array, h, Address::lsl(3)));
2084     __ rev32(temp, temp);
2085     __ cmpw(key, temp);
2086     // j = h if (key <  array[h].fast_match())
2087     __ csel(j, h, j, Assembler::LT);
2088     // i = h if (key >= array[h].fast_match())
2089     __ csel(i, h, i, Assembler::GE);
2090     // while (i+1 < j)
2091     __ bind(entry);
2092     __ addw(h, i, 1);          // i+1
2093     __ cmpw(h, j);             // i+1 < j
2094     __ br(Assembler::LT, loop);
2095   }
2096 
2097   // end of binary search, result index is i (must check again!)
2098   Label default_case;
2099   // Convert array[i].match to native byte-ordering before compare
2100   __ ldr(temp, Address(array, i, Address::lsl(3)));
2101   __ rev32(temp, temp);
2102   __ cmpw(key, temp);
2103   __ br(Assembler::NE, default_case);
2104 
2105   // entry found -> j = offset
2106   __ add(j, array, i, ext::uxtx, 3);
2107   __ ldrw(j, Address(j, BytesPerInt));
2108   __ profile_switch_case(i, key, array);
2109   __ rev32(j, j);
2110   __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
2111   __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
2112   __ dispatch_only(vtos);
2113 
2114   // default case -> j = default offset
2115   __ bind(default_case);
2116   __ profile_switch_default(i);
2117   __ ldrw(j, Address(array, -2 * BytesPerInt));
2118   __ rev32(j, j);
2119   __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
2120   __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
2121   __ dispatch_only(vtos);
2122 }
2123 
2124 
2125 void TemplateTable::_return(TosState state)
2126 {
2127   transition(state, state);
2128   assert(_desc->calls_vm(),
2129          "inconsistent calls_vm information"); // call in remove_activation
2130 
2131   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2132     assert(state == vtos, "only valid state");
2133 
2134     __ ldr(c_rarg1, aaddress(0));
2135     __ load_klass(r3, c_rarg1);
2136     __ ldrw(r3, Address(r3, Klass::access_flags_offset()));
2137     Label skip_register_finalizer;
2138     __ tbz(r3, exact_log2(JVM_ACC_HAS_FINALIZER), skip_register_finalizer);
2139 
2140     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
2141 
2142     __ bind(skip_register_finalizer);
2143   }
2144 
2145   // Issue a StoreStore barrier after all stores but before return
2146   // from any constructor for any class with a final field.  We don't
2147   // know if this return is from such a constructor, so we always do so.
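       // Illustration only (hypothetical class, not taken from this file):
       //   class C { final int x; C() { x = 42; } }
       // The StoreStore barrier keeps the store to x from being reordered
       // past the publication of the new C reference by the caller.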
2148   if (_desc->bytecode() == Bytecodes::_return)
2149     __ membar(MacroAssembler::StoreStore);
2150 
2151   // Narrow result if state is itos but result type is smaller.
2152   // Need to narrow in the return bytecode rather than in generate_return_entry
2153   // since compiled code callers expect the result to already be narrowed.
2154   if (state == itos) {
2155     __ narrow(r0);
2156   }
2157 
2158   __ remove_activation(state);
2159   __ ret(lr);
2160 }
2161 
2162 // ----------------------------------------------------------------------------
2163 // Volatile variables demand their effects be made known to all CPU's
2164 // in order.  Store buffers on most chips allow reads & writes to
2165 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2166 // without some kind of memory barrier (i.e., it's not sufficient that
2167 // the interpreter does not reorder volatile references, the hardware
2168 // also must not reorder them).
2169 //
2170 // According to the new Java Memory Model (JMM):
2171 // (1) All volatiles are serialized wrt to each other.  ALSO reads &
2172 //     writes act as acquire & release, so:
2173 // (2) A read cannot let unrelated NON-volatile memory refs that
2174 //     happen after the read float up to before the read.  It's OK for
2175 //     non-volatile memory refs that happen before the volatile read to
2176 //     float down below it.
2177 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
2178 //     memory refs that happen BEFORE the write float down to after the
2179 //     write.  It's OK for non-volatile memory refs that happen after the
2180 //     volatile write to float up before it.
2181 //
2182 // We only put in barriers around volatile refs (they are expensive),
2183 // not _between_ memory refs (that would require us to track the
2184 // flavor of the previous memory refs).  Requirements (2) and (3)
2185 // require some barriers before volatile stores and after volatile
2186 // loads.  These nearly cover requirement (1) but miss the
2187 // volatile-store-volatile-load case.  This final case is placed after
2188 // volatile-stores although it could just as well go before
2189 // volatile-loads.
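     //
     // Concretely, the field access paths below end up with (a sketch; the
     // exact barriers are emitted in getfield_or_static / putfield_or_static):
     //   volatile load:   [membar AnyAny if !UseBarriersForVolatile]; ldr; membar LoadLoad|LoadStore
     //   volatile store:  membar StoreStore; str; membar StoreLoad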
2190 
2191 void TemplateTable::resolve_cache_and_index(int byte_no,
2192                                             Register Rcache,
2193                                             Register index,
2194                                             size_t index_size) {
2195   const Register temp = r19;
2196   assert_different_registers(Rcache, index, temp);
2197 
2198   Label resolved;
2199 
2200   Bytecodes::Code code = bytecode();
2201   switch (code) {
2202   case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2203   case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
2204   }
2205 
2206   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2207   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
2208   __ cmp(temp, (int) code);  // have we resolved this bytecode?
2209   __ br(Assembler::EQ, resolved);
2210 
2211   // resolve first time through
2212   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2213   __ mov(temp, (int) code);
2214   __ call_VM(noreg, entry, temp);
2215 
2216   // Update registers with resolved info
2217   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2218   // n.b. unlike x86 Rcache is now rcpool plus the indexed offset
2219   // so all clients of this method must be modified accordingly
2220   __ bind(resolved);
2221 }
2222 
2223 // The Rcache and index registers must be set before the call.
2224 // N.B. unlike x86, cache already includes the index offset.
2225 void TemplateTable::load_field_cp_cache_entry(Register obj,
2226                                               Register cache,
2227                                               Register index,
2228                                               Register off,
2229                                               Register flags,
2230                                               bool is_static = false) {
2231   assert_different_registers(cache, index, flags, off);
2232 
2233   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2234   // Field offset
2235   __ ldr(off, Address(cache, in_bytes(cp_base_offset +
2236                                           ConstantPoolCacheEntry::f2_offset())));
2237   // Flags
2238   __ ldrw(flags, Address(cache, in_bytes(cp_base_offset +
2239                                            ConstantPoolCacheEntry::flags_offset())));
2240 
2241   // klass overwrite register
2242   if (is_static) {
2243     __ ldr(obj, Address(cache, in_bytes(cp_base_offset +
2244                                         ConstantPoolCacheEntry::f1_offset())));
2245     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2246     __ ldr(obj, Address(obj, mirror_offset));
2247   }
2248 }
2249 
2250 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2251                                                Register method,
2252                                                Register itable_index,
2253                                                Register flags,
2254                                                bool is_invokevirtual,
2255                                                bool is_invokevfinal, /*unused*/
2256                                                bool is_invokedynamic) {
2257   // setup registers
2258   const Register cache = rscratch2;
2259   const Register index = r4;
2260   assert_different_registers(method, flags);
2261   assert_different_registers(method, cache, index);
2262   assert_different_registers(itable_index, flags);
2263   assert_different_registers(itable_index, cache, index);
2264   // determine constant pool cache field offsets
2265   assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
2266   const int method_offset = in_bytes(
2267     ConstantPoolCache::base_offset() +
2268       (is_invokevirtual
2269        ? ConstantPoolCacheEntry::f2_offset()
2270        : ConstantPoolCacheEntry::f1_offset()));
2271   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
2272                                     ConstantPoolCacheEntry::flags_offset());
2273   // access constant pool cache fields
2274   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
2275                                     ConstantPoolCacheEntry::f2_offset());
2276 
2277   size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
2278   resolve_cache_and_index(byte_no, cache, index, index_size);
2279   __ ldr(method, Address(cache, method_offset));
2280 
2281   if (itable_index != noreg) {
2282     __ ldr(itable_index, Address(cache, index_offset));
2283   }
2284   __ ldrw(flags, Address(cache, flags_offset));
2285 }
2286 
2287 
2288 // The registers cache and index are expected to be set before the call.
2289 // Correct values of the cache and index registers are preserved.
2290 void TemplateTable::jvmti_post_field_access(Register cache, Register index,
2291                                             bool is_static, bool has_tos) {
2292   // do the JVMTI work here to avoid disturbing the register state below
2293   // We use c_rarg registers here because they are the registers used in
2294   // the call to the VM
2295   if (JvmtiExport::can_post_field_access()) {
2296     // Check to see if a field access watch has been set before we
2297     // take the time to call into the VM.
2298     Label L1;
2299     assert_different_registers(cache, index, r0);
2300     __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2301     __ ldrw(r0, Address(rscratch1));
2302     __ cbzw(r0, L1);
2303 
2304     __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
2305     __ lea(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset())));
2306 
2307     if (is_static) {
2308       __ mov(c_rarg1, zr); // NULL object reference
2309     } else {
2310       __ ldr(c_rarg1, at_tos()); // get object pointer without popping it
2311       __ verify_oop(c_rarg1);
2312     }
2313     // c_rarg1: object pointer or NULL
2314     // c_rarg2: cache entry pointer
2315     // c_rarg3: jvalue object on the stack
2316     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2317                                        InterpreterRuntime::post_field_access),
2318                c_rarg1, c_rarg2, c_rarg3);
2319     __ get_cache_and_index_at_bcp(cache, index, 1);
2320     __ bind(L1);
2321   }
2322 }
2323 
2324 void TemplateTable::pop_and_check_object(Register r)
2325 {
2326   __ pop_ptr(r);
2327   __ null_check(r);  // for field access must check obj.
2328   __ verify_oop(r);
2329 }
2330 
2331 void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc)
2332 {
2333   const Register cache = r2;
2334   const Register index = r3;
2335   const Register obj   = r4;
2336   const Register off   = r19;
2337   const Register flags = r0;
2338   const Register raw_flags = r6;
2339   const Register bc    = r4; // uses same reg as obj, so don't mix them
2340 
2341   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2342   jvmti_post_field_access(cache, index, is_static, false);
2343   load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static);
2344 
2345   if (!is_static) {
2346     // obj is on the stack
2347     pop_and_check_object(obj);
2348   }
2349 
2350   // 8179954: We need to make sure that the code generated for
2351   // volatile accesses forms a sequentially-consistent set of
2352   // operations when combined with STLR and LDAR.  Without a leading
2353   // membar it's possible for a simple Dekker test to fail if loads
2354   // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
2355   // the stores in one method and we interpret the loads in another.
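       // Sketch of the failure mode (illustrative): a thread executes a
       // compiled volatile store as "stlr" and then an interpreted volatile
       // load as plain "ldr; dmb".  Nothing orders the LDR after the STLR,
       // so the load can complete before the store becomes visible and a
       // Dekker-style test observes both flags as 0.  The leading full
       // barrier below restores the required ordering.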
2356   if (! UseBarriersForVolatile) {
2357     Label notVolatile;
2358     __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2359     __ membar(MacroAssembler::AnyAny);
2360     __ bind(notVolatile);
2361   }
2362 
2363   const Address field(obj, off);
2364 
2365   Label Done, notByte, notBool, notInt, notShort, notChar,
2366               notLong, notFloat, notObj, notDouble;
2367 
2368   // x86 uses a shift and mask, or wings it with a shift plus an assert
2369   // that the mask is not needed. aarch64 just uses a bitfield extract
2370   __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift,
2371            ConstantPoolCacheEntry::tos_state_bits);
2372 
2373   assert(btos == 0, "change code, btos != 0");
2374   __ cbnz(flags, notByte);
2375 
2376   // Don't rewrite getstatic, only getfield
2377   if (is_static) rc = may_not_rewrite;
2378 
2379   // btos
2380   __ load_signed_byte(r0, field);
2381   __ push(btos);
2382   // Rewrite bytecode to be faster
2383   if (rc == may_rewrite) {
2384     patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
2385   }
2386   __ b(Done);
2387 
2388   __ bind(notByte);
2389   __ cmp(flags, ztos);
2390   __ br(Assembler::NE, notBool);
2391 
2392   // ztos (same code as btos)
2393   __ ldrsb(r0, field);
2394   __ push(ztos);
2395   // Rewrite bytecode to be faster
2396   if (rc == may_rewrite) {
2397     // use btos rewriting, no truncating to t/f bit is needed for getfield.
2398     patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
2399   }
2400   __ b(Done);
2401 
2402   __ bind(notBool);
2403   __ cmp(flags, atos);
2404   __ br(Assembler::NE, notObj);
2405   // atos
2406   do_oop_load(_masm, field, r0, ACCESS_IN_HEAP);
2407   __ push(atos);
2408   if (rc == may_rewrite) {
2409     patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
2410   }
2411   __ b(Done);
2412 
2413   __ bind(notObj);
2414   __ cmp(flags, itos);
2415   __ br(Assembler::NE, notInt);
2416   // itos
2417   __ ldrw(r0, field);
2418   __ push(itos);
2419   // Rewrite bytecode to be faster
2420   if (rc == may_rewrite) {
2421     patch_bytecode(Bytecodes::_fast_igetfield, bc, r1);
2422   }
2423   __ b(Done);
2424 
2425   __ bind(notInt);
2426   __ cmp(flags, ctos);
2427   __ br(Assembler::NE, notChar);
2428   // ctos
2429   __ load_unsigned_short(r0, field);
2430   __ push(ctos);
2431   // Rewrite bytecode to be faster
2432   if (rc == may_rewrite) {
2433     patch_bytecode(Bytecodes::_fast_cgetfield, bc, r1);
2434   }
2435   __ b(Done);
2436 
2437   __ bind(notChar);
2438   __ cmp(flags, stos);
2439   __ br(Assembler::NE, notShort);
2440   // stos
2441   __ load_signed_short(r0, field);
2442   __ push(stos);
2443   // Rewrite bytecode to be faster
2444   if (rc == may_rewrite) {
2445     patch_bytecode(Bytecodes::_fast_sgetfield, bc, r1);
2446   }
2447   __ b(Done);
2448 
2449   __ bind(notShort);
2450   __ cmp(flags, ltos);
2451   __ br(Assembler::NE, notLong);
2452   // ltos
2453   __ ldr(r0, field);
2454   __ push(ltos);
2455   // Rewrite bytecode to be faster
2456   if (rc == may_rewrite) {
2457     patch_bytecode(Bytecodes::_fast_lgetfield, bc, r1);
2458   }
2459   __ b(Done);
2460 
2461   __ bind(notLong);
2462   __ cmp(flags, ftos);
2463   __ br(Assembler::NE, notFloat);
2464   // ftos
2465   __ ldrs(v0, field);
2466   __ push(ftos);
2467   // Rewrite bytecode to be faster
2468   if (rc == may_rewrite) {
2469     patch_bytecode(Bytecodes::_fast_fgetfield, bc, r1);
2470   }
2471   __ b(Done);
2472 
2473   __ bind(notFloat);
2474 #ifdef ASSERT
2475   __ cmp(flags, dtos);
2476   __ br(Assembler::NE, notDouble);
2477 #endif
2478   // dtos
2479   __ ldrd(v0, field);
2480   __ push(dtos);
2481   // Rewrite bytecode to be faster
2482   if (rc == may_rewrite) {
2483     patch_bytecode(Bytecodes::_fast_dgetfield, bc, r1);
2484   }
2485 #ifdef ASSERT
2486   __ b(Done);
2487 
2488   __ bind(notDouble);
2489   __ stop("Bad state");
2490 #endif
2491 
2492   __ bind(Done);
2493 
2494   Label notVolatile;
2495   __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2496   __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2497   __ bind(notVolatile);
2498 }
2499 
2500 
2501 void TemplateTable::getfield(int byte_no)
2502 {
2503   getfield_or_static(byte_no, false);
2504 }
2505 
2506 void TemplateTable::nofast_getfield(int byte_no) {
2507   getfield_or_static(byte_no, false, may_not_rewrite);
2508 }
2509 
2510 void TemplateTable::getstatic(int byte_no)
2511 {
2512   getfield_or_static(byte_no, true);
2513 }
2514 
2515 // The registers cache and index are expected to be set before the call.
2516 // The function may destroy various registers, just not the cache and index registers.
2517 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2518   transition(vtos, vtos);
2519 
2520   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2521 
2522   if (JvmtiExport::can_post_field_modification()) {
2523     // Check to see if a field modification watch has been set before
2524     // we take the time to call into the VM.
2525     Label L1;
2526     assert_different_registers(cache, index, r0);
2527     __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2528     __ ldrw(r0, Address(rscratch1));
2529     __ cbz(r0, L1);
2530 
2531     __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1);
2532 
2533     if (is_static) {
2534       // Life is simple.  Null out the object pointer.
2535       __ mov(c_rarg1, zr);
2536     } else {
2537       // Life is harder. The stack holds the value on top, followed by
2538       // the object.  We don't know the size of the value, though; it
2539       // could be one or two words depending on its type. As a result,
2540       // we must find the type to determine where the object is.
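           // Expression stack at this point (value pushed last), a sketch:
           //   one-word value:  [..., obj, value]        -> obj is at tos + 1
           //   two-word value:  [..., obj, value, value] -> obj is at tos + 2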
2541       __ ldrw(c_rarg3, Address(c_rarg2,
2542                                in_bytes(cp_base_offset +
2543                                         ConstantPoolCacheEntry::flags_offset())));
2544       __ lsr(c_rarg3, c_rarg3,
2545              ConstantPoolCacheEntry::tos_state_shift);
2546       ConstantPoolCacheEntry::verify_tos_state_shift();
2547       Label nope2, done, ok;
2548       __ ldr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
2549       __ cmpw(c_rarg3, ltos);
2550       __ br(Assembler::EQ, ok);
2551       __ cmpw(c_rarg3, dtos);
2552       __ br(Assembler::NE, nope2);
2553       __ bind(ok);
2554       __ ldr(c_rarg1, at_tos_p2()); // ltos (two word jvalue)
2555       __ bind(nope2);
2556     }
2557     // cache entry pointer
2558     __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset));
2559     // object (tos)
2560     __ mov(c_rarg3, esp);
2561     // c_rarg1: object pointer set up above (NULL if static)
2562     // c_rarg2: cache entry pointer
2563     // c_rarg3: jvalue object on the stack
2564     __ call_VM(noreg,
2565                CAST_FROM_FN_PTR(address,
2566                                 InterpreterRuntime::post_field_modification),
2567                c_rarg1, c_rarg2, c_rarg3);
2568     __ get_cache_and_index_at_bcp(cache, index, 1);
2569     __ bind(L1);
2570   }
2571 }
2572 
2573 void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
2574   transition(vtos, vtos);
2575 
2576   const Register cache = r2;
2577   const Register index = r3;
2578   const Register obj   = r2;
2579   const Register off   = r19;
2580   const Register flags = r0;
2581   const Register bc    = r4;
2582 
2583   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2584   jvmti_post_field_mod(cache, index, is_static);
2585   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2586 
2587   Label Done;
2588   __ mov(r5, flags);
2589 
2590   {
2591     Label notVolatile;
2592     __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2593     __ membar(MacroAssembler::StoreStore);
2594     __ bind(notVolatile);
2595   }
2596 
2597   // field address
2598   const Address field(obj, off);
2599 
2600   Label notByte, notBool, notInt, notShort, notChar,
2601         notLong, notFloat, notObj, notDouble;
2602 
2603   // x86 uses a shift and mask, or wings it with a shift plus an assert
2604   // that the mask is not needed. aarch64 just uses a bitfield extract
2605   __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
2606 
2607   assert(btos == 0, "change code, btos != 0");
2608   __ cbnz(flags, notByte);
2609 
2610   // Don't rewrite putstatic, only putfield
2611   if (is_static) rc = may_not_rewrite;
2612 
2613   // btos
2614   {
2615     __ pop(btos);
2616     if (!is_static) pop_and_check_object(obj);
2617     __ strb(r0, field);
2618     if (rc == may_rewrite) {
2619       patch_bytecode(Bytecodes::_fast_bputfield, bc, r1, true, byte_no);
2620     }
2621     __ b(Done);
2622   }
2623 
2624   __ bind(notByte);
2625   __ cmp(flags, ztos);
2626   __ br(Assembler::NE, notBool);
2627 
2628   // ztos
2629   {
2630     __ pop(ztos);
2631     if (!is_static) pop_and_check_object(obj);
2632     __ andw(r0, r0, 0x1);
2633     __ strb(r0, field);
2634     if (rc == may_rewrite) {
2635       patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
2636     }
2637     __ b(Done);
2638   }
2639 
2640   __ bind(notBool);
2641   __ cmp(flags, atos);
2642   __ br(Assembler::NE, notObj);
2643 
2644   // atos
2645   {
2646     __ pop(atos);
2647     if (!is_static) pop_and_check_object(obj);
2648     // Store into the field
2649     do_oop_store(_masm, field, r0, ACCESS_IN_HEAP);
2650     if (rc == may_rewrite) {
2651       patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
2652     }
2653     __ b(Done);
2654   }
2655 
2656   __ bind(notObj);
2657   __ cmp(flags, itos);
2658   __ br(Assembler::NE, notInt);
2659 
2660   // itos
2661   {
2662     __ pop(itos);
2663     if (!is_static) pop_and_check_object(obj);
2664     __ strw(r0, field);
2665     if (rc == may_rewrite) {
2666       patch_bytecode(Bytecodes::_fast_iputfield, bc, r1, true, byte_no);
2667     }
2668     __ b(Done);
2669   }
2670 
2671   __ bind(notInt);
2672   __ cmp(flags, ctos);
2673   __ br(Assembler::NE, notChar);
2674 
2675   // ctos
2676   {
2677     __ pop(ctos);
2678     if (!is_static) pop_and_check_object(obj);
2679     __ strh(r0, field);
2680     if (rc == may_rewrite) {
2681       patch_bytecode(Bytecodes::_fast_cputfield, bc, r1, true, byte_no);
2682     }
2683     __ b(Done);
2684   }
2685 
2686   __ bind(notChar);
2687   __ cmp(flags, stos);
2688   __ br(Assembler::NE, notShort);
2689 
2690   // stos
2691   {
2692     __ pop(stos);
2693     if (!is_static) pop_and_check_object(obj);
2694     __ strh(r0, field);
2695     if (rc == may_rewrite) {
2696       patch_bytecode(Bytecodes::_fast_sputfield, bc, r1, true, byte_no);
2697     }
2698     __ b(Done);
2699   }
2700 
2701   __ bind(notShort);
2702   __ cmp(flags, ltos);
2703   __ br(Assembler::NE, notLong);
2704 
2705   // ltos
2706   {
2707     __ pop(ltos);
2708     if (!is_static) pop_and_check_object(obj);
2709     __ str(r0, field);
2710     if (rc == may_rewrite) {
2711       patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no);
2712     }
2713     __ b(Done);
2714   }
2715 
2716   __ bind(notLong);
2717   __ cmp(flags, ftos);
2718   __ br(Assembler::NE, notFloat);
2719 
2720   // ftos
2721   {
2722     __ pop(ftos);
2723     if (!is_static) pop_and_check_object(obj);
2724     __ strs(v0, field);
2725     if (rc == may_rewrite) {
2726       patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no);
2727     }
2728     __ b(Done);
2729   }
2730 
2731   __ bind(notFloat);
2732 #ifdef ASSERT
2733   __ cmp(flags, dtos);
2734   __ br(Assembler::NE, notDouble);
2735 #endif
2736 
2737   // dtos
2738   {
2739     __ pop(dtos);
2740     if (!is_static) pop_and_check_object(obj);
2741     __ strd(v0, field);
2742     if (rc == may_rewrite) {
2743       patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no);
2744     }
2745   }
2746 
2747 #ifdef ASSERT
2748   __ b(Done);
2749 
2750   __ bind(notDouble);
2751   __ stop("Bad state");
2752 #endif
2753 
2754   __ bind(Done);
2755 
2756   {
2757     Label notVolatile;
2758     __ tbz(r5, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2759     __ membar(MacroAssembler::StoreLoad);
2760     __ bind(notVolatile);
2761   }
2762 }
2763 
2764 void TemplateTable::putfield(int byte_no)
2765 {
2766   putfield_or_static(byte_no, false);
2767 }
2768 
2769 void TemplateTable::nofast_putfield(int byte_no) {
2770   putfield_or_static(byte_no, false, may_not_rewrite);
2771 }
2772 
2773 void TemplateTable::putstatic(int byte_no) {
2774   putfield_or_static(byte_no, true);
2775 }
2776 
2777 void TemplateTable::jvmti_post_fast_field_mod()
2778 {
2779   if (JvmtiExport::can_post_field_modification()) {
2780     // Check to see if a field modification watch has been set before
2781     // we take the time to call into the VM.
2782     Label L2;
2783     __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2784     __ ldrw(c_rarg3, Address(rscratch1));
2785     __ cbzw(c_rarg3, L2);
2786     __ pop_ptr(r19);                  // copy the object pointer from tos
2787     __ verify_oop(r19);
2788     __ push_ptr(r19);                 // put the object pointer back on tos
2789     // Save tos values before call_VM() clobbers them. Since we have
2790     // to do it for every data type, we use the saved values as the
2791     // jvalue object.
2792     switch (bytecode()) {          // load values into the jvalue object
2793     case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
2794     case Bytecodes::_fast_bputfield: // fall through
2795     case Bytecodes::_fast_zputfield: // fall through
2796     case Bytecodes::_fast_sputfield: // fall through
2797     case Bytecodes::_fast_cputfield: // fall through
2798     case Bytecodes::_fast_iputfield: __ push_i(r0); break;
2799     case Bytecodes::_fast_dputfield: __ push_d(); break;
2800     case Bytecodes::_fast_fputfield: __ push_f(); break;
2801     case Bytecodes::_fast_lputfield: __ push_l(r0); break;
2802 
2803     default:
2804       ShouldNotReachHere();
2805     }
2806     __ mov(c_rarg3, esp);             // points to jvalue on the stack
2807     // access constant pool cache entry
2808     __ get_cache_entry_pointer_at_bcp(c_rarg2, r0, 1);
2809     __ verify_oop(r19);
2810     // r19: object pointer copied above
2811     // c_rarg2: cache entry pointer
2812     // c_rarg3: jvalue object on the stack
2813     __ call_VM(noreg,
2814                CAST_FROM_FN_PTR(address,
2815                                 InterpreterRuntime::post_field_modification),
2816                r19, c_rarg2, c_rarg3);
2817 
2818     switch (bytecode()) {             // restore tos values
2819     case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
2820     case Bytecodes::_fast_bputfield: // fall through
2821     case Bytecodes::_fast_zputfield: // fall through
2822     case Bytecodes::_fast_sputfield: // fall through
2823     case Bytecodes::_fast_cputfield: // fall through
2824     case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
2825     case Bytecodes::_fast_dputfield: __ pop_d(); break;
2826     case Bytecodes::_fast_fputfield: __ pop_f(); break;
2827     case Bytecodes::_fast_lputfield: __ pop_l(r0); break;
2828     }
2829     __ bind(L2);
2830   }
2831 }
2832 
2833 void TemplateTable::fast_storefield(TosState state)
2834 {
2835   transition(state, vtos);
2836 
2837   ByteSize base = ConstantPoolCache::base_offset();
2838 
2839   jvmti_post_fast_field_mod();
2840 
2841   // access constant pool cache
2842   __ get_cache_and_index_at_bcp(r2, r1, 1);
2843 
2844   // test for volatile with r3
2845   __ ldrw(r3, Address(r2, in_bytes(base +
2846                                    ConstantPoolCacheEntry::flags_offset())));
2847 
2848   // replace index with field offset from cache entry
2849   __ ldr(r1, Address(r2, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
2850 
2851   {
2852     Label notVolatile;
2853     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2854     __ membar(MacroAssembler::StoreStore);
2855     __ bind(notVolatile);
2856   }
2857 
2858   Label notVolatile;
2859 
2860   // Get object from stack
2861   pop_and_check_object(r2);
2862 
2863   // field address
2864   const Address field(r2, r1);
2865 
2866   // access field
2867   switch (bytecode()) {
2868   case Bytecodes::_fast_aputfield:
2869     do_oop_store(_masm, field, r0, ACCESS_IN_HEAP);
2870     break;
2871   case Bytecodes::_fast_lputfield:
2872     __ str(r0, field);
2873     break;
2874   case Bytecodes::_fast_iputfield:
2875     __ strw(r0, field);
2876     break;
2877   case Bytecodes::_fast_zputfield:
2878     __ andw(r0, r0, 0x1);  // boolean is true if LSB is 1
2879     // fall through to bputfield
2880   case Bytecodes::_fast_bputfield:
2881     __ strb(r0, field);
2882     break;
2883   case Bytecodes::_fast_sputfield:
2884     // fall through
2885   case Bytecodes::_fast_cputfield:
2886     __ strh(r0, field);
2887     break;
2888   case Bytecodes::_fast_fputfield:
2889     __ strs(v0, field);
2890     break;
2891   case Bytecodes::_fast_dputfield:
2892     __ strd(v0, field);
2893     break;
2894   default:
2895     ShouldNotReachHere();
2896   }
2897 
2898   {
2899     Label notVolatile;
2900     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2901     __ membar(MacroAssembler::StoreLoad);
2902     __ bind(notVolatile);
2903   }
2904 }
2905 
2906 
2907 void TemplateTable::fast_accessfield(TosState state)
2908 {
2909   transition(atos, state);
2910   // Do the JVMTI work here to avoid disturbing the register state below
2911   if (JvmtiExport::can_post_field_access()) {
2912     // Check to see if a field access watch has been set before we
2913     // take the time to call into the VM.
2914     Label L1;
2915     __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2916     __ ldrw(r2, Address(rscratch1));
2917     __ cbzw(r2, L1);
2918     // access constant pool cache entry
2919     __ get_cache_entry_pointer_at_bcp(c_rarg2, rscratch2, 1);
2920     __ verify_oop(r0);
2921     __ push_ptr(r0);  // save object pointer before call_VM() clobbers it
2922     __ mov(c_rarg1, r0);
2923     // c_rarg1: object pointer copied above
2924     // c_rarg2: cache entry pointer
2925     __ call_VM(noreg,
2926                CAST_FROM_FN_PTR(address,
2927                                 InterpreterRuntime::post_field_access),
2928                c_rarg1, c_rarg2);
2929     __ pop_ptr(r0); // restore object pointer
2930     __ bind(L1);
2931   }
2932 
2933   // access constant pool cache
2934   __ get_cache_and_index_at_bcp(r2, r1, 1);
2935   __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
2936                                   ConstantPoolCacheEntry::f2_offset())));
2937   __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
2938                                    ConstantPoolCacheEntry::flags_offset())));
2939 
2940   // r0: object
2941   __ verify_oop(r0);
2942   __ null_check(r0);
2943   const Address field(r0, r1);
2944 
2945   // 8179954: We need to make sure that the code generated for
2946   // volatile accesses forms a sequentially-consistent set of
2947   // operations when combined with STLR and LDAR.  Without a leading
2948   // membar it's possible for a simple Dekker test to fail if loads
2949   // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
2950   // the stores in one method and we interpret the loads in another.
2951   if (! UseBarriersForVolatile) {
2952     Label notVolatile;
2953     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2954     __ membar(MacroAssembler::AnyAny);
2955     __ bind(notVolatile);
2956   }
2957 
2958   // access field
2959   switch (bytecode()) {
2960   case Bytecodes::_fast_agetfield:
2961     do_oop_load(_masm, field, r0, ACCESS_IN_HEAP);
2962     __ verify_oop(r0);
2963     break;
2964   case Bytecodes::_fast_lgetfield:
2965     __ ldr(r0, field);
2966     break;
2967   case Bytecodes::_fast_igetfield:
2968     __ ldrw(r0, field);
2969     break;
2970   case Bytecodes::_fast_bgetfield:
2971     __ load_signed_byte(r0, field);
2972     break;
2973   case Bytecodes::_fast_sgetfield:
2974     __ load_signed_short(r0, field);
2975     break;
2976   case Bytecodes::_fast_cgetfield:
2977     __ load_unsigned_short(r0, field);
2978     break;
2979   case Bytecodes::_fast_fgetfield:
2980     __ ldrs(v0, field);
2981     break;
2982   case Bytecodes::_fast_dgetfield:
2983     __ ldrd(v0, field);
2984     break;
2985   default:
2986     ShouldNotReachHere();
2987   }
2988   {
2989     Label notVolatile;
2990     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
2991     __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2992     __ bind(notVolatile);
2993   }
2994 }
2995 
2996 void TemplateTable::fast_xaccess(TosState state)
2997 {
2998   transition(vtos, state);
2999 
3000   // get receiver
3001   __ ldr(r0, aaddress(0));
3002   // access constant pool cache
3003   __ get_cache_and_index_at_bcp(r2, r3, 2);
3004   __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3005                                   ConstantPoolCacheEntry::f2_offset())));
3006 
3007   // 8179954: We need to make sure that the code generated for
3008   // volatile accesses forms a sequentially-consistent set of
3009   // operations when combined with STLR and LDAR.  Without a leading
3010   // membar it's possible for a simple Dekker test to fail if loads
3011   // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
3012   // the stores in one method and we interpret the loads in another.
3013   if (! UseBarriersForVolatile) {
3014     Label notVolatile;
3015     __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3016                                      ConstantPoolCacheEntry::flags_offset())));
3017     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3018     __ membar(MacroAssembler::AnyAny);
3019     __ bind(notVolatile);
3020   }
3021 
3022   // make sure exception is reported in correct bcp range (getfield is
3023   // next instruction)
3024   __ increment(rbcp);
3025   __ null_check(r0);
3026   switch (state) {
3027   case itos:
3028     __ ldrw(r0, Address(r0, r1, Address::lsl(0)));
3029     break;
3030   case atos:
3031     do_oop_load(_masm, Address(r0, r1, Address::lsl(0)), r0, ACCESS_IN_HEAP);
3032     __ verify_oop(r0);
3033     break;
3034   case ftos:
3035     __ ldrs(v0, Address(r0, r1, Address::lsl(0)));
3036     break;
3037   default:
3038     ShouldNotReachHere();
3039   }
3040 
3041   {
3042     Label notVolatile;
3043     __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
3044                                      ConstantPoolCacheEntry::flags_offset())));
3045     __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
3046     __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
3047     __ bind(notVolatile);
3048   }
3049 
3050   __ decrement(rbcp);
3051 }
3052 
3053 
3054 
3055 //-----------------------------------------------------------------------------
3056 // Calls
3057 
3058 void TemplateTable::count_calls(Register method, Register temp)
3059 {
3060   __ call_Unimplemented();
3061 }
3062 
3063 void TemplateTable::prepare_invoke(int byte_no,
3064                                    Register method, // linked method (or i-klass)
3065                                    Register index,  // itable index, MethodType, etc.
3066                                    Register recv,   // if caller wants to see it
3067                                    Register flags   // if caller wants to test it
3068                                    ) {
3069   // determine flags
3070   Bytecodes::Code code = bytecode();
3071   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
3072   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
3073   const bool is_invokehandle     = code == Bytecodes::_invokehandle;
3074   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
3075   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
3076   const bool load_receiver       = (recv  != noreg);
3077   const bool save_flags          = (flags != noreg);
3078   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3079   assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
3080   assert(flags == noreg || flags == r3, "");
3081   assert(recv  == noreg || recv  == r2, "");
3082 
3083   // setup registers & access constant pool cache
3084   if (recv  == noreg)  recv  = r2;
3085   if (flags == noreg)  flags = r3;
3086   assert_different_registers(method, index, recv, flags);
3087 
3088   // save 'interpreter return address'
3089   __ save_bcp();
3090 
3091   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
3092 
3093   // maybe push appendix to arguments (just before return address)
3094   if (is_invokedynamic || is_invokehandle) {
3095     Label L_no_push;
3096     __ tbz(flags, ConstantPoolCacheEntry::has_appendix_shift, L_no_push);
3097     // Push the appendix as a trailing parameter.
3098     // This must be done before we get the receiver,
3099     // since the parameter_size includes it.
3100     __ push(r19);
3101     __ mov(r19, index);
3102     assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
3103     __ load_resolved_reference_at_index(index, r19);
3104     __ pop(r19);
3105     __ push(index);  // push appendix (MethodType, CallSite, etc.)
3106     __ bind(L_no_push);
3107   }
3108 
3109   // load receiver if needed (note: no return address pushed yet)
3110   if (load_receiver) {
3111     __ andw(recv, flags, ConstantPoolCacheEntry::parameter_size_mask);
3112     // FIXME -- is this actually correct? looks like it should be 2
3113     // const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
3114     // const int receiver_is_at_end      = -1;  // back off one slot to get receiver
3115     // Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
3116     // __ movptr(recv, recv_addr);
3117     __ add(rscratch1, esp, recv, ext::uxtx, 3); // FIXME: uxtb here?
3118     __ ldr(recv, Address(rscratch1, -Interpreter::expr_offset_in_bytes(1)));
3119     __ verify_oop(recv);
3120   }
3121 
3122   // compute return type
3123   // x86 uses a shift and mask, or wings it with a shift plus an assert
3124   // that the mask is not needed. aarch64 just uses a bitfield extract
3125   __ ubfxw(rscratch2, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
3126   // load return address
3127   {
3128     const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
3129     __ mov(rscratch1, table_addr);
3130     __ ldr(lr, Address(rscratch1, rscratch2, Address::lsl(3)));
3131   }
3132 }
3133 
3134 
3135 void TemplateTable::invokevirtual_helper(Register index,
3136                                          Register recv,
3137                                          Register flags)
3138 {
3139   // Uses temporary registers r0, r3
3140   assert_different_registers(index, recv, r0, r3);
3141   // Test for an invoke of a final method
3142   Label notFinal;
3143   __ tbz(flags, ConstantPoolCacheEntry::is_vfinal_shift, notFinal);
3144 
3145   const Register method = index;  // method must be rmethod
3146   assert(method == rmethod,
3147          "methodOop must be rmethod for interpreter calling convention");
3148 
3149   // do the call - the index is actually the method to call
3150   // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
3151 
3152   // It's final, need a null check here!
3153   __ null_check(recv);
3154 
3155   // profile this call
3156   __ profile_final_call(r0);
3157   __ profile_arguments_type(r0, method, r4, true);
3158 
3159   __ jump_from_interpreted(method, r0);
3160 
3161   __ bind(notFinal);
3162 
3163   // get receiver klass
3164   __ null_check(recv, oopDesc::klass_offset_in_bytes());
3165   __ load_klass(r0, recv);
3166 
3167   // profile this call
3168   __ profile_virtual_call(r0, rlocals, r3);
3169 
3170   // get target methodOop & entry point
3171   __ lookup_virtual_method(r0, index, method);
3172   __ profile_arguments_type(r3, method, r4, true);
3173   // FIXME -- this looks completely redundant. is it?
3174   // __ ldr(r3, Address(method, Method::interpreter_entry_offset()));
3175   __ jump_from_interpreted(method, r3);
3176 }
3177 
3178 void TemplateTable::invokevirtual(int byte_no)
3179 {
3180   transition(vtos, vtos);
3181   assert(byte_no == f2_byte, "use this argument");
3182 
3183   prepare_invoke(byte_no, rmethod, noreg, r2, r3);
3184 
3185   // rmethod: index (actually a Method*)
3186   // r2: receiver
3187   // r3: flags
3188 
3189   invokevirtual_helper(rmethod, r2, r3);
3190 }
3191 
3192 void TemplateTable::invokespecial(int byte_no)
3193 {
3194   transition(vtos, vtos);
3195   assert(byte_no == f1_byte, "use this argument");
3196 
3197   prepare_invoke(byte_no, rmethod, noreg,  // get f1 Method*
3198                  r2);  // get receiver also for null check
3199   __ verify_oop(r2);
3200   __ null_check(r2);
3201   // do the call
3202   __ profile_call(r0);
3203   __ profile_arguments_type(r0, rmethod, rbcp, false);
3204   __ jump_from_interpreted(rmethod, r0);
3205 }
3206 
3207 void TemplateTable::invokestatic(int byte_no)
3208 {
3209   transition(vtos, vtos);
3210   assert(byte_no == f1_byte, "use this argument");
3211 
3212   prepare_invoke(byte_no, rmethod);  // get f1 Method*
3213   // do the call
3214   __ profile_call(r0);
3215   __ profile_arguments_type(r0, rmethod, r4, false);
3216   __ jump_from_interpreted(rmethod, r0);
3217 }
3218 
3219 void TemplateTable::fast_invokevfinal(int byte_no)
3220 {
3221   __ call_Unimplemented();
3222 }
3223 
3224 void TemplateTable::invokeinterface(int byte_no) {
3225   transition(vtos, vtos);
3226   assert(byte_no == f1_byte, "use this argument");
3227 
3228   prepare_invoke(byte_no, r0, rmethod,  // get f1 Klass*, f2 itable index
3229                  r2, r3); // recv, flags
3230 
3231   // r0: interface klass (from f1)
3232   // rmethod: itable index (from f2)
3233   // r2: receiver
3234   // r3: flags
3235 
3236   // Special case of invokeinterface called for virtual method of
3237   // java.lang.Object.  See cpCacheOop.cpp for details.
3238   // This code isn't produced by javac, but could be produced by
3239   // another compliant java compiler.
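       // For example (illustrative source only): a call such as
       //   ((I) obj).hashCode();   // I is some interface
       // may be emitted as invokeinterface even though hashCode() is a
       // virtual method of java.lang.Object; the cache entry is then flagged
       // is_forced_virtual and we dispatch it via invokevirtual_helper below.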
3240   Label notMethod;
3241   __ tbz(r3, ConstantPoolCacheEntry::is_forced_virtual_shift, notMethod);
3242 
3243   invokevirtual_helper(rmethod, r2, r3);
3244   __ bind(notMethod);
3245 
3246   // Get receiver klass into r3 - also a null check
3247   __ restore_locals();
3248   __ null_check(r2, oopDesc::klass_offset_in_bytes());
3249   __ load_klass(r3, r2);
3250 
3251   // profile this call
3252   __ profile_virtual_call(r3, r13, r19);
3253 
3254   Label no_such_interface, no_such_method;
3255 
3256   __ lookup_interface_method(// inputs: rec. class, interface, itable index
3257                              r3, r0, rmethod,
3258                              // outputs: method, scan temp. reg
3259                              rmethod, r13,
3260                              no_such_interface);
3261 
3262   // rmethod: methodOop to call
3263   // r2: receiver
3264   // Check for abstract method error
3265   // Note: This should be done more efficiently via a throw_abstract_method_error
3266   //       interpreter entry point and a conditional jump to it in case of a null
3267   //       method.
3268   __ cbz(rmethod, no_such_method);
3269 
3270   __ profile_arguments_type(r3, rmethod, r13, true);
3271 
3272   // do the call
3273   // r2: receiver
3274   // rmethod: methodOop
3275   __ jump_from_interpreted(rmethod, r3);
3276   __ should_not_reach_here();
3277 
3278   // exception handling code follows...
3279   // note: must restore interpreter registers to canonical
3280   //       state for exception handling to work correctly!
3281 
3282   __ bind(no_such_method);
3283   // throw exception
3284   __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
3285   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3286   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
3287   // the call_VM checks for exception, so we should never return here.
3288   __ should_not_reach_here();
3289 
3290   __ bind(no_such_interface);
3291   // throw exception
3292   __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
3293   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3294   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3295                    InterpreterRuntime::throw_IncompatibleClassChangeError));
3296   // the call_VM checks for exception, so we should never return here.
3297   __ should_not_reach_here();
3298   return;
3299 }
3300 
3301 void TemplateTable::invokehandle(int byte_no) {
3302   transition(vtos, vtos);
3303   assert(byte_no == f1_byte, "use this argument");
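  // invokehandle is not emitted by javac directly: the rewriter turns an
  // invokevirtual of a signature-polymorphic method, e.g.
  //   Object x = mh.invokeExact("hi");   // MethodHandle mh
  // into this bytecode, so the interpreter can call through the resolved
  // MH.invokeExact_MT adapter with the MethodType appendix.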
3304 
  prepare_invoke(byte_no, rmethod, r0, r2);
  // rmethod: MH.invokeExact_MT adapter method (from f2)
  // r0:      MethodType object (from the resolved references array)
  // r2:      receiver
  __ verify_method_ptr(rmethod);
  __ verify_oop(r2);
  __ null_check(r2);
3309 
3310   // FIXME: profile the LambdaForm also
3311 
3312   // r13 is safe to use here as a scratch reg because it is about to
3313   // be clobbered by jump_from_interpreted().
3314   __ profile_final_call(r13);
3315   __ profile_arguments_type(r13, rmethod, r4, true);
3316 
3317   __ jump_from_interpreted(rmethod, r0);
3318 }
3319 
3320 void TemplateTable::invokedynamic(int byte_no) {
3321   transition(vtos, vtos);
3322   assert(byte_no == f1_byte, "use this argument");
3323 
3324   prepare_invoke(byte_no, rmethod, r0);
3325 
3326   // r0: CallSite object (from cpool->resolved_references[])
3327   // rmethod: MH.linkToCallSite method (from f2)
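  // For illustration: an invokedynamic site is what, e.g., a lambda capture
  // compiles to, roughly
  //   Runnable r = () -> {};   // invokedynamic run()Ljava/lang/Runnable;
  // The first execution bootstraps the site (LambdaMetafactory); afterwards
  // the CallSite appendix (r0) and the linkToCallSite linker method (rmethod)
  // are what prepare_invoke loaded above.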
3328 
  // Note: the CallSite object in r0 is already pushed by prepare_invoke
3330 
3331   // %%% should make a type profile for any invokedynamic that takes a ref argument
3332   // profile this call
3333   __ profile_call(rbcp);
3334   __ profile_arguments_type(r3, rmethod, r13, false);
3335 
3336   __ verify_oop(r0);
3337 
3338   __ jump_from_interpreted(rmethod, r0);
3339 }
3340 
3341 
3342 //-----------------------------------------------------------------------------
3343 // Allocation
3344 
3345 void TemplateTable::_new() {
3346   transition(vtos, atos);
3347 
3348   __ get_unsigned_2_byte_index_at_bcp(r3, 1);
3349   Label slow_case;
3350   Label done;
3351   Label initialize_header;
3352   Label initialize_object; // including clearing the fields
3353   Label allocate_shared;
3354 
3355   __ get_cpool_and_tags(r4, r0);
  // Make sure the class we're about to instantiate has been resolved.
  // This is done before loading InstanceKlass to be consistent with the
  // order in which the constant pool is updated (see ConstantPool::klass_at_put).
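  // The tag is read below with a load-acquire (ldarb); paired with the
  // releasing store of the tag on the resolution side, a resolved tag
  // guarantees that the Klass* loaded afterwards is visible as well.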
3359   const int tags_offset = Array<u1>::base_offset_in_bytes();
3360   __ lea(rscratch1, Address(r0, r3, Address::lsl(0)));
3361   __ lea(rscratch1, Address(rscratch1, tags_offset));
3362   __ ldarb(rscratch1, rscratch1);
3363   __ cmp(rscratch1, JVM_CONSTANT_Class);
3364   __ br(Assembler::NE, slow_case);
3365 
3366   // get InstanceKlass
3367   __ load_resolved_klass_at_offset(r4, r3, r4, rscratch1);
3368 
  // make sure klass is fully initialized
3371   __ ldrb(rscratch1, Address(r4, InstanceKlass::init_state_offset()));
3372   __ cmp(rscratch1, InstanceKlass::fully_initialized);
3373   __ br(Assembler::NE, slow_case);
3374 
3375   // get instance_size in InstanceKlass (scaled to a count of bytes)
3376   __ ldrw(r3,
3377           Address(r4,
3378                   Klass::layout_helper_offset()));
3379   // test to see if it has a finalizer or is malformed in some way
3380   __ tbnz(r3, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case);
3381 
3382   // Allocate the instance
3383   // 1) Try to allocate in the TLAB
3384   // 2) if fail and the object is large allocate in the shared Eden
3385   // 3) if the above fails (or is not applicable), go to a slow case
3386   // (creates a new TLAB, etc.)
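  // For orientation: the TLAB fast path below is bump-the-pointer allocation.
  // A sketch (pseudocode, not the emitted code; top/end are the usual
  // ThreadLocalAllocBuffer fields):
  //
  //   obj = tlab.top;
  //   if (obj + size <= tlab.end) { tlab.top = obj + size; /* success, object at obj */ }
  //   else                        { /* try the shared eden or the slow case  */ }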
3387 
3388   const bool allow_shared_alloc =
3389     Universe::heap()->supports_inline_contig_alloc();
3390 
3391   if (UseTLAB) {
3392     __ tlab_allocate(r0, r3, 0, noreg, r1,
3393                      allow_shared_alloc ? allocate_shared : slow_case);
3394 
3395     if (ZeroTLAB) {
3396       // the fields have been already cleared
3397       __ b(initialize_header);
3398     } else {
3399       // initialize both the header and fields
3400       __ b(initialize_object);
3401     }
3402   }
3403 
3404   // Allocation in the shared Eden, if allowed.
3405   //
3406   // r3: instance size in bytes
3407   if (allow_shared_alloc) {
3408     __ bind(allocate_shared);
3409 
3410     __ eden_allocate(r0, r3, 0, r10, slow_case);
3411     __ incr_allocated_bytes(rthread, r3, 0, rscratch1);
3412   }
3413 
  if (UseTLAB || allow_shared_alloc) {
    // The object fields are initialized before the header.  If there are
    // no fields to initialize (instance size equals the header size), go
    // directly to the header initialization.
3417     __ bind(initialize_object);
3418     __ sub(r3, r3, sizeof(oopDesc));
3419     __ cbz(r3, initialize_header);
3420 
3421     // Initialize object fields
3422     {
3423       __ add(r2, r0, sizeof(oopDesc));
3424       Label loop;
3425       __ bind(loop);
3426       __ str(zr, Address(__ post(r2, BytesPerLong)));
3427       __ sub(r3, r3, BytesPerLong);
3428       __ cbnz(r3, loop);
3429     }
3430 
3431     // initialize object header only.
3432     __ bind(initialize_header);
3433     if (UseBiasedLocking) {
3434       __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset()));
3435     } else {
3436       __ mov(rscratch1, (intptr_t)markOopDesc::prototype());
3437     }
3438     __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes()));
3439     __ store_klass_gap(r0, zr);  // zero klass gap for compressed oops
3440     __ store_klass(r0, r4);      // store klass last
3441 
3442     {
3443       SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
3444       // Trigger dtrace event for fastpath
3445       __ push(atos); // save the return value
3446       __ call_VM_leaf(
3447            CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), r0);
3448       __ pop(atos); // restore the return value
3449 
3450     }
3451     __ b(done);
3452   }
3453 
3454   // slow case
3455   __ bind(slow_case);
3456   __ get_constant_pool(c_rarg1);
3457   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3458   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
3459   __ verify_oop(r0);
3460 
3461   // continue
3462   __ bind(done);
3463   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3464   __ membar(Assembler::StoreStore);
3465 }
3466 
3467 void TemplateTable::newarray() {
3468   transition(itos, atos);
3469   __ load_unsigned_byte(c_rarg1, at_bcp(1));
3470   __ mov(c_rarg2, r0);
3471   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
3472           c_rarg1, c_rarg2);
3473   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3474   __ membar(Assembler::StoreStore);
3475 }
3476 
3477 void TemplateTable::anewarray() {
3478   transition(itos, atos);
3479   __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
3480   __ get_constant_pool(c_rarg1);
3481   __ mov(c_rarg3, r0);
3482   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
3483           c_rarg1, c_rarg2, c_rarg3);
3484   // Must prevent reordering of stores for object initialization with stores that publish the new object.
3485   __ membar(Assembler::StoreStore);
3486 }
3487 
3488 void TemplateTable::arraylength() {
3489   transition(atos, itos);
3490   __ null_check(r0, arrayOopDesc::length_offset_in_bytes());
3491   __ ldrw(r0, Address(r0, arrayOopDesc::length_offset_in_bytes()));
3492 }
3493 
3494 void TemplateTable::checkcast()
3495 {
3496   transition(atos, atos);
3497   Label done, is_null, ok_is_subtype, quicked, resolved;
3498   __ cbz(r0, is_null);
3499 
3500   // Get cpool & tags index
3501   __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
3502   __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
  // See if bytecode has already been quickened
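  // "Quickened" means the Class entry has already been resolved and its tag
  // rewritten to JVM_CONSTANT_Class, e.g. after the first execution of a cast
  // such as (String) obj; in that case the Klass* can be loaded directly
  // instead of calling into the runtime.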
3504   __ add(rscratch1, r3, Array<u1>::base_offset_in_bytes());
3505   __ lea(r1, Address(rscratch1, r19));
3506   __ ldarb(r1, r1);
3507   __ cmp(r1, JVM_CONSTANT_Class);
3508   __ br(Assembler::EQ, quicked);
3509 
3510   __ push(atos); // save receiver for result, and for GC
3511   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3512   // vm_result_2 has metadata result
3513   __ get_vm_result_2(r0, rthread);
3514   __ pop(r3); // restore receiver
3515   __ b(resolved);
3516 
3517   // Get superklass in r0 and subklass in r3
3518   __ bind(quicked);
3519   __ mov(r3, r0); // Save object in r3; r0 needed for subtype check
3520   __ load_resolved_klass_at_offset(r2, r19, r0, rscratch1); // r0 = klass
3521 
3522   __ bind(resolved);
3523   __ load_klass(r19, r3);
3524 
3525   // Generate subtype check.  Blows r2, r5.  Object in r3.
3526   // Superklass in r0.  Subklass in r19.
3527   __ gen_subtype_check(r19, ok_is_subtype);
3528 
3529   // Come here on failure
3530   __ push(r3);
3531   // object is at TOS
3532   __ b(Interpreter::_throw_ClassCastException_entry);
3533 
3534   // Come here on success
3535   __ bind(ok_is_subtype);
  __ mov(r0, r3); // Restore object from r3 into r0
3537 
3538   // Collect counts on whether this test sees NULLs a lot or not.
3539   if (ProfileInterpreter) {
3540     __ b(done);
3541     __ bind(is_null);
3542     __ profile_null_seen(r2);
3543   } else {
3544     __ bind(is_null);   // same as 'done'
3545   }
3546   __ bind(done);
3547 }
3548 
3549 void TemplateTable::instanceof() {
3550   transition(atos, itos);
3551   Label done, is_null, ok_is_subtype, quicked, resolved;
3552   __ cbz(r0, is_null);
3553 
3554   // Get cpool & tags index
3555   __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
3556   __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
  // See if bytecode has already been quickened
3558   __ add(rscratch1, r3, Array<u1>::base_offset_in_bytes());
3559   __ lea(r1, Address(rscratch1, r19));
3560   __ ldarb(r1, r1);
3561   __ cmp(r1, JVM_CONSTANT_Class);
3562   __ br(Assembler::EQ, quicked);
3563 
3564   __ push(atos); // save receiver for result, and for GC
3565   call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
3566   // vm_result_2 has metadata result
3567   __ get_vm_result_2(r0, rthread);
3568   __ pop(r3); // restore receiver
3569   __ verify_oop(r3);
3570   __ load_klass(r3, r3);
3571   __ b(resolved);
3572 
3573   // Get superklass in r0 and subklass in r3
3574   __ bind(quicked);
3575   __ load_klass(r3, r0);
3576   __ load_resolved_klass_at_offset(r2, r19, r0, rscratch1);
3577 
3578   __ bind(resolved);
3579 
3580   // Generate subtype check.  Blows r2, r5
3581   // Superklass in r0.  Subklass in r3.
3582   __ gen_subtype_check(r3, ok_is_subtype);
3583 
3584   // Come here on failure
3585   __ mov(r0, 0);
3586   __ b(done);
3587   // Come here on success
3588   __ bind(ok_is_subtype);
3589   __ mov(r0, 1);
3590 
3591   // Collect counts on whether this test sees NULLs a lot or not.
3592   if (ProfileInterpreter) {
3593     __ b(done);
3594     __ bind(is_null);
3595     __ profile_null_seen(r2);
3596   } else {
3597     __ bind(is_null);   // same as 'done'
3598   }
3599   __ bind(done);
3600   // r0 = 0: obj == NULL or  obj is not an instanceof the specified klass
3601   // r0 = 1: obj != NULL and obj is     an instanceof the specified klass
3602 }
3603 
3604 //-----------------------------------------------------------------------------
3605 // Breakpoints
3606 void TemplateTable::_breakpoint() {
  // Note: We get here even if we are single stepping.
  // jbug insists on setting breakpoints at every bytecode
  // even if we are in single step mode.
3610 
3611   transition(vtos, vtos);
3612 
3613   // get the unpatched byte code
3614   __ get_method(c_rarg1);
3615   __ call_VM(noreg,
3616              CAST_FROM_FN_PTR(address,
3617                               InterpreterRuntime::get_original_bytecode_at),
3618              c_rarg1, rbcp);
3619   __ mov(r19, r0);
3620 
3621   // post the breakpoint event
3622   __ call_VM(noreg,
3623              CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
3624              rmethod, rbcp);
3625 
3626   // complete the execution of original bytecode
3627   __ mov(rscratch1, r19);
3628   __ dispatch_only_normal(vtos);
3629 }
3630 
3631 //-----------------------------------------------------------------------------
3632 // Exceptions
3633 
3634 void TemplateTable::athrow() {
3635   transition(atos, vtos);
3636   __ null_check(r0);
3637   __ b(Interpreter::throw_exception_entry());
3638 }
3639 
3640 //-----------------------------------------------------------------------------
3641 // Synchronization
3642 //
3643 // Note: monitorenter & exit are symmetric routines; which is reflected
3644 //       in the assembly code structure as well
3645 //
3646 // Stack layout:
3647 //
3648 // [expressions  ] <--- esp               = expression stack top
3649 // ..
3650 // [expressions  ]
3651 // [monitor entry] <--- monitor block top = expression stack bot
3652 // ..
3653 // [monitor entry]
3654 // [frame data   ] <--- monitor block bot
3655 // ...
// [saved rfp    ] <--- rfp
3657 void TemplateTable::monitorenter()
3658 {
3659   transition(atos, vtos);
3660 
3661   // check for NULL object
3662   __ null_check(r0);
3663 
3664   const Address monitor_block_top(
3665         rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3666   const Address monitor_block_bot(
3667         rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
3668   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3669 
3670   Label allocated;
3671 
3672   // initialize entry pointer
3673   __ mov(c_rarg1, zr); // points to free slot or NULL
3674 
3675   // find a free slot in the monitor block (result in c_rarg1)
3676   {
3677     Label entry, loop, exit;
3678     __ ldr(c_rarg3, monitor_block_top); // points to current entry,
3679                                         // starting with top-most entry
3680     __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
3681 
3682     __ b(entry);
3683 
3684     __ bind(loop);
3685     // check if current entry is used
3686     // if not used then remember entry in c_rarg1
3687     __ ldr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
3688     __ cmp(zr, rscratch1);
3689     __ csel(c_rarg1, c_rarg3, c_rarg1, Assembler::EQ);
3690     // check if current entry is for same object
3691     __ cmp(r0, rscratch1);
3692     // if same object then stop searching
3693     __ br(Assembler::EQ, exit);
3694     // otherwise advance to next entry
3695     __ add(c_rarg3, c_rarg3, entry_size);
3696     __ bind(entry);
3697     // check if bottom reached
3698     __ cmp(c_rarg3, c_rarg2);
3699     // if not at bottom then check this entry
3700     __ br(Assembler::NE, loop);
3701     __ bind(exit);
3702   }
3703 
  __ cbnz(c_rarg1, allocated); // check if a slot has been found and
                               // if found, continue with that one
3706 
3707   // allocate one if there's no free slot
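  // A new slot is created by sliding the whole expression stack down by
  // entry_size bytes and copying its contents; the vacated region just above
  // the new expression stack bottom becomes the new (top-most) monitor entry,
  // which c_rarg1 points to at 'allocated'.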
3708   {
3709     Label entry, loop;
    // 1. compute new pointers            // esp: old expression stack top
3711     __ ldr(c_rarg1, monitor_block_bot);   // c_rarg1: old expression stack bottom
3712     __ sub(esp, esp, entry_size);         // move expression stack top
3713     __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom
3714     __ mov(c_rarg3, esp);                 // set start value for copy loop
3715     __ str(c_rarg1, monitor_block_bot);   // set new monitor block bottom
3716 
3717     __ sub(sp, sp, entry_size);           // make room for the monitor
3718 
3719     __ b(entry);
3720     // 2. move expression stack contents
3721     __ bind(loop);
3722     __ ldr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
3723                                                    // word from old location
3724     __ str(c_rarg2, Address(c_rarg3, 0));          // and store it at new location
3725     __ add(c_rarg3, c_rarg3, wordSize);            // advance to next word
3726     __ bind(entry);
3727     __ cmp(c_rarg3, c_rarg1);        // check if bottom reached
3728     __ br(Assembler::NE, loop);      // if not at bottom then
3729                                      // copy next word
3730   }
3731 
3732   // call run-time routine
3733   // c_rarg1: points to monitor entry
3734   __ bind(allocated);
3735 
  // Increment bcp to point to the next bytecode, so exception
  // handling for async. exceptions works correctly.
  // The object has already been popped from the stack, so the
  // expression stack looks correct.
3740   __ increment(rbcp);
3741 
3742   // store object
3743   __ str(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
3744   __ lock_object(c_rarg1);
3745 
3746   // check to make sure this monitor doesn't cause stack overflow after locking
3747   __ save_bcp();  // in case of exception
3748   __ generate_stack_overflow_check(0);
3749 
3750   // The bcp has already been incremented. Just need to dispatch to
3751   // next instruction.
3752   __ dispatch_next(vtos);
3753 }
3754 
3755 
3756 void TemplateTable::monitorexit()
3757 {
3758   transition(atos, vtos);
3759 
3760   // check for NULL object
3761   __ null_check(r0);
3762 
3763   const Address monitor_block_top(
3764         rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
3765   const Address monitor_block_bot(
3766         rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
3767   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
3768 
3769   Label found;
3770 
3771   // find matching slot
3772   {
3773     Label entry, loop;
3774     __ ldr(c_rarg1, monitor_block_top); // points to current entry,
3775                                         // starting with top-most entry
3776     __ lea(c_rarg2, monitor_block_bot); // points to word before bottom
3777                                         // of monitor block
3778     __ b(entry);
3779 
3780     __ bind(loop);
3781     // check if current entry is for same object
3782     __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
3783     __ cmp(r0, rscratch1);
3784     // if same object then stop searching
3785     __ br(Assembler::EQ, found);
3786     // otherwise advance to next entry
3787     __ add(c_rarg1, c_rarg1, entry_size);
3788     __ bind(entry);
3789     // check if bottom reached
3790     __ cmp(c_rarg1, c_rarg2);
3791     // if not at bottom then check this entry
3792     __ br(Assembler::NE, loop);
3793   }
3794 
  // Error handling: fall-through from the loop above means no matching
  // entry was found, i.e. unlocking was not block-structured.
3796   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3797                    InterpreterRuntime::throw_illegal_monitor_state_exception));
3798   __ should_not_reach_here();
3799 
3800   // call run-time routine
3801   __ bind(found);
3802   __ push_ptr(r0); // make sure object is on stack (contract with oopMaps)
3803   __ unlock_object(c_rarg1);
3804   __ pop_ptr(r0); // discard object
3805 }
3806 
3807 
3808 // Wide instructions
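// The 'wide' opcode is a one-byte prefix that widens the operands of the
// following instruction, e.g. 'wide iload 300' takes a two-byte local index
// (and 'wide iinc' a two-byte index plus a two-byte increment).  It is
// handled by re-dispatching on the next opcode through the separate
// Interpreter::_wentry_point table.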
3809 void TemplateTable::wide()
3810 {
3811   __ load_unsigned_byte(r19, at_bcp(1));
3812   __ mov(rscratch1, (address)Interpreter::_wentry_point);
3813   __ ldr(rscratch1, Address(rscratch1, r19, Address::uxtw(3)));
3814   __ br(rscratch1);
3815 }
3816 
3817 
3818 // Multi arrays
3819 void TemplateTable::multianewarray() {
3820   transition(vtos, atos);
3821   __ load_unsigned_byte(r0, at_bcp(3)); // get number of dimensions
3822   // last dim is on top of stack; we want address of first one:
3823   // first_addr = last_addr + (ndims - 1) * wordSize
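  // For example, for 'new int[a][b][c]' (ndims == 3) the counts were pushed
  // in source order, so with wordSize == 8:
  //   esp + 0  -> c   (last dimension, top of stack)
  //   esp + 8  -> b
  //   esp + 16 -> a   (first dimension)  <- c_rarg1 after the two instructions below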
3824   __ lea(c_rarg1, Address(esp, r0, Address::uxtw(3)));
3825   __ sub(c_rarg1, c_rarg1, wordSize);
3826   call_VM(r0,
3827           CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
3828           c_rarg1);
3829   __ load_unsigned_byte(r1, at_bcp(3));
3830   __ lea(esp, Address(esp, r1, Address::uxtw(3)));
3831 }