/*
 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "interp_masm_aarch32.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/bytecodeTracer.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/templateInterpreterGenerator.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateTable.hpp"
#include "oops/arrayOop.hpp"
#include "oops/method.hpp"
#include "oops/methodData.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "prims/jvmtiThreadState.hpp"
#include "runtime/arguments.hpp"
#include "runtime/deoptimization.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/synchronizer.hpp"
#include "runtime/timer.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/debug.hpp"

#include <sys/types.h>

#ifndef PRODUCT
#include "oops/method.hpp"
#include "vm_version_aarch32.hpp"
#endif // !PRODUCT
// Size of interpreter code.  Increase if too small.  Interpreter will
// fail with a guarantee ("not enough space for interpreter generation")
// if too small.
// Run with +PrintInterpreter to get the VM to print out the size.
// Max size with JVMTI
int TemplateInterpreter::InterpreterCodeSize = 200 * 1024;

#define __ _masm->

//-----------------------------------------------------------------------------

extern "C" void entry(CodeBuffer*);

//-----------------------------------------------------------------------------

address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  address entry = __ pc();

  // On entry, sp must be aligned and point to the bottom of the area the
  // integer args are copied to.

  // rmethod
  // rlocals
  // c_rarg3: first stack arg - wordSize

  __ mov(c_rarg3, sp);
  __ sub(sp, sp, 22 * wordSize);
  __ str(lr, sp);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::slow_signature_handler),
             rmethod, rlocals, c_rarg3);

  // r0: result handler

  // Stack layout:
  //      return address            <- sp (lowest addr)
  //      float/double identifier word with the following structure:
  //        16 bits - 1 bit per single-precision slot, free/in-use indication (0==in use)
  //        8 bits  - 1 bit per register pair, double/float indication (0==double)
  //      4 integer args (if static first is unused)
  //      8 double args (defined by ARM calling convention spec)
  //        stack args              <- sp (on entry)
  //        garbage
  //        expression stack bottom
  //        bcp (NULL)
  //        ...
  // If this changes, update interpreterRT_aarch32.cpp slowpath!
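  // Worked example (a sketch assuming the encoding above): for a
  // (double, float, float) signature the handler would clear bit 16
  // (pair 0 holds a double), set bit 17 and clear bits 2 and 3 (pair 1
  // holds two in-use floats), so the loop below loads d0 and then s2/s3.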

  // Restore LR
  __ ldr(lr, sp);

#ifdef HARD_FLOAT_CC
  // Do FP first so we can use c_rarg3 as temp
  __ ldr(c_rarg3, Address(sp, wordSize)); // float/double identifiers

  {
    Label fp_done;
    // each iteration covers either a single double register or up to two float registers
    for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
      Label d, done;

      __ tst(c_rarg3, 1 << (i + 16));
      __ b(d, __ EQ);
      __ tst(c_rarg3, 1 << (i * 2));
      __ b(fp_done, __ NE);
      __ vldr_f32(as_FloatRegister(i * 2), Address(sp, (6 + 2 * i) * wordSize));
      __ tst(c_rarg3, 1 << (i * 2 + 1));
      __ vldr_f32(as_FloatRegister(i * 2 + 1), Address(sp, (7 + 2 * i) * wordSize), __ EQ);
      __ b(done);
      __ bind(d);
      __ vldr_f64(as_DoubleFloatRegister(i), Address(sp, (6 + 2 * i) * wordSize));
      __ bind(done);
    }
    __ bind(fp_done);
  }
#endif // HARD_FLOAT_CC

  // c_rarg0 contains the result from the call of
  // InterpreterRuntime::slow_signature_handler so we don't touch it
  // here.  It will be loaded with the JNIEnv* later.
  __ ldr(c_rarg1, Address(sp, 2 * wordSize));
  __ ldrd(c_rarg2, c_rarg3, Address(sp, 3 * wordSize));

  __ add(sp, sp, 22 * wordSize);
  __ b(lr);

  return entry;
}


//
// Various method entries
//

address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
  // rmethod: Method*
  // r4: sender sp
  // sp: args

  //if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
  // FIXME currently ignoring this flag and inlining anyway

  // These don't need a safepoint check because they aren't virtually
  // callable. We won't enter these intrinsics from compiled code.
  // If in the future we added an intrinsic which was virtually callable
  // we'd have to worry about how to safepoint so that this code is used.

  // mathematical functions inlined by compiler
  // (interpreter must provide identical implementation
  // in order to avoid monotonicity bugs when switching
  // from interpreter to compiler in the middle of some
  // computation)
  //
  // stack:
  //        [ arg ] <-- sp
  //        [ arg ]
  // retaddr in lr

  address entry_point = NULL;
  Register continuation = lr;
  bool transcendental_entry = false;
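  // For the transcendental_entry cases below, the intrinsic is computed by
  // a runtime call rather than inline VFP code; lr is saved in r4 before
  // the call clobbers it, and the final branch then returns through r4
  // instead of lr.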

  switch (kind) {
  case Interpreter::java_lang_math_abs:
    entry_point = __ pc();
    if (hasFPU()) {
      __ vldr_f64(d0, Address(sp));
      __ vabs_f64(d0, d0);
    } else {
      __ ldrd(r0, Address(sp));
      transcendental_entry = true;
    }
    break;
  case Interpreter::java_lang_math_sqrt:
    entry_point = __ pc();
    if (hasFPU()) {
      __ vldr_f64(d0, Address(sp));
      __ vsqrt_f64(d0, d0);
    } else {
      __ ldrd(r0, Address(sp));
      transcendental_entry = true;
    }
    break;
  case Interpreter::java_lang_math_sin :
  case Interpreter::java_lang_math_cos :
  case Interpreter::java_lang_math_tan :
  case Interpreter::java_lang_math_log :
  case Interpreter::java_lang_math_log10 :
  case Interpreter::java_lang_math_exp :
    entry_point = __ pc();
    transcendental_entry = true;
#ifndef HARD_FLOAT_CC
    __ ldrd(r0, Address(sp));
#else
    __ vldr_f64(d0, Address(sp));
#endif // HARD_FLOAT_CC
    break;
  case Interpreter::java_lang_math_pow :
    entry_point = __ pc();
    transcendental_entry = true;
#ifndef HARD_FLOAT_CC
    __ ldrd(r0, Address(sp, 2 * Interpreter::stackElementSize));
    __ ldrd(r2, Address(sp));
#else
    __ vldr_f64(d0, Address(sp, 2 * Interpreter::stackElementSize));
    __ vldr_f64(d1, Address(sp));
#endif // HARD_FLOAT_CC
    break;
  case Interpreter::java_lang_math_fmaD :
  case Interpreter::java_lang_math_fmaF :
    if (UseFMA) {
      __ unimplemented();
    }
    break;
  default:
    ShouldNotReachHere();
  }

  if (entry_point) {
    __ mov(sp, r4);

    if (transcendental_entry) {
      __ mov(r4, lr);
      continuation = r4;
      generate_transcendental_entry(kind);
#ifndef HARD_FLOAT_CC
      if (hasFPU()) {
        __ vmov_f64(d0, r0, r1);
      }
#endif
    }
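    // Under a soft-float ABI the runtime call returns the double in r0:r1;
    // the vmov_f64 above copies it into d0 to match the FPU fast paths,
    // which leave their result in d0.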

    __ b(continuation);
  }

  return entry_point;
}

// double trigonometrics and transcendentals
// static jdouble dsin(jdouble x);
// static jdouble dcos(jdouble x);
// static jdouble dtan(jdouble x);
// static jdouble dlog(jdouble x);
// static jdouble dlog10(jdouble x);
// static jdouble dexp(jdouble x);
// static jdouble dpow(jdouble x, jdouble y);

void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind) {
  address fn = NULL;
  switch (kind) {
#ifdef __SOFTFP__
  case Interpreter::java_lang_math_abs:
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dabs);
    break;
  case Interpreter::java_lang_math_sqrt:
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);
    break;
#endif // __SOFTFP__
  case Interpreter::java_lang_math_sin :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
    break;
  case Interpreter::java_lang_math_cos :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
    break;
  case Interpreter::java_lang_math_tan :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
    break;
  case Interpreter::java_lang_math_log :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
    break;
  case Interpreter::java_lang_math_log10 :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
    break;
  case Interpreter::java_lang_math_exp :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
    break;
  case Interpreter::java_lang_math_pow :
    fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
    break;
  default:
    ShouldNotReachHere();
  }
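  // Realign before calling out: AAPCS requires 8-byte stack alignment at
  // public interfaces, which is presumably what align_stack enforces here.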
  __ align_stack();
  __ mov(rscratch1, fn);
  __ bl(rscratch1);
}

// Abstract method entry
// Attempt to execute an abstract method. Throws an AbstractMethodError.
address TemplateInterpreterGenerator::generate_abstract_entry(void) {
  // rmethod: Method*
  // r4: sender sp

  address entry_point = __ pc();

  // abstract method entry

  // pop return address, reset last_sp to NULL
  __ empty_expression_stack();
  __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)

  // throw exception
  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                                     InterpreterRuntime::throw_AbstractMethodErrorWithMethod),
                                     rmethod);
  // the call_VM checks for exception, so we should never return here.
  __ should_not_reach_here();

  return entry_point;
}

address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
  address entry = __ pc();

#ifdef ASSERT
  {
    Label L;
    __ ldr(rscratch1, Address(rfp,
                       frame::get_interpreter_frame_monitor_block_top_offset() *
                       wordSize));
    __ mov(rscratch2, sp);
    __ cmp(rscratch1, rscratch2); // maximal sp for current rfp (stack
                                  // grows downward)
    __ b(L, Assembler::HS); // check if frame is complete
    __ stop ("interpreter frame not set up");
    __ bind(L);
  }
#endif // ASSERT
  // Restore bcp under the assumption that the current frame is still
  // interpreted
  __ restore_bcp();

  // expression stack must be empty before entering the VM if an
  // exception happened
  __ empty_expression_stack();
  // throw exception
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::throw_StackOverflowError));
  return entry;
}

address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() {
  address entry = __ pc();
  // expression stack must be empty before entering the VM if an
  // exception happened
  __ empty_expression_stack();
  // setup parameters

  // ??? convention: expect aberrant index in register r2
  // ??? convention: expect array in register r3
  __ mov(c_rarg1, r3);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::
                              throw_ArrayIndexOutOfBoundsException),
             c_rarg1, c_rarg2);
  return entry;
}

address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
  address entry = __ pc();

  // object is at TOS
  __ pop(c_rarg1);

  // expression stack must be empty before entering the VM if an
  // exception happened
  __ empty_expression_stack();

  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::
                              throw_ClassCastException),
             c_rarg1);
  return entry;
}

address TemplateInterpreterGenerator::generate_exception_handler_common(
        const char* name, const char* message, bool pass_oop) {
  assert(!pass_oop || message == NULL, "either oop or message but not both");
  address entry = __ pc();
  if (pass_oop) {
    // object is at TOS
    __ pop(c_rarg2);
  }
  // expression stack must be empty before entering the VM if an
  // exception happened
  __ empty_expression_stack();
  // FIXME shouldn't it be in rest of generate_* ?
  // rdispatch is assumed to cache the dispatch table. This code can be
  // reached from a signal handler, so it cannot assume the caller of the
  // excepting code preserved the register; restore it here.
  __ get_dispatch();
  // FIXME shouldn't get_method be here ?
  // setup parameters
  __ lea(c_rarg1, Address((address)name));
  if (pass_oop) {
    __ call_VM(r0, CAST_FROM_FN_PTR(address,
                                    InterpreterRuntime::
                                    create_klass_exception),
               c_rarg1, c_rarg2);
  } else {
    // ExternalAddress can't take NULL because external_word_Relocation
    // will assert.
    if (message != NULL) {
      __ lea(c_rarg2, Address((address)message));
    } else {
      __ mov(c_rarg2, NULL_WORD);
    }
    __ call_VM(r0,
               CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
               c_rarg1, c_rarg2);
  }
  // throw exception
  __ b(address(Interpreter::throw_exception_entry()));
  return entry;
}

address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
  address entry = __ pc();

  __ print_method_exit();
  __ reg_printf("A. return_entry <r1:r0> : 0x%08x%08x\n", r1, r0);

  // Restore stack bottom in case i2c adjusted stack
  __ ldr(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
  // and NULL it as marker that sp is now tos until next java call
  __ mov(rscratch1, 0);
  __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
  __ reg_printf("B. return_entry <r1:r0> : 0x%08x%08x\n", r1, r0);
  __ restore_bcp();
  __ restore_locals();
  __ restore_constant_pool_cache();
  __ get_method(rmethod);
  __ reg_printf("C. return_entry <r1:r0> : 0x%08x%08x\n", r1, r0);

  if (state == atos) {
    Register obj = r0;
    Register mdp = r1;
    Register tmp = r2;
    __ ldr(mdp, Address(rmethod, Method::method_data_offset()));
    __ profile_return_type(mdp, obj, tmp);
  }

  // Pop N words from the stack
  __ get_cache_and_index_at_bcp(r3, r2, 1, index_size);
  __ reg_printf("D. return_entry <r1:r0> : 0x%08x%08x\n", r1, r0);
  __ ldr(r3, Address(r3, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
  __ andr(r3, r3, ConstantPoolCacheEntry::parameter_size_mask);

  __ add(sp, sp, r3, lsl(2));
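  // r3 holds the callee's parameter size in words; lsl(2) scales it by
  // wordSize, so this pops the arguments and leaves sp at the caller's
  // expression stack top.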

  // Restore machine SP
  /*__ ldr(rscratch1, Address(rmethod, Method::const_offset()));
  __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset()));
  __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2);
  __ ldr(rscratch2,
         Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize));
  __ sub(rscratch1, rscratch2, rscratch1, lsl(2));
  __ bic(sp, rscratch1, 0xf);*/

  __ check_and_handle_popframe(rthread);
  __ check_and_handle_earlyret(rthread);

  __ get_dispatch();
  __ reg_printf("E. return_entry <r1:r0> : 0x%08x%08x\n", r1, r0);
  __ dispatch_next(state, step);

  return entry;
}

address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
                                                               int step,
                                                               address continuation) {
  address entry = __ pc();
  __ restore_bcp();
  __ restore_locals();
  __ restore_constant_pool_cache();
  __ get_method(rmethod);

  __ get_dispatch();

  // Calculate stack limit
  __ ldr(rscratch1, Address(rmethod, Method::const_offset()));
  __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset()));
  __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2);
  __ ldr(rscratch2,
         Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize));
  __ sub(rscratch1, rscratch2, rscratch1, lsl(2));
  __ bic(sp, rscratch1, 0xf);
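  // The limit is max_stack words plus the monitor area plus two slots of
  // slack, measured down from the frame's initial sp; bic(sp, rscratch1, 0xf)
  // rounds the result down to a 16-byte boundary.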

  // Restore expression stack pointer
  __ ldr(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
  // NULL last_sp until next java call
  __ mov(rscratch1, 0);
  __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));

  // handle exceptions
  {
    Label L;
    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
    __ cbz(rscratch1, L);
    __ call_VM(noreg,
               CAST_FROM_FN_PTR(address,
                                InterpreterRuntime::throw_pending_exception));
    __ should_not_reach_here();
    __ bind(L);
  }

  if (continuation == NULL) {
    __ dispatch_next(state, step);
  } else {
    __ jump_to_entry(continuation);
  }
  return entry;
}


address TemplateInterpreterGenerator::generate_result_handler_for(
        BasicType type) {
  address entry = __ pc();
  switch (type) {
  case T_BOOLEAN: __ c2bool(r0);         break;
  case T_CHAR   : __ uxth(r0, r0);       break;
  case T_BYTE   : __ sxtb(r0, r0);       break;
  case T_SHORT  : __ sxth(r0, r0);       break;
  case T_INT    : /* nothing to do */    break;
  case T_LONG   : /* nothing to do */    break;
  case T_VOID   : /* nothing to do */    break;
  case T_FLOAT  :
#ifndef HARD_FLOAT_CC
    if (hasFPU()) {
      __ vmov_f32(d0, r0);
    }
#endif
    break;
  case T_DOUBLE :
#ifndef HARD_FLOAT_CC
    if (hasFPU()) {
      __ vmov_f64(d0, r0, r1);
    }
#endif
    break;
  case T_OBJECT :
    // retrieve result from frame
    __ reg_printf("In object result handler\n");
    __ ldr(r0, Address(rfp, frame::get_interpreter_frame_oop_temp_offset() * wordSize));
    // and verify it
    __ verify_oop(r0);
    break;
  default       : ShouldNotReachHere();
  }
  __ b(lr);                                  // return from result handler
  return entry;
}

address TemplateInterpreterGenerator::generate_safept_entry_for(
        TosState state,
        address runtime_entry) {
  address entry = __ pc();
  __ push(state);
  __ call_VM(noreg, runtime_entry);
  __ membar(Assembler::AnyAny);
  __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
  return entry;
}

// Helpers for commoning out cases in the various types of method entries.
//


// increment invocation count & check for overflow
//
// Note: checking for negative value instead of overflow
//       so we have a 'sticky' overflow test
//
// rmethod: method
//
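// As used below, increment_mask_and_jump adds count_increment to the
// counter, ANDs the result with the supplied mask and branches to
// *overflow on Assembler::EQ, i.e. when the masked bits are all zero.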
void TemplateInterpreterGenerator::generate_counter_incr(
        Label* overflow,
        Label* profile_method,
        Label* profile_method_continue) {
  Label done;
  // Note: In tiered we increment either counters in Method* or in MDO depending on whether we're profiling or not.
  if (TieredCompilation) {
    int increment = InvocationCounter::count_increment;
    Label no_mdo;
    if (ProfileInterpreter) {
      // Are we profiling?
      __ ldr(r0, Address(rmethod, Method::method_data_offset()));
      __ cbz(r0, no_mdo);
      // Increment counter in the MDO
      const Address mdo_invocation_counter(r0, in_bytes(MethodData::invocation_counter_offset()) +
                                           in_bytes(InvocationCounter::counter_offset()));
      const Address mask(r0, in_bytes(MethodData::invoke_mask_offset()));
      __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow);
      __ b(done);
    }
    __ bind(no_mdo);
    // Increment counter in MethodCounters
    const Address invocation_counter(rscratch2,
                  MethodCounters::invocation_counter_offset() +
                  InvocationCounter::counter_offset());
    const Address mask(rscratch2, in_bytes(MethodCounters::invoke_mask_offset()));
    __ get_method_counters(rmethod, rscratch2, done);
    __ increment_mask_and_jump(invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow);
    __ bind(done);
  } else { // not TieredCompilation
    const Address backedge_counter(rscratch2,
                  MethodCounters::backedge_counter_offset() +
                  InvocationCounter::counter_offset());
    const Address invocation_counter(rscratch2,
                  MethodCounters::invocation_counter_offset() +
                  InvocationCounter::counter_offset());

    __ get_method_counters(rmethod, rscratch2, done);

    if (ProfileInterpreter) { // %%% Merge this into MethodData*
      __ ldr(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset()));
      __ add(r1, r1, 1);
      __ str(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset()));
    }
    // Update standard invocation counters
    __ ldr(r1, invocation_counter);
    __ ldr(r0, backedge_counter);

    __ add(r1, r1, InvocationCounter::count_increment);
    __ mov(rscratch1, InvocationCounter::count_mask_value);
    __ andr(r0, r0, rscratch1);

    __ str(r1, invocation_counter);
    __ add(r0, r0, r1);                // add both counters

    // profile_method is non-NULL only for interpreted methods, so
    // (profile_method != NULL) == !native_call

    if (ProfileInterpreter && profile_method != NULL) {
      // Test to see if we should create a method data oop
      __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset()));
      __ ldr(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
      __ cmp(r0, rscratch2);
      __ b(*profile_method_continue, Assembler::LT);

      // if no method data exists, go to profile_method
      __ test_method_data_pointer(rscratch2, *profile_method);
    }

    {
      __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset()));
      __ ldr(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_invocation_limit_offset())));
      __ cmp(r0, rscratch2);
      __ b(*overflow, Assembler::HS);
    }
    __ bind(done);
  }
}

void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {

  // Asm interpreter on entry
  // On return (i.e. jump to entry_point) [ back to invocation of interpreter ]
  // Everything as it was on entry

  // InterpreterRuntime::frequency_counter_overflow takes two
  // arguments, the first (thread) is passed by call_VM, the second
  // indicates if the counter overflow occurs at a backwards branch
  // (NULL bcp).  We pass zero for it.  The call returns the address
  // of the verified entry point for the method or NULL if the
  // compilation did not complete (either went background or bailed
  // out).
  __ mov(c_rarg1, 0);
  __ call_VM(noreg,
             CAST_FROM_FN_PTR(address,
                              InterpreterRuntime::frequency_counter_overflow),
             c_rarg1);

  __ b(do_continue);
}

// See if we've got enough room on the stack for locals plus overhead
// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError
// without going through the signal handler, i.e., reserved and yellow zones
// will not be made usable. The shadow zone must suffice to handle the
// overflow.
// The expression stack grows down incrementally, so the normal guard
// page mechanism will work for that.
//
// NOTE: The additional locals are always pushed (this wasn't obvious in
// generate_method_entry), so the guard should work for them too.
//
// Args:
//      r3: number of additional locals this frame needs (what we must check)
//      rmethod: Method*
//
// Kills:
//      r0
void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {

  // monitor entry size: see picture of stack set
  // (generate_method_entry) and frame_aarch32.hpp
  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;

  // total overhead size: entry_size + (saved rfp through expr stack
  // bottom).  be sure to change this if you add/subtract anything
  // to/from the overhead area
  const int overhead_size =
    -(frame::get_interpreter_frame_initial_sp_offset() * wordSize) + entry_size;

  const int page_size = os::vm_page_size();

  Label after_frame_check;

  // See if the frame is greater than one page in size. If so,
  // then we need to verify there is enough stack space remaining
  // for the additional locals.
  __ mov(rscratch1, (page_size - overhead_size) / Interpreter::stackElementSize);
  __ cmp(r3, rscratch1);
  __ b(after_frame_check, Assembler::LS);

  // compute sp as if this were going to be the last frame on
  // the stack before the red zone

  // locals + overhead, in bytes
  __ mov(r0, overhead_size);
  __ add(r0, r0, r3, lsl(Interpreter::logStackElementSize));  // 1 slot per parameter.

  const Address stack_limit(rthread, JavaThread::stack_overflow_limit_offset());
  __ ldr(rscratch1, stack_limit);

#ifdef ASSERT
  Label limit_okay;
  // Verify that thread stack limit is non-zero.
  __ cbnz(rscratch1, limit_okay);
  __ stop("stack overflow limit is zero");
  __ bind(limit_okay);
#endif

  // Add stack limit to locals.
  __ add(r0, r0, rscratch1);

  // Check against the current stack bottom.
  __ cmp(sp, r0);
  __ b(after_frame_check, Assembler::HI);

  // Remove the incoming args, peeling the machine SP back to where it
  // was in the caller.  This is not strictly necessary, but unless we
  // do so the stack frame may have a garbage FP; this ensures a
  // correct call stack that we can always unwind.
  __ mov(sp, r4);

  // Note: the restored frame is not necessarily interpreted.
  // Use the shared runtime version of the StackOverflowError.
  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
  __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));

  // all done with frame size check
  __ bind(after_frame_check);
}

// Allocate monitor and lock method (asm interpreter)
//
// Args:
//      rmethod: Method*
//      rlocals: locals
//
// Kills:
//      r0
//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
//      rscratch1, rscratch2 (scratch regs)
void TemplateInterpreterGenerator::lock_method(void) {
  // synchronize method
  const Address access_flags(rmethod, Method::access_flags_offset());
  const Address monitor_block_top(
        rfp,
        frame::get_interpreter_frame_monitor_block_top_offset() * wordSize);
  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;

#ifdef ASSERT
  {
    Label L;
    __ ldr(r0, access_flags);
    __ tst(r0, JVM_ACC_SYNCHRONIZED);
    __ b(L, Assembler::NE);
    __ stop("method doesn't need synchronization");
    __ bind(L);
  }
#endif // ASSERT

  // get synchronization object
  {
    Label done;
    __ ldr(r0, access_flags);
    __ tst(r0, JVM_ACC_STATIC);
    // get receiver (assume this is frequent case)
    __ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0)));
    __ b(done, Assembler::EQ);
    __ load_mirror(r0, rmethod, r1);

#ifdef ASSERT
    {
      Label L;
      __ cbnz(r0, L);
      __ stop("synchronization object is NULL");
      __ bind(L);
    }
#endif // ASSERT

    __ bind(done);
  }

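  // The monitor area sits between the fixed frame and the expression stack
  // and grows downward; monitor_block_top always points at its lowest
  // (most recently allocated) entry.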
  // add space for monitor & lock
  __ sub(sp, sp, entry_size); // add space for a monitor entry
  __ mov(rscratch1, sp);
  __ str(rscratch1, monitor_block_top);  // set new monitor block top
  // store object
  __ str(r0, Address(sp, BasicObjectLock::obj_offset_in_bytes()));
  __ mov(c_rarg1, sp); // object address
  __ lock_object(c_rarg1);
}

// Generate a fixed interpreter frame. The setup is identical for
// interpreted methods and for native methods, hence the shared code.
//
// Args:
//      lr: return address
//      rmethod: Method*
//      rlocals: pointer to locals
//      stack_pointer: previous sp
//      r4: sender sp
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
  // initialize fixed part of activation frame
  __ reg_printf("About to print native entry, rmethod = %p\n", rmethod);
  __ print_method_entry(rmethod, native_call);

  const int int_frame_size = 10;
  const int common_frame_size = int_frame_size + frame::get_frame_size();
  const int frame_size = native_call ? common_frame_size + 2 : common_frame_size;
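  // Sketch of the int_frame_size (10 word) area, as laid out by the stores
  // below: [saved sp, bcp] in slots 0-1, [locals, cp cache] in 2-3,
  // [mirror, 0] in 4-5, [mdp, Method*] in 6-7 and [last_sp (NULL),
  // sender sp] in 8-9.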

  if (native_call) {
    // add 2 zero-initialized slots for native calls
    __ sub(sp, sp, 2 * wordSize);
    __ mov(rbcp, 0);
    __ strd(rbcp, rbcp, Address(sp));
  } else {
    __ ldr(rscratch1, Address(rmethod, Method::const_offset()));    // get ConstMethod
    __ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase
  }

  __ enter();
  __ sub(sp, sp, int_frame_size * wordSize);

  __ strd(sp, rbcp, Address(sp));

  if (ProfileInterpreter) {
    Label method_data_continue;
    __ ldr(rscratch1, Address(rmethod, Method::method_data_offset()));
    __ cbz(rscratch1, method_data_continue);
    __ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset())));
    __ bind(method_data_continue);
    __ strd(rscratch1, rmethod, Address(sp, 6 * wordSize));  // save Method* and mdp (method data pointer)
  } else {
    __ mov(rscratch1, 0);
    __ strd(rscratch1, rmethod, Address(sp, 6 * wordSize));  // save Method* (no mdp)
  }

  // Get mirror and store it in the frame as GC root for this Method*
  __ load_mirror(rscratch1, rmethod, rcpool);
  __ mov(rscratch2, 0);
  __ strd(rscratch1, rscratch2, Address(sp, 4 * wordSize));

  __ ldr(rcpool, Address(rmethod, Method::const_offset()));
  __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset()));
  __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes()));
  __ strd(rlocals, rcpool, Address(sp, 2 * wordSize));

  __ reg_printf("Three-quarters through\n");
  // set sender sp
  // leave last_sp as null
  __ mov(rscratch1, 0);
  // r4 contains the sender sp
  __ strd(rscratch1, r4, Address(sp, 8 * wordSize));

  // Move SP out of the way
  /*if (! native_call) {
    __ ldr(rscratch1, Address(rmethod, Method::const_offset()));
    __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset()));
    __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2);
    __ sub(rscratch1, sp, rscratch1, lsl(2));
    __ bic(sp, rscratch1, 0xf);
  }*/
  // FIXME This code moves the sp to after the end of the stack - if this is
  // what's happening some calls out of the VM may need to be patched
  __ reg_printf("Fully through\n");
}

// End of helpers

// Various method entries
//------------------------------------------------------------------------------------------------------------------------
//
//

// Method entry for java.lang.ref.Reference.get.
address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
  // Code: _aload_0, _getfield, _areturn
  // parameter size = 1
  //
  // The code that gets generated by this routine is split into 2 parts:
  //    1. The "intrinsified" code for G1 (or any SATB based GC),
  //    2. The slow path - which is an expansion of the regular method entry.
  //
  // Notes:-
  // * In the G1 code we do not check whether we need to block for
  //   a safepoint. If G1 is enabled then we must execute the specialized
  //   code for Reference.get (except when the Reference object is null)
  //   so that we can log the value in the referent field with an SATB
  //   update buffer.
  //   If the code for the getfield template is modified so that the
  //   G1 pre-barrier code is executed when the current method is
  //   Reference.get() then going through the normal method entry
  //   will be fine.
  // * The G1 code can, however, check the receiver object (the instance
  //   of java.lang.Reference) and jump to the slow path if null. If the
  //   Reference object is null then we obviously cannot fetch the referent
  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
  //   regular method entry code to generate the NPE.
  //
  // This code is based on generate_accessor_entry.
  //
  // rmethod: Method*
  // r4: sender sp, must be preserved for the slow path; set SP to it on the fast path

  // LR is live.  It must be saved around calls.

  address entry = __ pc();

  const int referent_offset = java_lang_ref_Reference::referent_offset;
  guarantee(referent_offset > 0, "referent offset not initialized");

  Label slow_path;
  const Register local_0 = c_rarg0;
  // Check if local 0 != NULL
  // If the receiver is null then it is OK to jump to the slow path.
  __ ldr(local_0, Address(sp, 0));
  __ cbz(local_0, slow_path);

  // Load the value of the referent field.
  const Address field_address(local_0, referent_offset);
  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->load_word_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ rscratch2, /*tmp2*/ rscratch1);
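  // The ON_WEAK_OOP_REF decorator tells the barrier-set assembler to emit
  // the pre-barrier an SATB collector such as G1 needs, so the referent is
  // logged even though this entry bypasses the normal getfield path.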

  // areturn
  __ mov(sp, r4);           // set sp to sender sp
  __ b(lr);

  // generate a vanilla interpreter entry as the slow path
  __ bind(slow_path);
  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
  return entry;
}

void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
  // Bang each page in the shadow zone. We can't assume it's been done for
  // an interpreter frame with greater than a page of locals, so each page
  // needs to be checked.  Only true for non-native.
  if (UseStackBanging) {
    const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size();
    const int start_page = native_call ? n_shadow_pages : 1;
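    // For native calls only the outermost page is banged: natives don't
    // grow an expression stack incrementally, so presumably one touch at
    // the far end of the shadow zone suffices.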
    const int page_size = os::vm_page_size();
    __ mov(rscratch1, 0);
    for (int pages = start_page; pages <= n_shadow_pages; pages++) {
      __ sub(rscratch2, sp, pages * page_size);
      __ str(rscratch1, Address(rscratch2));
    }
  }
}


// Interpreter stub for calling a native method. (asm interpreter)
// This sets up a somewhat different looking stack for calling the
// native method than the typical interpreter frame setup.
address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
  // determine code generation flags
  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;

  // r1: Method*
  // r4: sender sp

  address entry_point = __ pc();
  __ reg_printf("entering generate_native_entry, lr = %p, rfp = %p\n\tRBCP = %p\n", lr, rfp, rbcp);

  const Address constMethod       (rmethod, Method::const_offset());
  const Address access_flags      (rmethod, Method::access_flags_offset());
  const Address size_of_parameters(r2, ConstMethod::
                                       size_of_parameters_offset());

  // get parameter size (always needed)
  __ ldr(r2, constMethod);
  __ load_unsigned_short(r2, size_of_parameters);

  // Native calls don't need the stack size check since they have no
  // expression stack and the arguments are already on the stack and
  // we only add a handful of words to the stack.

  // rmethod: Method*
  // r2: size of parameters
  // r4: sender sp

  // for natives the size of locals is zero

  // compute beginning of parameters (rlocals)
  __ add(rlocals, sp, r2, lsl(2));
  __ sub(rlocals, rlocals, wordSize);
  __ reg_printf("(start of parameters) rlocals = %p, nparams = %d\n", rlocals, r2);

  // initialize fixed part of activation frame
  generate_fixed_frame(true);
  __ reg_printf("pushed new fixed frame, lr = %p, rfp = %p\n", lr, rfp);

  Register locals_sp = r4; // this overwrites rdispatch; we can restore it at the end
  // !! If this changes, change the end of arguments in interpreterRT_aarch32.cpp
  //__ mov(r4, sp); //Save top of arguments

  // make sure method is native & not abstract
#ifdef ASSERT
  __ ldr(r0, access_flags);
  {
    Label L;
    __ tst(r0, JVM_ACC_NATIVE);
    __ b(L, Assembler::NE);
    __ stop("tried to execute non-native method as native");
    __ bind(L);
  }
  {
    Label L;
    __ tst(r0, JVM_ACC_ABSTRACT);
    __ b(L, Assembler::EQ);
    __ stop("tried to execute abstract method in interpreter");
    __ bind(L);
  }
#endif

  // Since at this point in the method invocation the exception
  // handler would try to exit the monitor of synchronized methods
  // which hasn't been entered yet, we set the thread local variable
  // _do_not_unlock_if_synchronized to true. The remove_activation
  // will check this flag.

  const Address do_not_unlock_if_synchronized(rthread,
        in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
  __ mov(rscratch2, true);
  __ strb(rscratch2, do_not_unlock_if_synchronized);

  // increment invocation count & check for overflow
  Label invocation_counter_overflow;
  if (inc_counter) {
    generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
  }

  Label continue_after_compile;
  __ bind(continue_after_compile);

  bang_stack_shadow_pages(true);
  // Note rscratch1 will contain zero here due to bang_stack_shadow_pages
  // reset the _do_not_unlock_if_synchronized flag
  //__ mov(rscratch1, 0);
  __ strb(rscratch1, do_not_unlock_if_synchronized);

  // check for synchronized methods
  // Must happen AFTER invocation_counter check and stack overflow check,
  // so method is not locked if overflows.
  if (synchronized) {
    lock_method();
  } else {
    // no synchronization necessary
#ifdef ASSERT
    {
      Label L;
      __ ldr(r0, access_flags);
      __ tst(r0, JVM_ACC_SYNCHRONIZED);
      __ b(L, Assembler::EQ);
      __ stop("method needs synchronization");
      __ bind(L);
    }
#endif
  }

  // start execution
#ifdef ASSERT
  {
    Label L;
    const Address monitor_block_top(rfp,
                 frame::get_interpreter_frame_monitor_block_top_offset() * wordSize);
    __ ldr(rscratch1, monitor_block_top);
    __ cmp(sp, rscratch1);
    __ b(L, Assembler::EQ);
    __ stop("broken stack frame setup in interpreter");
    __ bind(L);
  }
#endif

  // jvmti support
  __ notify_method_entry();

  const Register result_handler = rlocals;
  // result_handler is recomputed for the native function and is not
  // written until after the function has been called

  // allocate space for parameters
  __ ldr(rscratch1, Address(rmethod, Method::const_offset()));
  __ load_unsigned_short(rscratch1, Address(rscratch1, ConstMethod::size_of_parameters_offset()));

  __ sub(sp, sp, rscratch1, lsl(Interpreter::logStackElementSize + 1));
  // The lsl(... + 1) doubles the amount of space allocated for parameters.
  // This is likely more than needed: in the worst case, when parameters have
  // to be placed on the stack they are aligned as LONG | INT | EMPTY | LONG
  // ..., which increases the space used by at most half.
  __ align_stack();
  __ mov(locals_sp, sp);
  __ reg_printf("Stack Pointer on arg copy, sp = %p, locals_sp = %p, rlocals = %p\n", sp, locals_sp, rlocals);

  // get signature handler
  {
    Label L;
    __ ldr(rscratch1, Address(rmethod, Method::signature_handler_offset()));
    __ cmp(rscratch1, 0);
    __ b(L, Assembler::NE);
    __ reg_printf("Prepare_native_call, locals_sp = %p, rlocals = %p\n", locals_sp, rlocals);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                                       InterpreterRuntime::prepare_native_call), rmethod);
    __ reg_printf("Finished prepare_native_call, locals_sp = %p, rlocals = %p\n", locals_sp, rlocals);
    __ ldr(rscratch1, Address(rmethod, Method::signature_handler_offset()));
    __ bind(L);
  }

  // call signature handler
  assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals,
         "adjust this code");
  assert(InterpreterRuntime::SignatureHandlerGenerator::to() == locals_sp,
         "adjust this code");
  assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1,
         "adjust this code");

  // The generated handlers do not touch rmethod (the method).
  // However, large signatures cannot be cached and are generated
  // each time here.  The slow-path generator can do a GC on return,
  // so we must reload it after the call.
  __ reg_printf("**BEFORE**\nrlocals = %p, locals_sp = %p, sp = %p\n", rlocals, locals_sp, sp);
  __ reg_printf("About to call the Method::signature_handler = %p\n", rscratch1);
  __ bl(rscratch1);
  __ reg_printf("**AFTER**\nr0 : %p, r1 : %p, r2 : %p\n", r0, r1, r2);
  __ reg_printf("r3 : %p, sp : %p\n", r3, sp);
  __ get_method(rmethod);        // slow path can do a GC, reload rmethod

  // result handler is in r0
  // set result handler
  __ mov(result_handler, r0);
  // pass mirror handle if static call
  {
    Label L;
    __ ldr(rscratch1, Address(rmethod, Method::access_flags_offset()));
    __ tst(rscratch1, JVM_ACC_STATIC);
    __ b(L, Assembler::EQ);
    // get mirror
    __ load_mirror(rscratch1, rmethod, r1);
    // copy mirror into activation frame
    __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_oop_temp_offset() * wordSize));
    // pass handle to mirror
    __ add(c_rarg1, rfp, frame::get_interpreter_frame_oop_temp_offset() * wordSize);
    __ bind(L);
  }

  // get native function entry point in r14
  Register native_entry_point = r14;

  {
    Label L;
    __ ldr(native_entry_point, Address(rmethod, Method::native_function_offset()));
    address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
    __ mov(rscratch2, unsatisfied);
    __ ldr(rscratch2, rscratch2);
    __ reg_printf("QWERTY native_entry_point = %p, unsatisfied_link_entry_point = %p\n", native_entry_point, rscratch2);
    __ cmp(native_entry_point, rscratch2);
    __ b(L, Assembler::NE);
    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                                       InterpreterRuntime::prepare_native_call), rmethod);
    __ get_method(rmethod);
    __ ldr(native_entry_point, Address(rmethod, Method::native_function_offset()));
    __ bind(L);
  }

  // pass JNIEnv
  __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset()));

  // It is enough that the pc() points into the right code
  // segment. It does not have to be the correct return pc.
  __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1);

  // change thread state
#ifdef ASSERT
  {
    Label L;
    __ ldr(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
    __ cmp(rscratch1, _thread_in_Java);
    __ b(L, Assembler::EQ);
    __ stop("Wrong thread state in native stub");
    __ bind(L);
  }
#endif

  // Change state to native
  __ mov(rscratch1, _thread_in_native);
  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
  __ dmb(Assembler::ISH);
  __ str(rscratch1, Address(rscratch2));

  __ reg_printf("Calling native method, lr = %p & rmethod = %p\n", lr, rmethod);
  // Call the native method.
  /*__ reg_printf("**ONCALL**\nr0 : %p\nr1 : %p\nr2 : %p\n", r0, r1, r2);
  __ reg_printf("r3 : %p\n\nr4 : %p\nrloc : %p\n", r3, r4, rlocals);*/
  __ reg_printf("Stack Pointer on entry to native, sp = %p\n", sp);
  __ bl(native_entry_point);
  __ reg_printf("Returned from native, lr = %p, r1 = %p, r0 = %p\n", lr, r1, r0);
  __ maybe_isb();
  __ get_method(rmethod);
  // result potentially in r0, <r0:r1> or d0

  // make room for the pushes we're about to do
  //__ sub(rscratch1, sp, 4 * wordSize);
  //__ bic(sp, rscratch1, 0xf);
  // NOTE: The order of these pushes is known to frame::interpreter_frame_result
  // in order to extract the result of a method call. If the order of these
  // pushes change or anything else is added to the stack then the code in
  // interpreter_frame_result must also change.
  __ reg_printf("Before push dtos, ltos. sp = %p\n", sp);
  __ push(dtos);
  __ push(ltos);

  // change thread state
  __ mov(rscratch1, _thread_in_native_trans);
  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
  __ dmb(Assembler::ISH);
  __ str(rscratch1, Address(rscratch2));
  __ reg_printf("before os::is_MP\n");
  if (os::is_MP()) {
    if (UseMembar) {
      // Force this write out before the read below
      __ membar(Assembler::AnyAny);
    } else {
      // Write serialization page so VM thread can do a pseudo remote membar.
      // We use the current thread pointer to calculate a thread specific
      // offset to write to within the page. This minimizes bus traffic
      // due to cache line collision.
      __ serialize_memory(rthread, rscratch2);
    }
  }
  __ reg_printf("after os::is_MP\n");
  // check for safepoint operation in progress and/or pending suspend requests
  {
    Label L, Continue;
    __ safepoint_poll_acquire(L);
    __ ldr(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
    __ cbz(rscratch2, Continue);
    __ bind(L);

    // Don't use call_VM as it will see a possible pending exception
    // and forward it and never return here preventing us from
    // clearing _last_native_pc down below. So we do a runtime call by
    // hand.
    //
    __ mov(c_rarg0, rthread);
    __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
    //__ blrt(rscratch2, 1, 0, 0);
    __ bl(rscratch2);
    __ maybe_isb();
    __ get_method(rmethod);
    __ bind(Continue);
  }
  __ reg_printf("finished safepoint check\n");
  // change thread state
  __ mov(rscratch1, _thread_in_Java);
  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
  __ dmb(Assembler::ISH);
  __ str(rscratch1, Address(rscratch2));

  // reset_last_Java_frame
  __ reset_last_Java_frame(true);

  __ mov(rscratch1, 0);
  if (CheckJNICalls) {
    // clear_pending_jni_exception_check
    __ str(rscratch1, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset()));
  }

  // reset handle block
  __ ldr(rscratch2, Address(rthread, JavaThread::active_handles_offset()));
  __ str(rscratch1, Address(rscratch2, JNIHandleBlock::top_offset_in_bytes()));

  // If result is an oop unbox and store it in frame where gc will see it
  // and result handler will pick it up
  __ reg_printf("finished checking last_Java_frame\n");
  {
    Label no_oop, not_weak, store_result;
    //__ bkpt(345);
    //__ adr(rscratch2, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
    __ mov(rscratch2, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
    __ reg_printf("Comparing rscratch2 = %p and result_handler = %p\n", rscratch2, result_handler);

    __ cmp(rscratch2, result_handler);
    __ b(no_oop, Assembler::NE);
    __ reg_printf("It's an oop.\n");
    // Unbox oop result, e.g. JNIHandles::resolve result.
    __ pop(ltos);
    __ resolve_jobject(r0, rthread, rscratch2);
    __ str(r0, Address(rfp, frame::get_interpreter_frame_oop_temp_offset() * wordSize));
    // keep stack depth as expected by pushing oop which will eventually be discarded
    __ push(ltos);
    __ bind(no_oop);
  }

  {
    Label no_reguard;
    __ lea(rscratch1, Address(rthread, in_bytes(JavaThread::stack_guard_state_offset())));
    __ ldrb(rscratch1, Address(rscratch1));
    __ cmp(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled);
    __ b(no_reguard, Assembler::NE);

    __ pusha(); // XXX only save smashed registers
    __ mov(c_rarg0, rthread);
    __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
    __ bl(rscratch2);
    __ popa(); // XXX only restore smashed registers
    __ bind(no_reguard);
  }
  __ reg_printf("Restoring java-ish things\n");
  // The method register is junk from after the thread_in_native transition
  // until here.  Also can't call_VM until the bcp has been
  // restored.  Need bcp for throwing exception below so get it now.
  __ get_method(rmethod);
  __ get_dispatch(); // used to save sp in for args
  // restore bcp to have legal interpreter frame, i.e., bci == 0 <=>
  // rbcp == code_base()
  __ ldr(rbcp, Address(rmethod, Method::const_offset()));    // get ConstMethod*
  __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); // get codebase
  // handle exceptions (exception handling will handle unlocking!)
  {
    Label L;
    __ reg_printf("Checking pending exceptions\n");
    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
    __ cbz(rscratch1, L);
    // Note: At some point we may want to unify this with the code
    // used in call_VM_base(); i.e., we should use the
    // StubRoutines::forward_exception code. For now this doesn't work
    // here because the sp is not correctly set at this point.
    __ reg_printf("Calling vm to throw_pending_exception\n");

    // Need to restore lr? - introduced on aarch32 port
    //__ ldr(lr, Address(rfp, frame::get_return_addr_offset()));

    __ MacroAssembler::call_VM(noreg,
                               CAST_FROM_FN_PTR(address,
                               InterpreterRuntime::throw_pending_exception));
    __ should_not_reach_here();
    __ bind(L);
  }

  // do unlocking if necessary
  {
    Label L;
    __ reg_printf("testing if we need to unlock\n");
    __ ldr(rscratch1, Address(rmethod, Method::access_flags_offset()));
    __ tst(rscratch1, JVM_ACC_SYNCHRONIZED);
    __ b(L, Assembler::EQ);
    // the code below should be shared with interpreter macro
    // assembler implementation
    {
      Label unlock;
      // BasicObjectLock will be first in list, since this is a
      // synchronized method. However, need to check that the object
      // has not been unlocked by an explicit monitorexit bytecode.
      // the slow unlock path expects the monitor in c_rarg1
1409       __ lea (c_rarg1, Address(rfp,   // address of first monitor
1410                                (intptr_t)(frame::get_interpreter_frame_initial_sp_offset() *
1411                                           wordSize - sizeof(BasicObjectLock))));
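      // Frame math used above: the single BasicObjectLock of a synchronized
      // native method sits directly below the initial expression stack, at
      //   rfp + initial_sp_offset * wordSize - sizeof(BasicObjectLock)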
1412 
1413       __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
1414       __ reg_printf("Checking if we are already unlocked\n");
1415       __ cbnz(rscratch1, unlock);
1416 
1417       // Entry already unlocked, need to throw exception
1418       __ MacroAssembler::call_VM(noreg,
1419                                  CAST_FROM_FN_PTR(address,
1420                    InterpreterRuntime::throw_illegal_monitor_state_exception));
1421       __ should_not_reach_here();
1422 
1423       __ bind(unlock);
1424       __ reg_printf("Doing unlock\n");
1425       __ unlock_object(c_rarg1);
1426     }
1427     __ bind(L);
1428   }
1429   __ reg_printf("finished unlocking\n");
1430   // jvmti support
1431   // Note: This must happen _after_ handling/throwing any exceptions since
1432   //       the exception handler code notifies the runtime of method exits
1433   //       too. If this happens before, method entry/exit notifications are
1434   //       not properly paired (was bug - gri 11/22/99).
1435   __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
1436 
  // restore potential result in r0:r1 (and d0), then call the result
  // handler to convert and handle the result
1439   __ reg_printf("Before pop dtos, ltos. sp = %p\n", sp);
1440   __ pop(ltos);
1441   __ pop(dtos);
1442 
1443   __ reg_printf("Calling result handler, r1 = %p, r0 = %p\n", r1, r0);
1444   __ bl(result_handler);
1445   __ reg_printf("Finished result_handler\n RFP NOW = %p, r0 = %p\n", rfp, r0);
1446 
  // remove activation; restore sp to sender_sp
1448   __ ldr(rscratch1, Address(rfp,
1449                     frame::get_interpreter_frame_sender_sp_offset() *
1450                     wordSize)); // get sender sp
1451   // remove frame anchor & restore sp
1452   __ leave();
1453 
1454   __ mov(sp, rscratch1); // Native frame so two extra fields
1455   __ reg_printf("Returning to Java execution, restored frame = %p, lr = %p\n\tRBCP = %p\n", rfp, lr, rbcp);
1456   __ b(lr);
1457 
1458   if (inc_counter) {
1459     // Handle overflow of counter and compile method
1460     __ bind(invocation_counter_overflow);
1461     generate_counter_overflow(continue_after_compile);
1462   }
1463 
1464   return entry_point;
1465 }
1466 
1467 address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
1468   if (UseCRC32Intrinsics) {
1469     address entry = __ pc();
1470 
1471     // rmethod: Method*
1472     // sp: args
1473 
1474     Label slow_path;
1475     // If we need a safepoint check, generate full interpreter entry.
1476     __ safepoint_poll(slow_path);
1477 
    // We don't generate a local frame and don't align the stack, because
    // we call stub code and there is no safepoint on this path.
1480 
1481     // Load parameters
1482     const Register crc = c_rarg0;  // crc
1483     const Register val = c_rarg1;  // source java byte value
    const Register tbl = c_rarg2;  // CRC lookup table address
1485 
1486     // Arguments are reversed on java expression stack
1487     __ ldr(val, Address(sp, 0));              // byte value
1488     __ ldr(crc, Address(sp, wordSize));       // Initial CRC
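    // For reference, the intrinsified method is the private static helper
    //   int update(int crc, int b)
    // in java.util.zip.CRC32, so with arguments reversed the byte value sits
    // at sp and the incoming crc at sp + wordSize.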
1489 
1490     __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
1491     __ inv(crc, crc);
1492     __ update_byte_crc32(crc, val, tbl);
1493     __ inv(crc, crc); // result in c_rarg0
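    // CRC32 keeps its state bit-inverted between updates, hence the
    // inversions before and after the table-driven byte update.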
1494 
    __ mov(sp, r4); // Restore the caller's SP
1496     __ ret(lr);
1497 
1498     // generate a vanilla native entry as the slow path
1499     __ bind(slow_path);
1500     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
1501 
1502     return entry;
1503   }
1504   return NULL;
1505 }
1506 
1507 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_inner(AbstractInterpreter::MethodKind kind, int is_crc32c) {
1508   if (!is_crc32c ? UseCRC32Intrinsics : UseCRC32CIntrinsics) {
1509     address entry = __ pc();
1510 
    // rmethod: Method*
    // sp: senderSP, must be preserved for slow path
1513 
1514     Label slow_path;
1515     // If we need a safepoint check, generate full interpreter entry.
1516     __ safepoint_poll(slow_path);
1517 
    // We don't generate a local frame and don't align the stack, because
    // we call stub code and there is no safepoint on this path.
1520 
1521     // Load parameters
1522     const Register crc = c_rarg0;  // crc
1523     const Register buf = c_rarg1;  // source java byte array address
1524     const Register len = c_rarg2;  // length
    const Register off = len;      // offset (aliases 'len'; the two are never live at the same time)
    const Register tmp = rscratch1;// temporary used to load 'end' in the CRC32C case
1527 
1528     // Arguments are reversed on java expression stack
1529     // Calculate address of start element
1530     if (!is_crc32c ? kind == Interpreter::java_util_zip_CRC32_updateByteBuffer :
1531                      kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
1532       __ ldr(buf, Address(sp, 2*wordSize)); // long buf
1533       __ ldr(off, Address(sp, wordSize)); // offset
1534       __ add(buf, buf, off); // + offset
1535       __ ldr(crc, Address(sp, 4*wordSize)); // Initial CRC
1536     } else {
1537       __ ldr(buf, Address(sp, 2*wordSize)); // byte[] array
1538       __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1539       __ ldr(off, Address(sp, wordSize)); // offset
1540       __ add(buf, buf, off); // + offset
1541       __ ldr(crc, Address(sp, 3*wordSize)); // Initial CRC
1542     }
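    // Expression stack layouts implied by the loads above (top of stack first):
    //   updateBytes:        [len|end] [off] [byte[] buf] [crc]
    //   update*ByteBuffer:  [len|end] [off] [long addr, 2 slots] [crc]
    // For CRC32C the first slot holds 'end' rather than 'len'; see below.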
1543 
1544     // Can now load 'len' since we're finished with 'off'
1545     if (!is_crc32c) {
1546       __ ldr(len, Address(sp)); // Length
1547     } else {
1548       __ ldr(tmp, Address(sp));
1549       // len = end - offset
1550       __ sub(len, tmp, off);
1551     }
1552 
1553     __ mov(sp, r4); // Restore the caller's SP
1554 
1555     // We are frameless so we can just jump to the stub.
1556     __ b(CAST_FROM_FN_PTR(address, !is_crc32c ? StubRoutines::updateBytesCRC32() :
1557                                                 StubRoutines::updateBytesCRC32C()));
1558 
1559     // generate a vanilla native entry as the slow path
1560     __ bind(slow_path);
1561 
1562     if (!is_crc32c)
1563       __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
1564     else
1565       __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
1566 
1567     return entry;
1568   }
1569   return NULL;
1570 }
1571 
1572 address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
1573     return generate_CRC32_updateBytes_inner(kind, false);
1574 }
1575 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
1576     return generate_CRC32_updateBytes_inner(kind, true);
1577 }
1578 
1579 address TemplateInterpreterGenerator::generate_aescrypt_block_entry(AbstractInterpreter::MethodKind kind) {
  // TODO: enable once class field offsets are known at this point
1581   if (false && UseAESIntrinsics) {
1582     const int K_offset = com_sun_crypto_provider_AESCrypt::K_offset();
1583     guarantee(K_offset > 0, "referent offset not initialized");
1584 
1585     address entry = __ pc();
1586 
1587     Label slow_path;
1588     // If we need a safepoint check, generate full interpreter entry.
1589     __ safepoint_poll(slow_path);
1590 
1591     // Load parameters
1592     const Register from = c_rarg0; // source java byte array address
    const Register to = c_rarg1; // destination java byte array address
    const Register key = c_rarg2; // AESCrypt object, then its expanded key (int[]) address
    const Register off = c_rarg3; // offset
1596 
1597     // Arguments are reversed on java expression stack
1598     // Calculate address of start element
1599     __ ldr(off, Address(sp)); // to buffer offset
1600     __ ldr(to, Address(sp, wordSize)); // to buffer
1601     __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1602     __ add(to, to, off);
1603     __ ldr(off, Address(sp, 2 * wordSize)); // from buffer offset
1604     __ ldr(from, Address(sp, 3 * wordSize)); // from buffer
1605     __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1606     __ add(from, from, off);
    // Load the value of the K field (the expanded key) from the AESCrypt object.
1608     __ ldr(key, Address(sp, 4 * wordSize)); // object itself
1609     const Address field_address(key, K_offset);
1610     __ load_heap_oop(key, field_address);
1611     __ add(key, key, arrayOopDesc::base_offset_in_bytes(T_INT)); // + header size
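    // key now points at the first element of AESCrypt.K, the int[] of
    // expanded round keys, which is presumably the layout the stub expects.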
1612 
1613     __ mov(sp, r4); // Restore the caller's SP
1614 
1615     if (kind == Interpreter::com_sun_crypto_provider_AESCrypt_encryptBlock) {
1616       // We are frameless so we can just jump to the stub.
1617       __ b(CAST_FROM_FN_PTR(address, StubRoutines::aescrypt_encryptBlock()));
1618     } else {
1619       // We are frameless so we can just jump to the stub.
1620       __ b(CAST_FROM_FN_PTR(address, StubRoutines::aescrypt_decryptBlock()));
1621     }
1622 
1623     // generate a vanilla native entry as the slow path
1624     __ bind(slow_path);
1625     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
1626 
1627     return entry;
1628   }
1629   return NULL;
1630 }
1631 
1632 address TemplateInterpreterGenerator::generate_cipherBlockChaining_encryptAESCrypt_entry(AbstractInterpreter::MethodKind kind) {
  // TODO: enable once class field offsets are known at this point
1634   if (false && UseAESIntrinsics && UseNeon) {
1635     address entry = __ pc();
1636 
1637     Label slow_path;
1638     // If we need a safepoint check, generate full interpreter entry.
1639     __ safepoint_poll(slow_path);
1640 
1641     const int embeddedCipher_offset = com_sun_crypto_provider_FeedbackCipher::embeddedCipher_offset();
1642     guarantee(embeddedCipher_offset > 0, "referent offset not initialized");
1643     const int K_offset = com_sun_crypto_provider_AESCrypt::K_offset();
1644     guarantee(K_offset > 0, "referent offset not initialized");
1645     const int r_offset = com_sun_crypto_provider_CipherBlockChaining::r_offset();
1646     guarantee(r_offset > 0, "referent offset not initialized");
1647 
1648     // Load parameters
1649     const Register from = c_rarg0; // source java byte array address
1650     const Register to = c_rarg1; // dest java byte array address
1651     const Register key = c_rarg2; // key java byte array address
1652     const Register rvec = c_rarg3; // rvec java byte array address
1653     const Register len = r4; // len of the input
1654     const Register off = r5; // offset
1655     const Register sp_pointer = r6; // sp
1656 
1657     __ mov(sp_pointer, r4);
1658     // Arguments are reversed on java expression stack:
    // outBuffer offset, outBuffer, input length, inBuffer offset, inBuffer, this
1660     // Calculate address of start element
1661     __ ldr(off, Address(sp)); // to buffer offset
1662     __ ldr(to, Address(sp, wordSize)); // to buffer
1663     __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1664     __ add(to, to, off);
1665     __ ldr(len, Address(sp, 2 * wordSize)); // len
1666     __ ldr(off, Address(sp, 3 * wordSize)); // from buffer offset
1667     __ ldr(from, Address(sp, 4 * wordSize)); // from buffer
1668     __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1669     __ add(from, from, off);
    // Load the value of the r field (the rvec byte[]) from the CipherBlockChaining object.
1671     __ ldr(rvec, Address(sp, 5 * wordSize)); // object itself
1672     const Address field_address(rvec, r_offset);
1673     __ load_heap_oop(rvec, field_address);
1674     __ add(rvec, rvec, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1675 
1676     __ ldr(key, Address(sp, 5 * wordSize)); // object itself
1677     const Address field_address2(key, embeddedCipher_offset);
1678     __ load_heap_oop(key, field_address2);
1679     const Address field_address3(key, K_offset);
1680     __ load_heap_oop(key, field_address3);
1681     __ add(key, key, arrayOopDesc::base_offset_in_bytes(T_INT)); // + header size
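    // Two dereferences above: CipherBlockChaining.embeddedCipher yields the
    // AESCrypt instance, and its K field yields the int[] round keys; key
    // then points at the first array element.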
1682 
1683     __ mov(sp, sp_pointer); // Restore the caller's SP
1684 
1685     if (kind == Interpreter::com_sun_crypto_provider_CipherBlockChaining_encrypt) {
1686       // We are frameless so we can just jump to the stub.
1687       __ b(CAST_FROM_FN_PTR(address, StubRoutines::cipherBlockChaining_encryptAESCrypt_special()));
1688     } else {
1689       // We are frameless so we can just jump to the stub.
1690       __ b(CAST_FROM_FN_PTR(address, StubRoutines::cipherBlockChaining_decryptAESCrypt_special()));
1691     }
1692 
1693     // generate a vanilla native entry as the slow path
1694     __ bind(slow_path);
1695 
1696     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
1697 
1698     return entry;
1699   }
1700   return NULL;
1701 }
1702 
1703 address TemplateInterpreterGenerator::generate_SHA_implCompress_entry(AbstractInterpreter::MethodKind kind) {
  // TODO: enable once class field offsets are known at this point
1705   if (false && ((UseSHA1Intrinsics && kind == Interpreter::sun_security_provider_SHA_implCompress) ||
1706       (UseSHA256Intrinsics && kind == Interpreter::sun_security_provider_SHA2_implCompress) ||
1707       (UseSHA512Intrinsics && kind == Interpreter::sun_security_provider_SHA5_implCompress))) {
1708     address entry = __ pc();
1709 
1710     Label slow_path;
1711     // If we need a safepoint check, generate full interpreter entry.
1712     __ safepoint_poll(slow_path);
1713 
1714     int state_offset;
1715     int state_data_offset;
1716     address stub_addr;
1717     switch (kind) {
1718       case Interpreter::sun_security_provider_SHA_implCompress:
1719         state_offset = sun_security_provider_SHA::state_offset();
1720         state_data_offset = arrayOopDesc::base_offset_in_bytes(T_INT);
1721         stub_addr = StubRoutines::sha1_implCompress();
1722         break;
1723       case Interpreter::sun_security_provider_SHA2_implCompress:
1724         state_offset = sun_security_provider_SHA2::state_offset();
1725         state_data_offset = arrayOopDesc::base_offset_in_bytes(T_INT);
1726         stub_addr = StubRoutines::sha256_implCompress();
1727         break;
1728       case Interpreter::sun_security_provider_SHA5_implCompress:
1729         state_offset = sun_security_provider_SHA5::state_offset();
1730         state_data_offset = arrayOopDesc::base_offset_in_bytes(T_LONG);
1731         stub_addr = StubRoutines::sha512_implCompress();
1732         break;
1733       default:
        ShouldNotReachHere(); return NULL; // unreachable; the return pacifies gcc
1735     }
1736     guarantee(state_offset > 0, "referent offset not initialized");
1737 
1738     // Load parameters
1739     const Register from = c_rarg0; // source java byte array address
1740     const Register state  = c_rarg1; // state java byte array address
1741     const Register off = r3; // offset
1742 
1743     // Arguments are reversed on java expression stack:
    // fromBufferOffset, fromBuffer
1745     // Calculate address of start element
1746     __ ldr(off, Address(sp)); // from buffer offset
1747     __ ldr(from, Address(sp, wordSize)); // from buffer
1748     __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
1749     __ add(from, from, off);
    // Load the digest state array from the object's state field.
1751     __ ldr(state, Address(sp, 2 * wordSize)); // object itself
1752     const Address field_address(state, state_offset);
1753     __ load_heap_oop(state, field_address);
1754     __ add(state, state, state_data_offset); // + header size
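    // state now points at the first element of the digest state array
    // (int[] for SHA-1/SHA-256, long[] for SHA-512, per state_data_offset).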
1755 
1756     __ mov(sp, r4); // Restore the caller's SP
1757 
1758     // We are frameless so we can just jump to the stub.
1759     __ b(CAST_FROM_FN_PTR(address, stub_addr));
1760 
1761     // generate a vanilla native entry as the slow path
1762     __ bind(slow_path);
1763 
1764     __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
1765 
1766     return entry;
1767   }
1768   return NULL;
1769 }
1770 
1771 //
1772 // Generic interpreted method entry to (asm) interpreter
1773 //
1774 address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
1775   // determine code generation flags
1776   bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
1777 
1778   // r4: sender sp
1779   address entry_point = __ pc();
1780 
1781   const Address constMethod(rmethod, Method::const_offset());
1782   const Address access_flags(rmethod, Method::access_flags_offset());
1783   const Address size_of_parameters(r3,
1784                                    ConstMethod::size_of_parameters_offset());
1785   const Address size_of_locals(r3, ConstMethod::size_of_locals_offset());
1786 
1787   // get parameter size (always needed)
1788   // need to load the const method first
1789   __ ldr(r3, constMethod);
1790   __ load_unsigned_short(r2, size_of_parameters);
1791 
1792   // r2: size of parameters
1793 
1794   __ load_unsigned_short(r3, size_of_locals); // get size of locals in words
1795   __ sub(r3, r3, r2); // r3 = no. of additional locals
1796 
1797   // see if we've got enough room on the stack for locals plus overhead.
1798   generate_stack_overflow_check();
1799 
1800   // compute beginning of parameters (rlocals)
1801   __ add(rlocals, sp, r2, lsl(2));
1802   __ sub(rlocals, rlocals, wordSize);
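  // rlocals now addresses Java local 0 (the first parameter); the caller
  // already pushed the parameters, so local 0 is at
  //   sp + (size_of_parameters - 1) * wordSize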
1803 
1804   // Make room for locals
1805   __ sub(rscratch1, sp, r3, lsl(2));
1806   // Align the sp value
1807   __ bic(sp, rscratch1, StackAlignmentInBytes-1);
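  // bic clears the low alignment bits, e.g. with StackAlignmentInBytes == 8:
  //   sp = (sp - locals_size_in_bytes) & ~7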
1808 
1809   // r3 - # of additional locals
1810   // allocate space for locals
1811   // explicitly initialize locals
1812   {
1813     Label exit, loop;
1814     __ mov(rscratch2, 0);
1815     __ cmp(r3, 0);
1816     __ b(exit, Assembler::LE); // do nothing if r3 <= 0
1817     __ bind(loop);
1818     __ str(rscratch2, Address(__ post(rscratch1, wordSize)));
1819     __ subs(r3, r3, 1); // until everything initialized
1820     __ b(loop, Assembler::NE);
1821     __ bind(exit);
1822   }
1823   __ reg_printf("Done locals space\n", r2);
1824 
1825   // initialize fixed part of activation frame
1826   __ reg_printf("About to do fixed frame\n", r2);
1827   generate_fixed_frame(false);
1828   // And the base dispatch table
1829   __ get_dispatch();
1830   // make sure method is not native & not abstract
1831   __ reg_printf("Just done generate_fixed_frame; rmethod = %p\n", rmethod);
1832 #ifdef ASSERT
1833   __ ldr(r0, access_flags);
1834   {
1835     Label L;
1836     __ tst(r0, JVM_ACC_NATIVE);
1837     __ b(L, Assembler::EQ);
1838     __ stop("tried to execute native method as non-native");
1839     __ bind(L);
1840   }
1841   {
1842     Label L;
1843     __ tst(r0, JVM_ACC_ABSTRACT);
1844     __ b(L, Assembler::EQ);
1845     __ stop("tried to execute abstract method in interpreter");
1846     __ bind(L);
1847   }
1848 #endif
1849 
  // Since at this point in the method invocation the exception
  // handler would try to exit the monitor of a synchronized method
  // which has not been entered yet, we set the thread-local variable
  // _do_not_unlock_if_synchronized to true. The remove_activation
  // will check this flag.
1855 
  const Address do_not_unlock_if_synchronized(rthread,
        in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
1858   __ mov(rscratch2, true);
1859   __ strb(rscratch2, do_not_unlock_if_synchronized);
1860 
1861   Label no_mdp;
1862   Register mdp = r3;
1863   __ ldr(mdp, Address(rmethod, Method::method_data_offset()));
1864   __ cbz(mdp, no_mdp);
1865   __ add(mdp, mdp, in_bytes(MethodData::data_offset()));
1866   __ profile_parameters_type(mdp, r1, r2);
1867   __ bind(no_mdp);
1868 
1869   // increment invocation count & check for overflow
1870   Label invocation_counter_overflow;
1871   Label profile_method;
1872   Label profile_method_continue;
1873   if (inc_counter) {
1874     generate_counter_incr(&invocation_counter_overflow,
1875                           &profile_method,
1876                           &profile_method_continue);
1877     if (ProfileInterpreter) {
1878       __ bind(profile_method_continue);
1879     }
1880   }
1881 
1882   Label continue_after_compile;
1883   __ bind(continue_after_compile);
1884 
1885   bang_stack_shadow_pages(false);
1886   // Note rscratch1 will contain zero here
1887   // reset the _do_not_unlock_if_synchronized flag
1888   __ strb(rscratch1, do_not_unlock_if_synchronized);
1889 
1890   // check for synchronized methods
1891   // Must happen AFTER invocation_counter check and stack overflow check,
  // so the method is not locked if the counter overflows.
1893   if (synchronized) {
1894     // Allocate monitor and lock method
1895     lock_method();
1896   } else {
1897     // no synchronization necessary
1898 #ifdef ASSERT
1899     {
1900       Label L;
1901       __ reg_printf("Checking synchronization, rmethod = %p\n", rmethod);
1902       __ ldr(r0, access_flags);
1903       __ tst(r0, JVM_ACC_SYNCHRONIZED);
1904       __ b(L, Assembler::EQ);
1905       __ stop("method needs synchronization");
1906       __ bind(L);
1907     }
1908 #endif
1909   }
1910 
1911   // start execution
1912 #ifdef ASSERT
1913   {
1914     Label L;
    const Address monitor_block_top(rfp,
                frame::get_interpreter_frame_monitor_block_top_offset() * wordSize);
1917     __ ldr(rscratch1, monitor_block_top);
1918     __ cmp(sp, rscratch1);
1919     __ b(L, Assembler::EQ);
1920     __ stop("broken stack frame setup in interpreter");
1921     __ bind(L);
1922   }
1923 #endif
1924 
1925   // jvmti support
1926   __ notify_method_entry();
1927   __ reg_printf("About to dispatch, rmethod = %p, rlocals = %p\n", rmethod, rlocals);
1928   __ dispatch_next(vtos);
1929   __ reg_printf("Finshed dispatch? rmethod = %p\n", rmethod);
1930   // invocation counter overflow
1931   if (inc_counter) {
1932     if (ProfileInterpreter) {
1933       // We have decided to profile this method in the interpreter
1934       __ bind(profile_method);
1935       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
1936       __ set_method_data_pointer_for_bcp();
      // restore Method* into r1 (possibly unnecessary, kept for safety)
1938       __ get_method(r1);
1939       __ b(profile_method_continue);
1940     }
1941     // Handle overflow of counter and compile method
1942     __ bind(invocation_counter_overflow);
1943     generate_counter_overflow(continue_after_compile);
1944   }
1945 
1946   __ reg_printf("Just completed normal entry, rmethod = %p\n", rmethod);
1947   return entry_point;
1948 }
1949 
1950 //-----------------------------------------------------------------------------
1951 // Exceptions
1952 
1953 void TemplateInterpreterGenerator::generate_throw_exception() {
1954   // Entry point in previous activation (i.e., if the caller was
1955   // interpreted)
1956   Interpreter::_rethrow_exception_entry = __ pc();
1957   __ reg_printf("rethrow_exception_entry\n");
1958 
1959   // Restore sp to interpreter_frame_last_sp even though we are going
1960   // to empty the expression stack for the exception processing.
1961   __ mov(rscratch1, 0);
1962   __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
1963   // r0: exception
1964   // r3: return address/pc that threw exception
1965   __ restore_bcp();    // rbcp points to call/send
1966   __ restore_locals();
1967   __ restore_constant_pool_cache();
1968   __ get_dispatch();
1969 
1970   // Entry point for exceptions thrown within interpreter code
1971   Interpreter::_throw_exception_entry = __ pc();
1972   __ reg_printf("throw_exception_entry\n");
1973   // If we came here via a NullPointerException on the receiver of a
1974   // method, rmethod may be corrupt.
1975   __ get_method(rmethod);
1976   // expression stack is undefined here
1977   // r0: exception
1978   // rbcp: exception bcp
1979   __ verify_oop(r0);
1980   __ mov(c_rarg1, r0);
1981 
1982   // expression stack must be empty before entering the VM in case of
1983   // an exception
1984   __ empty_expression_stack();
1985   // find exception handler address and preserve exception oop
1986   __ call_VM(r3,
1987              CAST_FROM_FN_PTR(address,
1988                           InterpreterRuntime::exception_handler_for_exception),
1989              c_rarg1);
1990 
1991   // Calculate stack limit
1992   /*__ ldr(rscratch1, Address(rmethod, Method::const_offset()));
1993   __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset()));
1994   __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4);
1995   __ ldr(rscratch2,
1996          Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize));
1997   __ sub(rscratch1, rscratch2, rscratch1, lsl(2));
1998   __ bic(sp, rscratch1, 0xf);*/
  // The commented-out calculation above is intentionally disabled: there is
  // no separate stack pointer to restore at this point
2000 
2001   // r0: exception handler entry point
2002   // r3: preserved exception oop
2003   // rbcp: bcp for exception handler
2004   __ push_ptr(r3); // push exception which is now the only value on the stack
2005   __ b(r0); // jump to exception handler (may be _remove_activation_entry!)
2006 
2007   // If the exception is not handled in the current frame the frame is
2008   // removed and the exception is rethrown (i.e. exception
2009   // continuation is _rethrow_exception).
2010   //
  // Note: At this point the bci is still the bci for the instruction
2012   // which caused the exception and the expression stack is
2013   // empty. Thus, for any VM calls at this point, GC will find a legal
2014   // oop map (with empty expression stack).
2015 
2016   //
2017   // JVMTI PopFrame support
2018   //
2019 
2020   Interpreter::_remove_activation_preserving_args_entry = __ pc();
2021   __ print_method_exit(false);
2022   __ reg_printf("remove_activation_preserving_args_entry\n");
2023   __ empty_expression_stack();
2024   // Set the popframe_processing bit in pending_popframe_condition
2025   // indicating that we are currently handling popframe, so that
2026   // call_VMs that may happen later do not trigger new popframe
2027   // handling cycles.
2028   __ ldr(r3, Address(rthread, JavaThread::popframe_condition_offset()));
2029   __ orr(r3, r3, JavaThread::popframe_processing_bit);
2030   __ str(r3, Address(rthread, JavaThread::popframe_condition_offset()));
2031 
2032   {
2033     // Check to see whether we are returning to a deoptimized frame.
2034     // (The PopFrame call ensures that the caller of the popped frame is
2035     // either interpreted or compiled and deoptimizes it if compiled.)
2036     // In this case, we can't call dispatch_next() after the frame is
2037     // popped, but instead must save the incoming arguments and restore
2038     // them after deoptimization has occurred.
2039     //
2040     // Note that we don't compare the return PC against the
2041     // deoptimization blob's unpack entry because of the presence of
2042     // adapter frames in C2.
2043     Label caller_not_deoptimized;
2044     __ ldr(c_rarg1, Address(rfp, frame::get_return_addr_offset() * wordSize));
2045     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
2046                                InterpreterRuntime::interpreter_contains), c_rarg1);
2047     __ cbnz(r0, caller_not_deoptimized);
2048 
2049     // Compute size of arguments for saving when returning to
2050     // deoptimized caller
2051     __ get_method(r0);
2052     __ ldr(r0, Address(r0, Method::const_offset()));
2053     __ load_unsigned_short(r0, Address(r0, in_bytes(ConstMethod::
2054                                                     size_of_parameters_offset())));
2055     __ lsl(r0, r0, Interpreter::logStackElementSize);
2056     __ restore_locals(); // XXX do we need this?
2057     __ sub(rlocals, rlocals, r0);
2058     __ add(rlocals, rlocals, wordSize);
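    // rlocals now holds the lowest-addressed argument word,
    //   base = rlocals - size_in_bytes + wordSize,
    // i.e. the (base, size) pair that popframe_preserve_args records.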
2059     // Save these arguments
2060     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
2061                                            Deoptimization::
2062                                            popframe_preserve_args),
2063                           rthread, r0, rlocals);
2064 
2065     __ remove_activation(vtos,
2066                          /* throw_monitor_exception */ false,
2067                          /* install_monitor_exception */ false,
2068                          /* notify_jvmdi */ false);
2069 
2070     // Inform deoptimization that it is responsible for restoring
2071     // these arguments
2072     __ mov(rscratch1, JavaThread::popframe_force_deopt_reexecution_bit);
2073     __ str(rscratch1, Address(rthread, JavaThread::popframe_condition_offset()));
2074 
2075     // Continue in deoptimization handler
2076     __ b(lr);
2077 
2078     __ bind(caller_not_deoptimized);
2079   }
2080 
2081   __ remove_activation(vtos,
2082                        /* throw_monitor_exception */ false,
2083                        /* install_monitor_exception */ false,
2084                        /* notify_jvmdi */ false);
2085 
2086   // Restore the last_sp and null it out
2087   __ ldr(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
2088   __ mov(rscratch1, 0);
2089   __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize));
2090   // remove_activation restores sp?
2091 
2092   __ restore_bcp();
2093   __ restore_locals();
2094   __ restore_constant_pool_cache();
2095   __ get_method(rmethod);
2096   __ get_dispatch();
2097 
2098   // The method data pointer was incremented already during
2099   // call profiling. We have to restore the mdp for the current bcp.
2100   if (ProfileInterpreter) {
2101     __ set_method_data_pointer_for_bcp();
2102   }
2103 
2104   // Clear the popframe condition flag
2105   __ mov(rscratch1, JavaThread::popframe_inactive);
2106   __ str(rscratch1, Address(rthread, JavaThread::popframe_condition_offset()));
2107   assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive");
2108 
2109 #if INCLUDE_JVMTI
2110   {
2111     Label L_done;
2112     __ ldrb(rscratch1, Address(rbcp, 0));
2113     __ cmp(rscratch1, Bytecodes::_invokestatic);
2114     __ b(L_done, Assembler::EQ);
2115 
2116     // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
2117     // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
2118 
2119     __ ldr(c_rarg0, Address(rlocals, 0));
2120     __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, rmethod, rbcp);
2121 
2122     __ cbz(r0, L_done);
2123 
2124     __ str(r0, Address(sp, 0));
2125     __ bind(L_done);
2126   }
2127 #endif // INCLUDE_JVMTI
2128 
  // Restore machine SP: disabled, as there is no separate stack pointer to
  // restore at this point (cf. the matching note after the throw entry above)
2130   /*__ ldr(rscratch1, Address(rmethod, Method::const_offset()));
2131   __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset()));
2132   __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4);
2133   __ ldr(rscratch2,
2134          Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize));
2135   __ sub(rscratch1, rscratch2, rscratch1, lsl(2));
2136   __ bic(sp, rscratch1, 0xf);*/
2137 
2138   __ dispatch_next(vtos);
2139   // end of PopFrame support
2140 
2141   Interpreter::_remove_activation_entry = __ pc();
2142   __ print_method_exit(false);
2143   __ reg_printf("remove_activation_entry\n");
2144 
2145   // preserve exception over this code sequence
2146   __ pop_ptr(r0);
2147   __ str(r0, Address(rthread, JavaThread::vm_result_offset()));
2148   // remove the activation (without doing throws on illegalMonitorExceptions)
2149   __ remove_activation(vtos, false, true, false);
  // restore exception
2152   __ get_vm_result(r0, rthread);
2153 
2154   // In between activations - previous activation type unknown yet
2155   // compute continuation point - the continuation point expects the
2156   // following registers set up:
2157   //
2158   // r0: exception
2159   // lr: return address/pc that threw exception
2160   // rsp: expression stack of caller
2161   // rfp: fp of caller
2162   // FIXME: There's no point saving LR here because VM calls don't trash it
2163   __ strd(r0, lr, Address(__ pre(sp, -2 * wordSize)));  // save exception & return address
2164   __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
2165                           SharedRuntime::exception_handler_for_return_address),
2166                         rthread, lr);
2167   __ mov(r1, r0);                               // save exception handler
2168   __ ldrd(r0, lr, Address(__ post(sp, 2 * wordSize)));  // restore exception & return address
2169   // We might be returning to a deopt handler that expects r3 to
2170   // contain the exception pc
2171   __ mov(r3, lr);
2172   // Note that an "issuing PC" is actually the next PC after the call
2173   __ b(r1);                                    // jump to exception
2174                                                 // handler of caller
2175 }
2176 
2177 
2178 //
2179 // JVMTI ForceEarlyReturn support
2180 //
2181 address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
2182   address entry = __ pc();
2183   __ restore_bcp();
2184   __ restore_locals();
2185   __ empty_expression_stack();
2186   __ load_earlyret_value(state);
2187 
2188   __ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
2189   Address cond_addr(rscratch1, JvmtiThreadState::earlyret_state_offset());
2190 
2191   // Clear the earlyret state
2192   assert(JvmtiThreadState::earlyret_inactive == 0, "should be");
2193   __ mov(rscratch2, 0);
2194   __ str(rscratch2, cond_addr);
2195 
2196   __ remove_activation(state,
2197                        false, /* throw_monitor_exception */
2198                        false, /* install_monitor_exception */
2199                        true); /* notify_jvmdi */
2200   __ b(lr);
2201 
2202   return entry;
2203 } // end of ForceEarlyReturn support
2204 
2205 
2206 
2207 //-----------------------------------------------------------------------------
2208 // Helper for vtos entry point generation
2209 
2210 void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
2211                                                          address& bep,
2212                                                          address& cep,
2213                                                          address& sep,
2214                                                          address& aep,
2215                                                          address& iep,
2216                                                          address& lep,
2217                                                          address& fep,
2218                                                          address& dep,
2219                                                          address& vep) {
2220   assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
2221   Label L;
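  // Each tos-state entry pushes the value currently cached in registers onto
  // the expression stack and falls into the common vtos point below; on
  // soft-float builds (no FPU) the dep/fep entries fall through to the
  // long/int pushes instead.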
2222   aep = __ pc();  __ push_ptr();  __ b(L);
  dep = __ pc();
  if (hasFPU()) {
    __ push_d(); __ b(L);
  }
  lep = __ pc();  __ push_l();    __ b(L);
  fep = __ pc();
  if (hasFPU()) {
    __ push_f();    __ b(L);
  }
2232   bep = cep = sep =
2233   iep = __ pc();  __ push_i();
2234   vep = __ pc();
2235   __ bind(L);
2236   generate_and_dispatch(t);
2237 }
2238 
2239 //-----------------------------------------------------------------------------
2240 
2241 // Non-product code
2242 #ifndef PRODUCT
2243 address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
2244   address entry = __ pc();
2245 
2246   __ push(state);
2247   // Save all registers on stack, so omit SP and PC
2248   const RegSet push_set = RegSet::range(r0, r12) + lr;
2249   const int push_set_cnt = __builtin_popcount(push_set.bits());
2250   __ push(push_set, sp);
2251   __ ldr(c_rarg2, Address(sp, push_set_cnt*wordSize));      // Pass top of stack
2252   __ ldr(c_rarg3, Address(sp, (push_set_cnt+1)*wordSize));  // Pass top of stack high part/2nd stack word
2253   __ call_VM(noreg,
2254   //TODO: XXX: moved from SharedRuntime to InterpreterRuntime
2255              CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode),
2256              c_rarg1, c_rarg2, c_rarg3);
2257   __ pop(RegSet::range(r0, r12) + lr, sp);
2258   __ pop(state);
2259   __ b(lr);                                   // return from result handler
2260 
2261   return entry;
2262 }
2263 
2264 void TemplateInterpreterGenerator::count_bytecode() {
2265   __ push(c_rarg0);
2266   __ push(rscratch1);
2267   __ push(rscratch2);
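  // Atomically increment the global bytecode counter with an LDREX/STREX
  // retry loop: reload, add one, and retry until the store-exclusive
  // reports success (0) in c_rarg0.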
2268   Label L;
2269   __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
2270   __ bind(L);
2271   __ ldrex(rscratch1, rscratch2);
2272   __ add(rscratch1, rscratch1, 1);
  // strex stores the 2nd arg to the destination addressed by the 3rd arg
  // and writes the status to the 1st arg, so the 1st and 2nd must differ.
2275   __ strex(c_rarg0, rscratch1, rscratch2);
2276   __ cmp(c_rarg0, 0);
2277   __ b(L, Assembler::NE);
2278   __ pop(rscratch2);
2279   __ pop(rscratch1);
2280   __ pop(c_rarg0);
2281 }
2282 
2283 void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; }
2284 
2285 void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; }
2286 
2287 
2288 void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
2289   // Call a little run-time stub to avoid blow-up for each bytecode.
  // The run-time stub saves the right registers, depending on
2291   // the tosca in-state for the given template.
2292 
2293   assert(Interpreter::trace_code(t->tos_in()) != NULL,
2294          "entry must have been generated");
2295   __ bl(Interpreter::trace_code(t->tos_in()));
2296 }
2297 
2298 
2299 void TemplateInterpreterGenerator::stop_interpreter_at() {
2300   Label L;
2301   __ push(rscratch1);
2302   __ mov(rscratch1, (address) &BytecodeCounter::_counter_value);
2303   __ ldr(rscratch1, Address(rscratch1));
2304   __ mov(rscratch2, StopInterpreterAt);
2305   __ cmp(rscratch1, rscratch2);
2306   __ b(L, Assembler::NE);
2307   __ bkpt(0);
2308   __ bind(L);
2309   __ pop(rscratch1);
2310 }
2311 
2312 #endif // !PRODUCT