/*
 * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "nativeInst_aarch32.hpp"
#include "oops/instanceOop.hpp"
#include "oops/method.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "utilities/top.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif


// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp

#undef __
#define __ _masm->
#define TIMES_OOP lsl(exact_log2(4))

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(counter) ((void)0)
#else
  void inc_counter_np_(int& counter) {
    __ lea(rscratch2, ExternalAddress((address)&counter));
    __ ldr(rscratch1, Address(rscratch2));
    __ add(rscratch1, rscratch1, 1);
    __ str(rscratch1, Address(rscratch2));
  }
#define inc_counter_np(counter) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter);
#endif
  // Call stubs are used to call Java from C.
  //
  // There are only four registers available to house arguments, but we're
  // expecting eight, so the layout is as follows:
  //
  // c_rarg0 = call wrapper address
  // c_rarg1 = result
  // c_rarg2 = result type
  // c_rarg3 = method
  // sp -> [ entry_point
  //         parameters -> java params
  //         parameter size (in words)
  //         thread] (address increasing)
  //
  // NEW!! layout for aarch32 so that save and restore can be collapsed into a single
  // load/store
  // layout of saved registers now is
  // 0   [ saved lr      ] <- rfp
  // -1  [ saved fp      ]
  // -2  [ r12/rthread   ] Thread passed in args
  // -3  [ r10/rmethod   ] NOTE omitted rfp as restored automatically
  // -4  [ r9/rscratch1  ] Platform register?
  // -5  [ r8/thread     ]
  // -6  [ r7/rcpool     ]
  // -7  [ r6/rlocals    ]
  // -8  [ r5/rbcp       ]
  // -9  [ r4/rdispatch  ]
  // -10 [ r2/res type   ]
  // -11 [ r1/result     ]
  // -12 [r0/call wrapper]<- sp (when restored from fp value)
  // -13 maybe alignment
  // -YY [ java arg0     ]
  //   ...
  // -xx [ java argn     ] <- sp on branch into java
  //
  // XXX Note we do not save floating point registers.
  // Only floating point registers s16-31 / d8-15 would need to be saved,
  // and these are never touched by template interpreter code.
  // On a sequence such as C -> Java -> C, the C functions will save them if used.

  static const int thread_off = -2 * wordSize; // The offset of the saved thread

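  // For reference, the C++ side invokes this stub through the CallStub
  // function pointer type declared in stubRoutines.hpp. A rough sketch of
  // that signature (parameter names approximate), annotated with where each
  // argument arrives under the layout above:
  //
  //   typedef void (*CallStub)(
  //       address   link,               // c_rarg0: call wrapper address
  //       intptr_t* result,             // c_rarg1
  //       BasicType result_type,        // c_rarg2
  //       Method*   method,             // c_rarg3
  //       address   entry_point,        // sp + 1*wordSize
  //       intptr_t* parameters,         // sp + 2*wordSize
  //       int       size_of_parameters, // sp + 3*wordSize (in words)
  //       TRAPS);                       // sp + 4*wordSize (thread)
  //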
  address generate_call_stub(address& return_address) {
    /*assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
           "adjust this code");*/

    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();
    __ reg_printf("entering call stub with { sp : %p, rfp : %p, lr : %p}\n", sp, rfp, lr);
    __ enter(); //save rfp & lr !!NOTE PUSHES TWO REGISTERS TO STACK

    const int entry_point_arg_off = 1 * wordSize,
              params_arg_off      = 2 * wordSize,
              param_sz_arg_off    = 3 * wordSize,
              thread_arg_off      = 4 * wordSize;
    // r12 is a scratch register so we can clobber it to save thread
    // which is needed at the end
    __ ldr(r12, Address(rfp, thread_arg_off));
    // r0, r1, r2, r4 - r10, r12
    // we save r0 as the call wrapper address is needed elsewhere
    // we save r1, r2 as they hold the result and its type,
    // which are needed on return
    // r12 holds the thread ptr
    unsigned c_save_regset = 0b0001011111110111;
    int nsaved = __ count_bits(c_save_regset);
    __ stmdb(sp, c_save_regset);

    // Offset from rfp to end of stack.
    const int rfp_tos_offset_bytes = frame::offset_from_rfp_bytes + nsaved * wordSize;

    // install Java thread in global register now we have saved
    // whatever value it held
    __ mov(rthread, r12);
    // And method
    __ mov(rmethod, c_rarg3);

#ifdef ASSERT
    // make sure we have no pending exceptions
    {
      Label L;
      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
      __ cmp(rscratch1, (unsigned)NULL_WORD);
      __ b(L, Assembler::EQ);
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ BIND(L);
    }
#endif
    __ ldr(rscratch2, Address(rfp, param_sz_arg_off));
    // align sp at the time we call java
    __ sub(sp, sp, rscratch2, lsl(LogBytesPerWord));
    __ align_stack();
    __ add(sp, sp, rscratch2, lsl(LogBytesPerWord));

    __ ldr(rscratch1, Address(rfp, params_arg_off));

    BLOCK_COMMENT("pass parameters if any");
    Label parameters_done;

    __ reg_printf("call_stub param_off = %p, param_sz = %d\n", rscratch1, rscratch2);
    __ cmp(rscratch2, 0);
    __ b(parameters_done, Assembler::EQ);
    // r14 makes an OK temp here because it has been saved
    address loop = __ pc();
    __ ldr(r14, Address(__ post(rscratch1, wordSize)));
    __ subs(rscratch2, rscratch2, 1);

    // TODO remove
    __ reg_printf("\tARG SP[%d] : 0x%08x\n", rscratch2, r14);
    __ cmp(rscratch2, 0);
    // END TODO
    __ push(r14);
    __ b(loop, Assembler::GT);

    __ BIND(parameters_done);

#ifdef ASSERT
    __ verify_stack_alignment();
#endif

    BLOCK_COMMENT("call Java function");
    __ ldr(rscratch1, Address(rfp, entry_point_arg_off));

    __ reg_printf("Calling Java function with rfp = %p, sp = %p\n", rfp, sp);
    __ mov(r4, sp);                 // set sender sp
    __ bl(rscratch1);
    // save current address for use by exception handling code
    return_address = __ pc();

    __ reg_printf("Returned to call_stub with rfp = %p, sp = %p\n", rfp, sp);

    // At this point rfp should be restored to the value it held on entry;
    // use it to set the top of stack.
    __ sub(sp, rfp, rfp_tos_offset_bytes);

#ifdef ASSERT
    // verify that threads correspond
    __ ldr(r12, Address(rfp, thread_off));
    // rfp points to the register stored in the highest memory location -
    // first on the stack, that's the saved lr; the saved thread is just
    // below it and is held in r12 at this point
    {
      Label L, S;
      __ cmp(rthread, r12);
      __ b(S, Assembler::NE);
      __ get_thread(r12);
      __ cmp(rthread, r12);
      __ b(L, Assembler::EQ);
      __ BIND(S);
      __ stop("StubRoutines::call_stub: threads must correspond");
      __ BIND(L);
    }
#endif

    if (MacroAssembler::enable_debugging_static) {
      // FIXME Remove this hacky debugging code
      Label L;
      __ ldr(rscratch2, Address(rthread, Thread::pending_exception_offset()));
      __ cbnz(rscratch2, L);
      // If we're returning via an exception then we shouldn't report exit;
      // the exception handler will have already reported the exit, and
      // reporting via our progress through the call stub would result in an
      // extra method being reported as exited.
      __ print_method_exit();
      __ bind(L);
    }
    // NOTE Horrible tricks here
    // We need to preserve the current r0 and r1 values as they contain the return value.
    // First we discard r0 saved to stack, no longer needed.
    // We saved result and type as c_rarg1 and c_rarg2, so now we alter
    // the regset to load as follows:
    // c_rarg2 = result
    // c_rarg3 = result_type

    assert((c_save_regset & 0xf) == 0b0111, "change me");
    __ add(sp, sp, wordSize);
    const int altered_saved_regset = (~0xf & c_save_regset) | 0xc;
    __ ldmia(sp, altered_saved_regset);

    // store result depending on type (everything that is not
    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
    // n.b. this assumes Java returns an integral result in r0
    // and a floating result in j_farg0

    Label is_object, is_long, is_float, is_double, exit;
    __ cmp(c_rarg3, T_OBJECT);
    __ b(is_object, Assembler::EQ);
    __ cmp(c_rarg3, T_LONG);
    __ b(is_long, Assembler::EQ);
    __ cmp(c_rarg3, T_FLOAT);
    __ b(is_float, Assembler::EQ);
    __ cmp(c_rarg3, T_DOUBLE);
    __ b(is_double, Assembler::EQ);

    // handle T_INT case
    __ str(r0, Address(c_rarg2));

    __ BIND(exit);
    __ leave(); // Restore rfp, sp, lr
    __ reg_printf("leaving call stub with { sp : %p, rfp : %p, lr : %p}\n", sp, rfp, lr);
    // Pop arguments from stack.
    //__ add(sp, sp, 4 * wordSize);

    __ b(lr);

    // handle return types different from T_INT
    __ BIND(is_object);
    __ mov(r1, 0);

    __ BIND(is_long);
    __ strd(r0, r1, Address(c_rarg2, 0));
    __ b(exit, Assembler::AL);

    __ BIND(is_float);
    __ vstr_f32(f0, Address(c_rarg2, 0));
    __ b(exit, Assembler::AL);

    __ BIND(is_double);
    __ vstr_f64(d0, Address(c_rarg2, 0));
    __ b(exit, Assembler::AL);

    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code.  The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  //
  // Note: Usually the parameters are removed by the callee. In case
  // of an exception crossing an activation frame boundary, that is
  // not the case if the callee is compiled code => need to setup the
  // rsp.
  //
  // r0: exception oop

  // NOTE: this is used as a target from the signal handler so it
  // needs an x86 prolog which returns into the current simulator
  // executing the generated catch_exception code. so the prolog
  // needs to install rax in a sim register and adjust the sim's
  // restart pc to enter the generated code at the start position
  // then return from native to simulated execution.

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
    address start = __ pc();

    // same as in generate_call_stub():
    const Address thread(rfp, thread_off);

#ifdef ASSERT
    // verify that threads correspond
    {
      Label L, S;
      __ ldr(rscratch1, thread);
      __ cmp(rthread, rscratch1);
      __ b(S, Assembler::NE);
      __ get_thread(rscratch1);
      __ cmp(rthread, rscratch1);
      __ b(L, Assembler::EQ);
      __ bind(S);
      __ stop("StubRoutines::catch_exception: threads must correspond");
      __ bind(L);
    }
#endif

    // set pending exception
    __ verify_oop(r0);

    __ str(r0, Address(rthread, Thread::pending_exception_offset()));
    __ mov(rscratch1, (address)__FILE__);
    __ str(rscratch1, Address(rthread, Thread::exception_file_offset()));
    __ mov(rscratch1, (int)__LINE__);
    __ str(rscratch1, Address(rthread, Thread::exception_line_offset()));

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL,
           "_call_stub_return_address must have been generated before");
    __ b(StubRoutines::_call_stub_return_address);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception.  The pending exception check happened in the runtime
  // or native call stub.  The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Contract with Java-level exception handlers:
  // r0: exception
  // r3: throwing pc
  //
  // NOTE: At entry of this stub, exception-pc must be in LR !!

  // NOTE: this is always used as a jump target within generated code
  // so it just needs to be generated code with no x86 prolog

  address generate_forward_exception() {
    // FIXME NOTE ON ALTERATION TO ARM32: it was assumed that rmethod
    // won't be used anymore and is set on entry to the handler - is this true?

    Register spare = rmethod;

    StubCodeMark mark(this, "StubRoutines", "forward exception");
    address start = __ pc();

    // Upon entry, LR points to the return address returning into
    // Java (interpreted or compiled) code; i.e., the return address
    // becomes the throwing pc.
    //
    // Arguments pushed before the runtime call are still on the stack
    // but the exception handler will reset the stack pointer ->
    // ignore them.  A potential result in registers can be ignored as
    // well.

#ifdef ASSERT
    // make sure this code is only executed if there is a pending exception
    {
      Label L;
      __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
      __ cbnz(rscratch1, L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into spare

    // call the VM to find the handler address associated with the
    // caller address. pass thread in r0 and caller pc (ret address)
    // in r1. n.b. the caller pc is in lr, unlike x86 where it is on
    // the stack.
    __ mov(c_rarg1, lr);
    // lr will be trashed by the VM call so we move it to 'spare'
    // (callee-saved) because we also need to pass it to the handler
    // returned by this call.
    __ mov(spare, lr); // note: 'spare' (rmethod) is a callee-saved register
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
                         SharedRuntime::exception_handler_for_return_address),
                    rthread, c_rarg1);
    // we should not really care that lr is no longer the callee
    // address. we saved the value the handler needs in spare so we can
    // just copy it to r3. however, the C2 handler will push its own
    // frame and then call into the VM, and the VM code asserts that
    // the PC for the frame above the handler belongs to a compiled
    // Java method. So, we restore lr here to satisfy that assert.
    __ mov(lr, spare);
    // setup r0 & r3 & clear pending exception
    __ mov(r3, spare);
    __ mov(spare, r0);
    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
    __ mov(rscratch1, 0);
    __ str(rscratch1, Address(rthread, Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure exception is set
    {
      Label L;
      __ cbnz(r0, L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // continue at exception handler
    // r0: exception
    // r3: throwing pc
    // spare: exception handler

    __ verify_oop(r0);
    __ b(spare);

    return start;
  }

  // Non-destructive plausibility checks for oops
  //
  // Arguments:
  //    r0: oop to verify
  //    rscratch1: error message
  //
  // Stack after saving c_rarg3:
  //    [tos + 0]: saved c_rarg3
  //    [tos + 1]: saved c_rarg2
  //    [tos + 2]: saved lr
  //    [tos + 3]: saved rscratch2
  //    [tos + 4]: saved r1
  //    [tos + 5]: saved r0
  //    [tos + 6]: saved rscratch1
  address generate_verify_oop() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
    address start = __ pc();

    Label exit, error;

    // save c_rarg2 and c_rarg3
    __ stmdb(sp, RegSet::of(c_rarg2, c_rarg3).bits());

    __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
    __ ldr(c_rarg3, Address(c_rarg2));
    __ add(c_rarg3, c_rarg3, 1);
    __ str(c_rarg3, Address(c_rarg2));

    // object is in r0
    // make sure object is 'reasonable'
    __ cbz(r0, exit); // if obj is NULL it is OK

    // Check if the oop is in the right area of memory
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask());
    __ andr(c_rarg2, r0, c_rarg3);
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits());

    // Compare c_rarg2 and c_rarg3.  We don't use a compare
    // instruction here because the flags register is live.
    __ eor(c_rarg2, c_rarg2, c_rarg3);
    __ cbnz(c_rarg2, error);

    // make sure klass is 'reasonable', i.e. not zero
    __ load_klass(r0, r0);  // get klass
    __ cbz(r0, error);      // if klass is NULL it is broken

    // return if everything seems ok
    __ bind(exit);

    __ ldmia(sp, RegSet::of(c_rarg2, c_rarg3).bits());
    __ b(lr);

    // handle errors
    __ bind(error);
    __ ldmia(sp, RegSet::of(c_rarg2, c_rarg3).bits());

    __ pusha();
    // Save old sp
    __ add(c_rarg2, sp, 14 * wordSize);
    __ str(c_rarg2, Address( __ pre(sp, -wordSize)));
    __ mov(c_rarg0, rscratch1);      // pass address of error message
    __ mov(c_rarg1, lr);             // pass return address
    __ mov(c_rarg2, sp);             // pass address of regs on stack
#ifndef PRODUCT
    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
    BLOCK_COMMENT("call MacroAssembler::debug");
    __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug32));
    __ bl(rscratch1);
    __ hlt(0);

    return start;
  }
  // NOTE: very strange - I changed this but I don't know why the Address (signed extend word) parameter was here
  //void array_overlap_test(Label& L_no_overlap, Address sf) { __ b(L_no_overlap); }
  void array_overlap_test(Label& L_no_overlap) { __ b(L_no_overlap); }
  // no test is being performed?

  // Generate code for an array write pre barrier
  //
  //     addr    -  starting address
  //     count   -  element count
  //     tmp     -  scratch register
  //
  //     Destroys no registers!
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
    case BarrierSet::G1SATBCTLogging:
      // With G1, don't generate the call if we statically know that the target is uninitialized
      if (!dest_uninitialized) {
        __ push(RegSet::range(r0, r12), sp);         // integer registers except lr & sp
        if (count == c_rarg0) {
          if (addr == c_rarg1) {
            // exactly backwards!!
            __ strd(c_rarg0, c_rarg1, __ pre(sp, -2 * wordSize));
            __ ldrd(c_rarg1, c_rarg0, __ post(sp, -2 * wordSize));
          } else {
            __ mov(c_rarg1, count);
            __ mov(c_rarg0, addr);
          }
        } else {
          __ mov(c_rarg0, addr);
          __ mov(c_rarg1, count);
        }
        __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
        __ pop(RegSet::range(r0, r12), sp);          // integer registers except lr & sp
      }
      break;
    case BarrierSet::CardTableModRef:
    case BarrierSet::CardTableExtension:
    case BarrierSet::ModRef:
      break;
    default:
      ShouldNotReachHere();
    }
  }

  //
  // Generate code for an array write post barrier
  //
  //  Input:
  //     start    - register containing starting address of destination array
  //     end      - register containing ending address of destination array
  //     scratch  - scratch register
  //
  //  The input registers are overwritten.
  //  The ending address is inclusive.
  void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
    assert_different_registers(start, end, scratch);
    BarrierSet* bs = Universe::heap()->barrier_set();
    switch (bs->kind()) {
      case BarrierSet::G1SATBCTLogging:
        {
          __ push(RegSet::range(r0, r12), sp);         // integer registers except lr & sp
          // must compute element count unless barrier set interface is changed (other platforms supply count)
          assert_different_registers(start, end, scratch);
          __ lea(scratch, Address(end, BytesPerHeapOop));
          __ sub(scratch, scratch, start);               // subtract start to get #bytes
          __ lsr(scratch, scratch, LogBytesPerHeapOop);  // convert to element count
          __ mov(c_rarg0, start);
          __ mov(c_rarg1, scratch);
          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
          __ pop(RegSet::range(r0, r12), sp);          // integer registers except lr & sp
        }
        break;
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

          Label L_loop;

          __ lsr(start, start, CardTableModRefBS::card_shift);
          __ lsr(end, end, CardTableModRefBS::card_shift);
          __ sub(end, end, start); // number of bytes to copy

          const Register count = end; // 'end' register contains bytes count now
          __ mov(scratch, (address)ct->byte_map_base);
          __ add(start, start, scratch);
          __ BIND(L_loop);
          __ mov(scratch, 0);
          __ strb(scratch, Address(start, count));
          __ subs(count, count, 1);
          __ b(L_loop, Assembler::HS);
        }
        break;
      default:
        ShouldNotReachHere();
    }
  }
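
  // A minimal C-level sketch of what the card-table branch above computes
  // (illustrative only; dirty_cards is a hypothetical name, and the real
  // byte_map_base and card_shift come from CardTableModRefBS):
  //
  //   void dirty_cards(uintptr_t start, uintptr_t end_inclusive,
  //                    jbyte* byte_map_base, int card_shift) {
  //     uintptr_t first = start >> card_shift;         // index of first card
  //     uintptr_t last  = end_inclusive >> card_shift; // index of last card
  //     for (uintptr_t i = first; i <= last; i++) {
  //       byte_map_base[i] = 0;                        // mark card dirty
  //     }
  //   }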

  //
  // Small copy: less than 4 bytes.
  //
  // NB: Ignores all of the bits of count which represent more than 3
  // bytes, so a caller doesn't have to mask them.

  void copy_memory_small(Register s, Register d, Register count, Register tmp, bool is_aligned, int step) {
    const int granularity = uabs(step);
    const bool gen_always = !is_aligned || (-4 < step && step < 0);
    Label halfword, done;

    if ((granularity <= 1) || gen_always) {
      __ tst(count, 1);
      __ b(halfword, Assembler::EQ);
      __ ldrb(tmp, step < 0 ? __ pre(s, -1) : __ post(s, 1));
      __ strb(tmp, step < 0 ? __ pre(d, -1) : __ post(d, 1));
    }

    if ((granularity <= 2) || gen_always) {
      __ bind(halfword);
      __ tst(count, 2);
      __ b(done, Assembler::EQ);
      __ ldrh(tmp, step < 0 ? __ pre(s, -2) : __ post(s, 2));
      __ strh(tmp, step < 0 ? __ pre(d, -2) : __ post(d, 2));
    }

    __ bind(done);
  }
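
  // For example, with count == 3 (or gen_always): bit 0 of count selects a
  // single byte move and bit 1 selects a halfword move, so at most 3 bytes
  // are transferred; any higher bits of count are deliberately ignored.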

  void copy_memory_simd(Register s, Register d,
                   Register count, Register tmp, int step,
                   DoubleFloatRegSet tmp_set, size_t tmp_set_size) {
    assert(UseSIMDForMemoryOps, "should be available");
    Label simd_loop, simd_small;

    __ cmp(count, tmp_set_size);
    __ b(simd_small, Assembler::LT);

    __ mov(tmp, count, __ lsr(exact_log2(tmp_set_size)));
    __ sub(count, count, tmp, __ lsl(exact_log2(tmp_set_size)));

    __ bind(simd_loop);

    __ pld(s, step < 0 ? -2 * tmp_set_size : tmp_set_size);

    if (step < 0) {
      __ vldmdb_f64(s, tmp_set.bits());
      __ vstmdb_f64(d, tmp_set.bits());
    } else {
      __ vldmia_f64(s, tmp_set.bits());
      __ vstmia_f64(d, tmp_set.bits());
    }

    __ subs(tmp, tmp, 1);
    __ b(simd_loop, Assembler::NE);

    __ bind(simd_small);
  }

  // All-singing all-dancing memory copy.
  //
  // Copy count units of memory from s to d.  The size of a unit is
  // step, which can be positive or negative depending on the direction
  // of copy.  If is_aligned is false, we align the source address.
  //

  void copy_memory(bool is_aligned, Register s, Register d,
                   Register count, Register tmp, int step) {
    const int small_copy_size = 32; // a single ldm copy pays off the alignment effort and push/pop of the temp set
    const int granularity = uabs(step);
    const Register tmp2 = rscratch2;
    const Register t0 = r3;
    Label small;

    assert_different_registers(s, d, count, tmp, tmp2, t0);

    __ mov(count, count, __ lsl(exact_log2(granularity)));

    if (step < 0) {
      __ add(s, s, count);
      __ add(d, d, count);
    }

    __ cmp(count, small_copy_size);
    __ b(small, Assembler::LT);

    // aligning
    if (!is_aligned || (-4 < step && step < 0)) {
      assert(3 <= small_copy_size, "may copy number of bytes required for alignment");
      if (step < 0) {
        __ andr(tmp2, s, 3);
      } else {
        __ rsb(tmp2, s, 0);
        __ andr(tmp2, tmp2, 3);
      }
      __ sub(count, count, tmp2);
      copy_memory_small(s, d, tmp2, tmp, is_aligned, step);
    }

#ifdef ASSERT
    Label src_aligned;
    __ tst(s, 3);
    __ b(src_aligned, Assembler::EQ);
    __ stop("src is not aligned");
    __ bind(src_aligned);
#endif

    // if destination is unaligned, copying by words is the only option
    __ tst(d, 3);
    __ b(small, Assembler::NE);
#ifndef __SOFTFP__
    if (UseSIMDForMemoryOps) {
      copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d7), 64);
      copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d1), 16);
    } else
#endif //__SOFTFP__
    {
      const RegSet tmp_set = RegSet::range(r4, r7);
      const int tmp_set_size = 16;
      Label ldm_loop;

      assert_different_registers(s, d, count, tmp2, r4, r5, r6, r7);

      __ cmp(count, tmp_set_size);
      __ b(small, Assembler::LT);

      __ push(tmp_set, sp);

      __ mov(tmp2, count, __ lsr(exact_log2(tmp_set_size)));
      __ sub(count, count, tmp2, __ lsl(exact_log2(tmp_set_size)));

      __ bind(ldm_loop);

      __ pld(s, step < 0 ? -2 * tmp_set_size : tmp_set_size);

      if (step < 0) {
        __ ldmdb(s, tmp_set.bits());
        __ stmdb(d, tmp_set.bits());
      } else {
        __ ldmia(s, tmp_set.bits());
        __ stmia(d, tmp_set.bits());
      }

      __ subs(tmp2, tmp2, 1);
      __ b(ldm_loop, Assembler::NE);

      __ pop(tmp_set, sp);
    }

    __ bind(small);

    Label words_loop, words_done;
    __ cmp(count, BytesPerWord);
    __ b(words_done, Assembler::LT);

    __ mov(tmp2, count, __ lsr(exact_log2(BytesPerWord)));
    __ sub(count, count, tmp2, __ lsl(exact_log2(BytesPerWord)));

    __ bind(words_loop);

    Address src = step < 0 ? __ pre(s, -BytesPerWord) : __ post(s, BytesPerWord);
    Address dst = step < 0 ? __ pre(d, -BytesPerWord) : __ post(d, BytesPerWord);

    __ pld(s, step < 0 ? -2 * BytesPerWord : BytesPerWord);
    __ ldr(t0, src);
    __ str(t0, dst);
    __ subs(tmp2, tmp2, 1);

    __ b(words_loop, Assembler::NE);

    __ bind(words_done);
    copy_memory_small(s, d, count, tmp, is_aligned, step);
  }
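
  // A worked example of the strategy above, assuming a forward byte copy
  // (step == 1) of count == 77 bytes from an unaligned source (s % 4 == 3):
  //
  //   1 byte              copied by copy_memory_small to align s (count -> 76)
  //   4 x 16-byte blocks  copied by the ldm/stm loop  (count -> 12)
  //   3 words             copied by the word loop     (count -> 0)
  //   0 trailing bytes    handled by the final copy_memory_small
  //
  // With UseSIMDForMemoryOps the 16-byte ldm/stm loop is replaced by 64- and
  // 16-byte vldm/vstm loops over d0-d7 and d0-d1 respectively.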

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 4-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  // Side Effects:
  //   disjoint_int_copy_entry is set to the no-overlap entry point
  //   used by generate_conjoint_int_oop_copy().
  //
  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
                                  const char *name, bool dest_uninitialized = false) {
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }
    __ enter();
    if (is_oop) {
      __ push(RegSet::of(d, count), sp);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
    }
    copy_memory(aligned, s, d, count, rscratch1, size);
    if (is_oop) {
      __ pop(RegSet::of(d, count), sp);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, lsl(exact_log2(size))));
      gen_write_ref_array_post_barrier(d, count, rscratch1);
    }
    __ leave();
    __ b(lr);
    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 4-byte boundary
  //             ignored
  //   is_oop  - true => oop array, so generate store check code
  //   name    - stub name string
  //
  // Inputs:
  //   c_rarg0   - source array address
  //   c_rarg1   - destination array address
  //   c_rarg2   - element count, treated as ssize_t, can be zero
  //
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  // the hardware handle it.  The two dwords within qwords that span
  // cache line boundaries will still be loaded and stored atomically.
  //
  address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
                                 address *entry, const char *name,
                                 bool dest_uninitialized = false) {
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ cmp(d, s);
    __ b(nooverlap_target, Assembler::LS);

    __ enter();
    if (is_oop) {
      __ push(RegSet::of(d, count), sp);
      // no registers are destroyed by this call
      gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
    }
    copy_memory(aligned, s, d, count, rscratch1, -size);
    if (is_oop) {
      __ pop(RegSet::of(d, count), sp);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, lsl(exact_log2(size))));
      gen_write_ref_array_post_barrier(d, count, rscratch1);
    }
    __ leave();
    __ b(lr);
    return start;
  }

  // Helper for generating a dynamic type check.
  // Smashes rscratch1.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Label& L_success) {
    assert_different_registers(sub_klass, super_check_offset, super_klass);

    BLOCK_COMMENT("type_check:");

    Label L_miss;

    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
                                     super_check_offset);
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);

    // Fall through on failure!
    __ BIND(L_miss);
  }
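
  // Loosely, the check above asks "is sub_klass a subtype of super_klass?"
  // using super_klass's cached check offset. An illustrative pseudocode
  // sketch only (the real logic lives in MacroAssembler::check_klass_subtype_*;
  // scan_secondary_supers is a hypothetical name):
  //
  //   if (sub_klass == super_klass) goto L_success;           // trivial hit
  //   if (*(Klass**)((address)sub_klass + super_check_offset) == super_klass)
  //     goto L_success;                                       // fast path hit
  //   if (scan_secondary_supers(sub_klass, super_klass))
  //     goto L_success;                                       // slow path
  //   // otherwise fall through to L_miss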

  //
  //  Generate checkcasting array copy stub
  //
  //  Input:
  //    c_rarg0   - source array address
  //    c_rarg1   - destination array address
  //    c_rarg2   - oop ckval (super_klass)
  //    c_rarg3   - size_t ckoff (super_check_offset)
  //    r4        - element count, treated as ssize_t, can be zero
  //
  //  Output:
  //    r0 ==  0  -  success
  //    r0 == -1^K - failure, where K is partial transfer count
  //
  address generate_checkcast_copy(const char *name, address *entry,
                                  bool dest_uninitialized = false) {
    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;

    // Input registers (after setup_arg_regs)
    const Register from        = c_rarg0;   // source array address
    const Register to          = c_rarg1;   // destination array address
    const Register count       = r4;        // element count
    const Register ckoff       = c_rarg3;   // super_check_offset
    const Register ckval       = c_rarg2;   // super_klass

    // Registers used as temps
    const Register count_save  = r5;       // original element count
    const Register copied_oop  = r6;       // actual oop copied
    const Register oop_klass   = r7;       // oop._klass

    //---------------------------------------------------------------
    // Assembler stub will be used for this call to arraycopy
    // if the two arrays are subtypes of Object[] but the
    // destination array type is not equal to or a supertype
    // of the source type.  Each element must be separately
    // checked.

    assert_different_registers(from, to, count, ckoff, ckval,
                               copied_oop, oop_klass, count_save);

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      array_overlap_test(L);//, TIMES_OOP);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
    }
#endif //ASSERT

    // Caller of this entry point must set up the argument registers.
    if (entry != NULL) {
      *entry = __ pc();
      BLOCK_COMMENT("Entry:");
    }

    // Empty array:  Nothing to do.
    __ cbz(count, L_done);

    __ push(RegSet::of(count_save, copied_oop, oop_klass), sp);

#ifdef ASSERT
    BLOCK_COMMENT("assert consistent ckoff/ckval");
    // The ckoff and ckval must be mutually consistent,
    // even though caller generates both.
    { Label L;
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
      __ ldr(rscratch1, Address(ckval, sco_offset));
      __ cmp(ckoff, rscratch1);
      __ b(L, Assembler::EQ);
      __ stop("super_check_offset inconsistent");
      __ bind(L);
    }
#endif //ASSERT

    // save the original count
    __ mov(count_save, count);

    // save destination array start address
    __ push(to);

    // Copy from low to high addresses
    __ b(L_load_element);

    // ======== begin loop ========
    // (Loop is rotated; its entry is L_load_element.)
    // Loop control:
    //   for (; count != 0; count--) {
    //     copied_oop = load_heap_oop(from++);
    //     ... generate_type_check ...;
    //     store_heap_oop(to++, copied_oop);
    //   }
    __ align(OptoLoopAlignment);

    __ BIND(L_store_element);
    __ store_heap_oop(__ post(to, 4), copied_oop);  // store the oop
    __ sub(count, count, 1);
    __ cbz(count, L_do_card_marks);

    // ======== loop entry is here ========
    __ BIND(L_load_element);
    __ load_heap_oop(copied_oop, __ post(from, 4)); // load the oop
    __ cbz(copied_oop, L_store_element);

    __ load_klass(oop_klass, copied_oop); // query the object klass
    generate_type_check(oop_klass, ckoff, ckval, L_store_element);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register count = remaining oops, count_orig = total oops.
    // Emit GC store barriers for the oops we have copied and report
    // their number to the caller.

    __ subs(count, count_save, count);    // K = partially copied oop count
    __ inv(count, count);                 // report (-1^K) to caller
    __ b(L_done_pop, Assembler::EQ);

    __ BIND(L_do_card_marks);
    __ add(to, to, -heapOopSize);         // make an inclusive end pointer
    __ pop(rscratch2);                    // restore original to address
    gen_write_ref_array_post_barrier(rscratch2, to, rscratch1);

    __ bind(L_done_pop);
    __ pop(RegSet::of(count_save, copied_oop, oop_klass), sp);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);

    __ bind(L_done);
    __ mov(r0, count);
    __ leave();
    __ b(lr);
    return start;
  }
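
  // Return value encoding, for reference: on full success r0 == 0; if the
  // type check fails after K elements were transferred, r0 == -1^K (i.e. ~K,
  // which equals -1 - K). A caller can therefore recover the partial
  // transfer count as, illustratively:
  //
  //   int copied = -1 - (int)result;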

  void generate_arraycopy_stubs() {
    address entry;

    // jbyte
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =      generate_disjoint_copy(sizeof(jbyte),  true,  false,        &entry, "arrayof_jbyte_disjoint_arraycopy");
    StubRoutines::_arrayof_jbyte_arraycopy =               generate_conjoint_copy(sizeof(jbyte),  true,  false, entry, NULL,   "arrayof_jbyte_arraycopy");
    StubRoutines::_jbyte_disjoint_arraycopy =              generate_disjoint_copy(sizeof(jbyte),  false, false,        &entry, "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy =                       generate_conjoint_copy(sizeof(jbyte),  false, false, entry, NULL,   "jbyte_arraycopy");
    // jshort
    StubRoutines::_arrayof_jshort_disjoint_arraycopy =     generate_disjoint_copy(sizeof(jshort), true,  false,        &entry, "arrayof_jshort_disjoint_arraycopy");
    StubRoutines::_arrayof_jshort_arraycopy =              generate_conjoint_copy(sizeof(jshort), true,  false, entry, NULL,   "arrayof_jshort_arraycopy");
    StubRoutines::_jshort_disjoint_arraycopy =             generate_disjoint_copy(sizeof(jshort), false, false,        &entry, "jshort_disjoint_arraycopy");
    StubRoutines::_jshort_arraycopy =                      generate_conjoint_copy(sizeof(jshort), false, false, entry, NULL,   "jshort_arraycopy");
    // jint (always aligned)
    StubRoutines::_arrayof_jint_disjoint_arraycopy =       generate_disjoint_copy(sizeof(jint),   true,  false,        &entry, "arrayof_jint_disjoint_arraycopy");
    StubRoutines::_arrayof_jint_arraycopy =                generate_conjoint_copy(sizeof(jint),   true,  false, entry, NULL,   "arrayof_jint_arraycopy");
    StubRoutines::_jint_disjoint_arraycopy =               StubRoutines::_arrayof_jint_disjoint_arraycopy;
    StubRoutines::_jint_arraycopy =                        StubRoutines::_arrayof_jint_arraycopy;
    // jlong (always aligned)
    StubRoutines::_arrayof_jlong_disjoint_arraycopy =      generate_disjoint_copy(sizeof(jlong),  true,  false,        &entry, "arrayof_jlong_disjoint_arraycopy");
    StubRoutines::_arrayof_jlong_arraycopy =               generate_conjoint_copy(sizeof(jlong),  true,  false, entry, NULL,   "arrayof_jlong_arraycopy");
    StubRoutines::_jlong_disjoint_arraycopy =              StubRoutines::_arrayof_jlong_disjoint_arraycopy;
    StubRoutines::_jlong_arraycopy =                       StubRoutines::_arrayof_jlong_arraycopy;
    // OOP (always aligned)
    StubRoutines::_arrayof_oop_disjoint_arraycopy =        generate_disjoint_copy(sizeof(jint),   true,  true,         &entry, "arrayof_oop_disjoint_arraycopy");
    StubRoutines::_arrayof_oop_arraycopy =                 generate_conjoint_copy(sizeof(jint),   true,  true,  entry, NULL,   "arrayof_oop_arraycopy");
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_copy(sizeof(jint),   true,  true,         &entry, "arrayof_oop_disjoint_arraycopy_uninit", true);
    StubRoutines::_arrayof_oop_arraycopy_uninit =          generate_conjoint_copy(sizeof(jint),   true,  true,  entry, NULL,   "arrayof_oop_arraycopy_uninit",          true);
    StubRoutines::_oop_disjoint_arraycopy =                StubRoutines::_arrayof_oop_disjoint_arraycopy;
    StubRoutines::_oop_arraycopy =                         StubRoutines::_arrayof_oop_arraycopy;
    StubRoutines::_oop_disjoint_arraycopy_uninit =         StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
    StubRoutines::_oop_arraycopy_uninit =                  StubRoutines::_arrayof_oop_arraycopy_uninit;

    StubRoutines::_checkcast_arraycopy =        generate_checkcast_copy("checkcast_arraycopy",        NULL);
    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, true);
  }

  void generate_math_stubs() { Unimplemented(); }

  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   c_rarg0 = adr
    //   c_rarg1 = errValue
    //
    // result:
    //   r0 = *adr or errValue

    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into c_rarg0, may fault.
    __ mov(c_rarg2, c_rarg0);
    *fault_pc = __ pc();
    switch (size) {
      case 4:
        // int32_t
        __ ldr(c_rarg0, Address(c_rarg2, 0));
        break;
      case 8:
        // int64_t
        __ ldrd(c_rarg0, c_rarg1, Address(c_rarg2, 0));
        break;
      default:
        ShouldNotReachHere();
    }
    __ b(lr);
    // return errValue or *adr
    *continuation_pc = __ pc();
    __ mov(r0, c_rarg1);
    __ b(lr);
  }
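
  // For reference, callers reach these stubs through the SafeFetch32 /
  // SafeFetchN inline wrappers in stubRoutines.hpp, e.g. to probe possibly
  // unmapped memory without crashing (illustrative usage):
  //
  //   int* p = (int*)suspect_address;
  //   int v = SafeFetch32(p, -1);   // returns -1 if the load faults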

  /**
   *  Arguments:
   *
   * Inputs:
   *   c_rarg0   - int crc
   *   c_rarg1   - byte* buf
   *   c_rarg2   - int length
   *
   * Output:
   *       r0   - int crc result
   *
   * Preserves:
   *       r13
   *
   */
  address generate_updateBytesCRC32() {
    assert(UseCRC32Intrinsics, "what are we doing here?");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");

    address start = __ pc();

    const Register crc    = c_rarg0;  // crc
    const Register buf    = c_rarg1;  // source java byte array address
    const Register len    = c_rarg2;  // length
    const Register table0 = c_rarg3;  // crc_table address
    const Register table1 = r4;
    const Register table2 = r5;
    const Register table3 = lr;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(RegSet::of(table1, table2, r6, r7), sp);

    __ kernel_crc32(crc, buf, len,
              table0, table1, table2, table3, rscratch1, rscratch2, r6);

    __ pop(RegSet::of(table1, table2, r6, r7), sp);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(lr);

    return start;
  }
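
  // The table-driven kernel above implements the standard byte-at-a-time
  // CRC-32 recurrence (a sketch only; the real kernel_crc32 processes words
  // and folds with multiple tables per step):
  //
  //   uint32_t crc32_update(uint32_t crc, uint8_t b, const uint32_t table[256]) {
  //     return table[(crc ^ b) & 0xff] ^ (crc >> 8);
  //   }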

  // Continuation point for throwing of implicit exceptions that are
  // not handled in the current activation. Fabricates an exception
  // oop and initiates normal exception dispatching in this
  // frame. Since we need to preserve callee-saved values (currently
  // only for C2, but done for C1 as well) we need a callee-saved oop
  // map and therefore have to make these stubs into RuntimeStubs
  // rather than BufferBlobs.  If the compiler needs all registers to
  // be preserved between the fault point and the exception handler
  // then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other
  // implicit exceptions (e.g., NullPointerException or
  // AbstractMethodError on entry) are either at call sites or
  // otherwise assume that stack unwinding will be initiated, so
  // caller saved registers were assumed volatile in the compiler.

#undef __
#define __ masm->

  address generate_throw_exception(const char* name,
                                   address runtime_entry,
                                   Register arg1 = noreg,
                                   Register arg2 = noreg) {
    // Information about frame layout at time of blocking runtime call.
    // Note that we only have to preserve callee-saved registers since
    // the compilers are responsible for supplying a continuation point
    // if they expect all registers to be preserved.
    // n.b. aarch32 asserts that frame::arg_reg_save_area_bytes == 0
    enum layout {
      rfp_off = 0,
      return_off,
      framesize // inclusive of return address
    };

    int insts_size = 512;
    int locs_size  = 64;

    CodeBuffer code(name, insts_size, locs_size);
    OopMapSet* oop_maps  = new OopMapSet();
    MacroAssembler* masm = new MacroAssembler(&code);

    address start = __ pc();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of
    // thread-local storage and also sets up last_Java_sp slightly
    // differently than the real call_VM

    __ enter(); // Save FP and LR before call

    assert(is_even(framesize), "sp not 8-byte aligned");

    int frame_complete = __ pc() - start;

    // Set up last_Java_sp and last_Java_fp
    address the_pc = __ pc();
    __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1);

    // Call runtime
    if (arg1 != noreg) {
      assert(arg2 != c_rarg1, "clobbered");
      __ mov(c_rarg1, arg1);
    }
    if (arg2 != noreg) {
      __ mov(c_rarg2, arg2);
    }
    __ mov(c_rarg0, rthread);
    BLOCK_COMMENT("call runtime_entry");
    __ align_stack();
    __ mov(rscratch1, runtime_entry);
    __ bl(rscratch1);

    // Generate oop map
    OopMap* map = new OopMap(framesize, 0);

    oop_maps->add_gc_map(the_pc - start, map);

    __ reset_last_Java_frame(true, true);
    __ maybe_isb();

    __ leave();

    // check for pending exceptions
#ifdef ASSERT
    Label L;
    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
    __ cbnz(rscratch1, L);
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

    // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub* stub =
      RuntimeStub::new_runtime_stub(name,
                                    &code,
                                    frame_complete,
                                    framesize,
                                    oop_maps, false);
    return stub->entry_point();
  }

  // Initialization
  void generate_initial() {
    // Generates initial stubs and initializes the entry points

    // Entry points that exist on all platforms.  Note: this is code
    // that could be shared among different platforms - however the
    // benefit seems to be smaller than the disadvantage of having a
    // much more complicated generator structure. See also comment in
    // stubRoutines.hpp.

    StubRoutines::_forward_exception_entry = generate_forward_exception();

    StubRoutines::_call_stub_entry =
      generate_call_stub(StubRoutines::_call_stub_return_address);

    // is referenced by megamorphic call
    StubRoutines::_catch_exception_entry = generate_catch_exception();

    // Build this early so it's available for the interpreter.
    StubRoutines::_throw_StackOverflowError_entry =
      generate_throw_exception("StackOverflowError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_StackOverflowError));
    if (UseCRC32Intrinsics) {
      // set table address before stub generation, which uses it
      StubRoutines::_crc_table_adr = (address)StubRoutines::aarch32::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
    }

    NativeCall::init();
  }

  void generate_all() {
    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
    StubRoutines::_throw_AbstractMethodError_entry =
      generate_throw_exception("AbstractMethodError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_AbstractMethodError));

    StubRoutines::_throw_IncompatibleClassChangeError_entry =
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_IncompatibleClassChangeError));

    StubRoutines::_throw_NullPointerException_at_call_entry =
      generate_throw_exception("NullPointerException at call throw_exception",
                               CAST_FROM_FN_PTR(address,
                                                SharedRuntime::
                                                throw_NullPointerException_at_call));

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();

    // Safefetch stubs.
    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                       &StubRoutines::_safefetch32_fault_pc,
                                                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
}; // end class declaration

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}