/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP
#define CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP

#include "asm/assembler.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;

  using Assembler::mov;

 protected:

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
#ifdef CC_INTERP
  // The C++ interpreter never wants to use the interp_masm version of call_VM
  #define VIRTUAL
#else
  #define VIRTUAL virtual
#endif

  VIRTUAL void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label *retaddr = NULL
  );

  VIRTUAL void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label &retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg) then rthread will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
 public:
  void init_unseen_bytecodes();
  MacroAssembler(CodeBuffer* code) : Assembler(code) { init_unseen_bytecodes(); }

  // Biased locking support
  // lock_reg and obj_reg must be loaded up with the appropriate values.
  // swap_reg is killed.
  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
  // be killed; if not supplied, push/pop will be used internally to
  // allocate a temporary (inefficient, avoid if possible).
  // The optional slow case is for implementations (interpreter and C1) which
  // branch to the slow case directly. Leaves condition codes set for C2's Fast_Lock node.
  // Returns the offset of the first potentially-faulting instruction for null
  // check info (currently consumed only by C1). If
  // swap_reg_contains_mark is true then returns -1 as it is assumed
  // the calling code has already passed any potential faults.
  int biased_locking_enter(Register lock_reg, Register obj_reg,
                           Register swap_reg, Register tmp_reg,
                           bool swap_reg_contains_mark,
                           Label& done, Label* slow_case = NULL,
                           BiasedLockingCounters* counters = NULL);
  void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done);

  // Helper functions for statistics gathering.
  // Unconditional atomic increment.
  void atomic_inc(Register counter_addr, Register tmp);
  void atomic_inc(Address counter_addr, Register tmp1, Register tmp2) {
    lea(tmp1, counter_addr);
    atomic_inc(tmp1, tmp2);
  }
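
  // A minimal usage sketch (illustrative, not from this file; assumes the
  // usual HotSpot '#define __ masm->' convention, with 'rbase' and
  // 'counter_offset' as hypothetical names):
  //
  //   __ atomic_inc(Address(rbase, counter_offset), rscratch1, rscratch2);
  //
  // The Address overload first materializes the effective address into tmp1
  // via lea(), then performs the unconditional atomic increment using tmp2.
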
  // Load Effective Address
  void lea(Register r, const Address &a) {
    InstructionMark im(this);
    code_section()->relocate(inst_mark(), a.rspec());
    a.lea(this, r);
  }

  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
    stop("HALT");
  }

#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

// Macro assembly operations needed for aarch32

// TODO: probably add back the 64-bit constant-loading routine, as it will
// be useful for longs.
private:

  // Bitset-based push/pop helpers backing the RegSet overloads below.
  int push(unsigned int bitset, Register stack);
  int pop(unsigned int bitset, Register stack);

public:

  void mov(Register dst, Address a, Condition cond = C_DFLT);

  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

  // Now the mov instructions for loading absolute addresses and 32-bit immediates

  inline void mov(Register dst, address addr, Condition cond = C_DFLT) {
    // TODO: do Addresses end up as address and then pass through this method
    // after being marked for relocation elsewhere? If not (as I suspect), this
    // can be relaxed to mov_immediate to potentially produce shorter code sequences.
    mov_immediate32(dst, (u_int32_t)addr, cond, false);
  }

  inline void mov(Register dst, long l, Condition cond = C_DFLT) {
    mov(dst, (u_int32_t)l, cond);
  }
  inline void mov(Register dst, unsigned long l, Condition cond = C_DFLT) {
    mov(dst, (u_int32_t)l, cond);
  }
  inline void mov(Register dst, int i, Condition cond = C_DFLT) {
    mov(dst, (u_int32_t)i, cond);
  }
  inline void mov(Register dst, u_int32_t i, Condition cond = C_DFLT) {
    mov_immediate(dst, i, cond, false);
  }
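
  // Sketch of how the overloads above are used (illustrative; '__' is assumed
  // to be a MacroAssembler*). All integral variants funnel into
  // mov_immediate/mov_immediate32, which choose the concrete instruction
  // sequence for the value (the exact choice is up to their implementation,
  // which is not shown here):
  //
  //   __ mov(r0, 42);                   // small int
  //   __ mov(r1, 0xdeadbeef);           // full 32-bit immediate
  //   __ mov(r2, (address)some_entry);  // absolute address ('some_entry' is hypothetical)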

  inline void mov(Register dst, Register src, Condition cond = C_DFLT) {
    Assembler::mov(dst, src, cond);
  }
  inline void mov(Register dst, Register src, shift_op shift,
                  Condition cond = C_DFLT) {
    Assembler::mov(dst, src, shift, cond);
  }
  // TODO: add sflag compatibility
  void movptr(Register r, uintptr_t imm32, Condition cond = C_DFLT);

  void ret(Register reg);

  // These are aarch64 instructions that can easily be emulated. Note that,
  // unlike the aarch64 versions, these emulations update the condition
  // flags (via cmp/tst).
  void cbz(Register r, Label& l) {
    cmp(r, 0);
    b(l, EQ);
  }
  void cbnz(Register r, Label& l) {
    cmp(r, 0);
    b(l, NE);
  }
  void tbz(Register r, unsigned bit, Label& l) {
    tst(r, 1 << bit);
    b(l, EQ);
  }
  void tbnz(Register r, unsigned bit, Label& l) {
    tst(r, 1 << bit);
    b(l, NE);
  }
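
  // Usage sketch (illustrative). Because these emulations are built on
  // cmp/tst, the condition flags are clobbered, unlike the aarch64 originals:
  //
  //   Label L_is_zero;
  //   __ cbz(r0, L_is_zero);      // cmp r0, #0 ; beq L_is_zero
  //   __ tbnz(r1, 3, L_is_zero);  // tst r1, #(1 << 3) ; bne L_is_zero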

  void addmw(Address a, Register incr, Register scratch) {
    ldr(scratch, a);
    add(scratch, scratch, incr);
    str(scratch, a);
  }

  // Add constant to memory word
  void addmw(Address a, int imm, Register scratch) {
    ldr(scratch, a);
    if (imm > 0)
      add(scratch, scratch, (unsigned)imm);
    else
      sub(scratch, scratch, (unsigned)-imm);
    str(scratch, a);
  }
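
  // Illustrative expansion of the constant form (the address used here is
  // hypothetical):
  //
  //   __ addmw(Address(rfp, -8), 1, rscratch1);
  //     // ldr rscratch1, [rfp, #-8]
  //     // add rscratch1, rscratch1, #1
  //     // str rscratch1, [rfp, #-8]
  //
  // Note that this read-modify-write is not atomic; prefer atomic_inc/
  // atomic_add when other threads may update the same word.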

// XXX stubs

  Register tlab_refill(Label& retry, Label& try_eden, Label& slow_case);

  // Macro instructions for accessing and updating the floating point
  // status register
  //
  // FPSR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 001

  inline void get_fpsr(Register reg = as_Register(0xf)) {
    vmrs(reg);
  }

  inline void set_fpsr(Register reg) {
    vmsr(reg);
  }

  inline void clear_fpsr() {
    mov(rscratch1, 0);
    set_fpsr(rscratch1);
  }

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);

  static address target_addr_for_insn(address insn_addr, unsigned insn);
  static address target_addr_for_insn(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn(insn_addr, insn);
  }

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target) {
    pd_patch_instruction_size(branch, target);
  }

#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  static int patch_oop(address insn_addr, address o);

  // The following four methods return the offset of the appropriate move instruction.

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

  // Support for inc/dec with optimal instruction selection depending on value.
  // increment()/decrement() calls with an address destination will need to use
  // rscratch1 to load the value to be incremented. increment()/decrement()
  // calls which add or subtract a constant value greater than 2^12 will need
  // to use rscratch2 to hold the constant. So, a register increment()/
  // decrement() may trash rscratch2, and an address increment()/decrement()
  // may trash rscratch1 and rscratch2.
  void decrement(Register reg, int value = 1);
  void decrement(Address dst, int value = 1);
  void increment(Register reg, int value = 1);
  void increment(Address dst, int value = 1);
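
  // Scratch-register discipline in a sketch (illustrative, following the
  // rules stated above):
  //
  //   __ increment(r4, 8);                    // at most trashes rscratch2
  //   __ increment(Address(r5, 0), 1 << 20);  // may trash rscratch1 and rscratch2
  //
  // Consequently, avoid passing rscratch1/rscratch2 as operands here.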

  // Alignment
  void align(int modulus);

  // Stack frame creation/removal
  void enter() {
    stmdb(sp, RegSet::of(rfp, lr).bits());
    add(rfp, sp, wordSize);
  }

  void leave() {
    sub(sp, rfp, wordSize);
    ldmia(sp, RegSet::of(rfp, lr).bits());
  }
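
  // Frame link built by enter(), sketched under the assumption that stmdb
  // here pushes with writeback and wordSize == 4:
  //
  //   old sp - 4:  saved lr    <- rfp after enter()
  //   old sp - 8:  saved rfp   <- sp after enter()
  //
  // leave() recovers sp from rfp (sub sp, rfp, wordSize) and reloads both
  // registers, so it is correct no matter how far sp moved in between.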

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double };

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloads with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);
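
  // Typical shape of a call_VM use, sketched (illustrative; 'some_entry' is a
  // hypothetical JRT_ENTRY-style routine, and c_rarg1 assumes the usual
  // argument-register aliases):
  //
  //   __ call_VM(r0,                                   // oop result, if any
  //              CAST_FROM_FN_PTR(address, some_entry),
  //              c_rarg1);                             // first explicit argument
  //
  // call_VM establishes the frame anchor, passes the current thread as the
  // implicit first C argument, and by default checks for pending exceptions
  // on return.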

  void get_vm_result  (Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base,
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base,
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // Last Java frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label &last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Register last_java_pc,
                           Register scratch);

  void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc);

  // As above, with the thread in its default location (rthread)
  void reset_last_Java_frame(bool clear_fp, bool clear_pc);

  // Stores
  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)

#if INCLUDE_ALL_GCS

  void g1_write_barrier_pre(Register obj,
                            Register pre_val,
                            Register thread,
                            Register tmp,
                            bool tosca_live,
                            bool expand_call);

  void g1_write_barrier_post(Register store_addr,
                             Register new_val,
                             Register thread,
                             Register tmp,
                             Register tmp2);

#endif // INCLUDE_ALL_GCS

  // Split store_check(Register obj) to enhance instruction interleaving
  void store_check_part_1(Register obj);
  void store_check_part_2(Register obj);

  // Oop manipulations
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp);

  void load_heap_oop(Register dst, Address src);

  void load_heap_oop_not_null(Register dst, Address src);
  void store_heap_oop(Address dst, Register src);

  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  void load_prototype_header(Register dst, Register src);

  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void push_CPU_state();
  void pop_CPU_state();

  // Round reg up to a multiple of modulus (which must be a power of two)
  void round_to(Register reg, int modulus);

  // Allocation
  void eden_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );
  void tlab_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Register t2,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );

  void verify_tlab();

  void incr_allocated_bytes(Register thread,
                            Register var_size_in_bytes, int con_size_in_bytes,
                            Register t1 = noreg);

  // Interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface);

  // Virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // Debugging

  // only if +VerifyOops
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(Address addr, const char* s = "broken oop addr");

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  // prints msg and continues
  void warn(const char* msg);

  static void debug32(char* msg, int32_t pc, int32_t regs[]);

  void untested()                                { stop("untested"); }

  void unimplemented(const char* what = "") {
    char* b = new char[1024];
    jio_snprintf(b, 1024, "unimplemented: %s", what);
    stop(b);
  }

#define should_not_reach_here() should_not_reach_here_line(__FILE__, __LINE__)
  void should_not_reach_here_line(const char* file, int line) {
    mov(rscratch1, line);
    reg_printf_important(file);
    reg_printf_important(": %d", rscratch1);
    stop("should_not_reach_here");
  }

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    mov(rscratch2, -offset);
    // bang with random number from r0
    str(r0, Address(sp, rscratch2));
  }

  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages. Also clobbers tmp.
  void bang_stack_size(Register size, Register tmp);

  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
                                                Register tmp,
                                                int offset);

  // Support for serializing memory accesses between threads
  void serialize_memory(Register thread, Register tmp);

  // Arithmetic

  void addptr(Address dst, int32_t src) {
    lea(rscratch2, dst);
    ldr(rscratch1, Address(rscratch2));
    add(rscratch1, rscratch1, src);
    str(rscratch1, Address(rscratch2));
  }
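
  // Illustrative use (the base register and offset are hypothetical): bump a
  // word-sized cell in memory,
  //
  //   __ addptr(Address(r5, 12), 1);
  //
  // Both scratch registers are clobbered: rscratch2 holds the effective
  // address and rscratch1 the loaded value, as the body above shows.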

  void cmpptr(Register src1, Address src2);

  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                  Label &succeed, Label *fail);

  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                Label &succeed, Label *fail);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);

  void orptr(Address adr, RegisterOrConstant src) {
    ldr(rscratch2, adr);
    if (src.is_register())
      orr(rscratch2, rscratch2, src.as_register());
    else
      orr(rscratch2, rscratch2, src.as_constant());
    str(rscratch2, adr);
  }

  // Calls

  void trampoline_call(Address entry, CodeBuffer *cbuf = NULL);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);

  static int far_branch_size() {
    // TODO performance issue: always generate real far jumps
    return 3 * 4;  // movw, movt, bx
  }
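
  // Dispatch callers are expected to perform, sketched (illustrative; the
  // exact near-call and Address-construction forms depend on the Assembler
  // API and are assumptions here):
  //
  //   if (far_branches()) {
  //     __ far_call(Address(dest));  // movw + movt + indirect call, 12 bytes
  //   } else {
  //     __ bl(dest);                 // single 4-byte branch-and-link
  //   }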

  // Emit the CompiledIC call idiom
  void ic_call(address entry);

public:

  // Data
  void mov_metadata(Register dst, Metadata* obj);
  Address allocate_metadata_address(Metadata* obj);
  Address constant_oop_address(jobject obj);

  void movoop(Register dst, jobject obj, bool immediate = false);

  // CRC32 code for the java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp, Register tmp2, Register tmp3);

#undef VIRTUAL

  // Stack push and pop of individual 32-bit registers
  void push(Register src);
  void pop(Register dst);

  // Push/pop all registers onto/from the stack
  void pusha();
  void popa();

  void repne_scan(Register addr, Register value, Register count,
                  Register scratch);
  void repne_scanw(Register addr, Register value, Register count,
                   Register scratch);

  // Form an address from base + offset in Rd. Rd may or may not actually be
  // used: you must use the Address that is returned. It is up to you to ensure
  // that the shift provided matches the size of your data.
  Address form_address(Register Rd, Register base, long byte_offset, int shift);

 public:

  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
    } else {
      mov(dest, InternalAddress(const_addr.target()));
      ldr(dest, dest);
    }
  }

  address read_polling_page(Register r, address page, relocInfo::relocType rtype);
  address read_polling_page(Register r, relocInfo::relocType rtype);

  // CRC32 code for the java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_word_crc32(Register crc, Register v, Register tmp, Register tmp2,
        Register table0, Register table1, Register table2, Register table3);

  // Auto dispatch for the barriers isb, dmb & dsb: use the ARMv7
  // instructions when available, otherwise fall back to the CP15
  // equivalents.
  void isb() {
    if (VM_Version::features() & FT_ARMV7) {
      Assembler::isb();
    } else {
      cp15isb();
    }
  }

  void dsb(enum barrier option) {
    if (VM_Version::features() & FT_ARMV7) {
      Assembler::dsb(option);
    } else {
      cp15dsb();
    }
  }

  void dmb(enum barrier option) {
    if (VM_Version::features() & FT_ARMV7) {
      Assembler::dmb(option);
    } else {
      cp15dmb();
    }
  }

  void membar(Membar_mask_bits order_constraint) {
    dmb(Assembler::barrier(order_constraint));
  }
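
  // Example (illustrative; assumes the usual StoreStore/LoadLoad-style
  // Membar_mask_bits names from the shared Assembler layer):
  //
  //   __ membar(Assembler::StoreStore);  // e.g. before publishing an object
  //
  // On ARMv7 this becomes a dmb with the matching option; on older cores the
  // same ordering is enforced through the CP15 fallback in dmb() above.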

  // An ISB may be needed because of a safepoint
  void maybe_isb() { MacroAssembler::isb(); }

  // Helper functions for 64-bit multiplication, division and remainder
  // does <Rd+1:Rd> = <Rn+1:Rn> * <Rm+1:Rm>
  void mult_long(Register Rd, Register Rn, Register Rm);
  // does <Rdh:Rd> = <Rnh:Rn> * <Rmh:Rm>
  void mult_long(Register Rd, Register Rdh, Register Rn, Register Rnh, Register Rm, Register Rmh);

 private:
  void divide32(Register res, Register num, Register den, bool want_mod);
 public:
  // <Rd+1:Rd> = <Rn+1:Rn> / <Rm+1:Rm>
  // <Rd+1:Rd> = <Rn+1:Rn> % <Rm+1:Rm>
  // <Rd> = <Rn> / <Rm>
  // <Rd> = <Rn> % <Rm>
  void divide(Register Rd, Register Rn, Register Rm, int width, bool want_remainder);
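
  // Worked example (illustrative): 32-bit division and remainder,
  //
  //   __ divide(r0, r1, r2, 32, false);  // r0 = r1 / r2
  //   __ divide(r0, r1, r2, 32, true);   // r0 = r1 % r2
  //
  // With width == 64 the operands are the register pairs <Rn+1:Rn> described
  // in the comment above.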

  void extract_bits(Register dest, Register source, int lsb, int width);

  // These functions require that the src/dst register is an even register
  // and will emit LDREXD/STREXD if there are multiple cores and the processor
  // supports it. If there is only one core then LDRD/STRD will be emitted instead.
  // If the processor has multiple cores and doesn't support LDREXD/STREXD then
  // LDRD/STRD will be emitted and a warning message printed.
  void atomic_ldrd(Register Rt, Register RtII, Register Rbase);
  void atomic_strd(Register Rt, Register RtII, Register Rbase,
                   Register temp, Register tempII);

 private:
  // Generic fallback ldrd generator; may need to use a temporary register
  // when register collisions are found.
  //
  // Since double_ld_failed_dispatch can introduce address manipulation
  // instructions, it returns the offset of the first load/store instruction
  // to be used when constructing the implicit null check table.
  int double_ld_failed_dispatch(Register Rt, Register Rt2, const Address& adr,
                            void (Assembler::* mul)(unsigned, const Address&, Condition),
                            void (Assembler::* sgl)(Register, const Address&, Condition),
                            Register Rtmp, Condition cond);
  // ldrd/strd generator; can handle all strd cases and those ldrd cases where
  // there are no register collisions.
  void double_ldst_failed_dispatch(Register Rt, Register Rt2, const Address& adr,
                            void (Assembler::* mul)(unsigned, const Address&, Condition),
                            void (Assembler::* sgl)(Register, const Address&, Condition),
                            Condition cond);
public:
  // Override ldrd/strd to handle the cases where Rt + 1 != Rt2, or any other
  // condition which prevents the use of a single ldrd/strd insn; a pair of
  // ldr/str instructions is used instead.
  //
  // Since the ldrd/strd macros can introduce address manipulation
  // instructions, they return the offset of the first load/store instruction
  // to be used when constructing the implicit null check table.
  using Assembler::ldrd;
  int ldrd(Register Rt, Register Rt2, const Address& adr, Register Rtmp = rscratch1, Condition cond = C_DFLT);
  using Assembler::strd;
  int strd(Register Rt, Register Rt2, const Address& adr, Condition cond = C_DFLT);

 private:
  void bfc_impl(Register rd, int lsb, int width, Condition cond);
 public:
  void bfc(Register Rd, int lsb, int width, Condition cond = C_DFLT) {
    if (VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7))
      Assembler::bfc(Rd, lsb, width, cond);
    else
      bfc_impl(Rd, lsb, width, cond);
  }

  void align_stack() {
    if (StackAlignmentInBytes > 4)
      bic(sp, sp, StackAlignmentInBytes - 1);
  }

#ifdef ASSERT
  void verify_stack_alignment();
#endif

  // Debug helpers
  void save_machine_state();
  void restore_machine_state();

  static uint32_t bytecodes_until_print;
  static uint32_t bytecodes_executed;
  static int enable_debug;
  static int enable_method_debug;
  static int enable_debugging_static;

  void bytecode_seen(Register bc_reg, Register scratch);
  static void print_unseen_bytecodes();
  void reg_printf_internal(bool important, const char* fmt, Register a = r0, Register b = r0, Register c = r0);
  void reg_printf_important(const char* fmt, Register a = r0, Register b = r0, Register c = r0);
  void reg_printf(const char* fmt, Register a = r0, Register b = r0, Register c = r0);
  void print_method_entry(Register rmethod, bool native);
  void print_method_exit(bool normal = true);
  void get_bytecode(Register bc, Register dst);
  static void print_cpool(InstanceKlass* klass);

  void create_breakpoint();
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
   ~SkipIfEqual();
};
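
// Usage sketch (illustrative; DTraceMethodProbes is a standard HotSpot flag
// commonly guarded this way, and '_masm' is assumed to be in scope):
//
//   {
//     SkipIfEqual skip(_masm, &DTraceMethodProbes, false);
//     // Code emitted here runs only when DTraceMethodProbes != false; when
//     // it equals false, the constructor emits a branch over this region and
//     // the destructor binds the branch target at the end of the scope.
//   }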

struct tableswitch {
  Register _reg;
  int _insn_index;
  jint _first_key;
  jint _last_key;
  Label _after;
  Label _branches;
};

#endif // CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP