/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP
#define CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP

#include "asm/assembler.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;

  using Assembler::mov;

 protected:

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
#ifdef CC_INTERP
  // c++ interpreter never wants to use interp_masm version of call_VM
  #define VIRTUAL
#else
  #define VIRTUAL virtual
#endif
  VIRTUAL void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label  *retaddr = NULL
  );

  VIRTUAL void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label  &retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }
  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg) then rthread will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
  VIRTUAL void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );
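  // A minimal usage sketch (illustrative only; the entry point shown is
  // hypothetical):
  //   call_VM_base(r0,     // receive the oop result in r0
  //                noreg,  // thread: let call_VM_base load rthread
  //                noreg,  // last_java_sp: default to sp
  //                CAST_FROM_FN_PTR(address, InterpreterRuntime::some_entry),
  //                1, true);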

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

 public:
  void init_unseen_bytecodes();
  MacroAssembler(CodeBuffer* code) : Assembler(code) { init_unseen_bytecodes(); }

  // Biased locking support
  // lock_reg and obj_reg must be loaded up with the appropriate values.
  // swap_reg is killed.
  // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
  // be killed; if not supplied, push/pop will be used internally to
  // allocate a temporary (inefficient, avoid if possible).
  // Optional slow case is for implementations (interpreter and C1) which branch to
  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
  // Returns offset of first potentially-faulting instruction for null
  // check info (currently consumed only by C1). If
  // swap_reg_contains_mark is true then returns -1 as it is assumed
  // the calling code has already passed any potential faults.
  int biased_locking_enter(Register lock_reg, Register obj_reg,
                           Register swap_reg, Register tmp_reg,
                           bool swap_reg_contains_mark,
                           Label& done, Label* slow_case = NULL,
                           BiasedLockingCounters* counters = NULL);
  void biased_locking_exit(Register obj_reg, Register temp_reg, Label& done);


  // Helper functions for statistics gathering.
  // Unconditional atomic increment.
  void atomic_inc(Register counter_addr, Register tmp);
  void atomic_inc(Address counter_addr, Register tmp1, Register tmp2) {
    lea(tmp1, counter_addr);
    atomic_inc(tmp1, tmp2);
  }
  // Load Effective Address
  void lea(Register r, const Address &a) {
    InstructionMark im(this);
    code_section()->relocate(inst_mark(), a.rspec());
    a.lea(this, r);
  }

  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
    stop("HALT");
  }

#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

// macro assembly operations needed for aarch32

// first two private routines for loading 32 bit constants
//TODO Probably add back the 64-bit one as it will be useful for longs
private:

  int push(unsigned int bitset, Register stack);
  int pop(unsigned int bitset, Register stack);

public:

  void mov(Register dst, Address a, Condition cond = C_DFLT);


  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

  // now mov instructions for loading absolute addresses and 32bit immediates
  inline void mov(Register dst, address addr, Condition cond = C_DFLT) {
    // TODO: Do Address operands end up as address and then pass through this
    // method after being marked for relocation elsewhere? If not (as I suspect)
    // then this can be relaxed to mov_immediate to potentially produce shorter
    // code sequences.
    mov_immediate32(dst, (uint32_t)addr, cond, false);
  }

  inline void mov(Register dst, long l, Condition cond = C_DFLT) {
    mov(dst, (uint32_t)l, cond);
  }
  inline void mov(Register dst, unsigned long l, Condition cond = C_DFLT) {
    mov(dst, (uint32_t)l, cond);
  }
  inline void mov(Register dst, int i, Condition cond = C_DFLT) {
    mov(dst, (uint32_t)i, cond);
  }
  inline void mov(Register dst, uint32_t i, Condition cond = C_DFLT) {
    mov_immediate(dst, i, cond, false);
  }
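  // Note that the long, unsigned long and int overloads above all narrow to
  // uint32_t and funnel into mov_immediate, so every integral mov ends up
  // materializing a 32-bit constant.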

  inline void mov(Register dst, Register src, Condition cond = C_DFLT) {
    Assembler::mov(dst, src, cond);
  }
  inline void mov(Register dst, Register src, shift_op shift,
                  Condition cond = C_DFLT) {
    Assembler::mov(dst, src, shift, cond);
  }
  // TODO add sflag compatibility
  void movptr(Register r, uintptr_t imm32, Condition cond = C_DFLT);

  void ret(Register reg);

  // These are aarch64 instructions that can easily be emulated.
  // Note that they do not have quite the same semantics as the aarch64
  // versions, as these emulations update the flags.
  void cbz(Register r, Label& l) {
    cmp(r, 0);
    b(l, EQ);
  }
  void cbnz(Register r, Label& l) {
    cmp(r, 0);
    b(l, NE);
  }
  void tbz(Register r, unsigned bit, Label& l) {
    tst(r, 1 << bit);
    b(l, EQ);
  }
  void tbnz(Register r, unsigned bit, Label& l) {
    tst(r, 1 << bit);
    b(l, NE);
  }
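  // For example, tbnz(r0, 2, L) emits
  //   tst r0, #4
  //   bne L
  // so, unlike the aarch64 instruction, the condition flags are clobbered.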

  void addmw(Address a, Register incr, Register scratch) {
    ldr(scratch, a);
    add(scratch, scratch, incr);
    str(scratch, a);
  }

  // Add constant to memory word
  void addmw(Address a, int imm, Register scratch) {
    ldr(scratch, a);
    if (imm > 0)
      add(scratch, scratch, (unsigned)imm);
    else
      sub(scratch, scratch, (unsigned)-imm);
    str(scratch, a);
  }
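  // e.g. addmw(counter_addr, 1, rscratch1) bumps a counter word in memory;
  // note that the ldr/add/str sequence is not atomic.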

// XXX stubs

  Register tlab_refill(Label& retry, Label& try_eden, Label& slow_case);

  // macro instructions for accessing and updating floating point
  // status register
  //
  // FPSR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 001

  inline void get_fpsr(Register reg = as_Register(0xf)) {
    vmrs(reg);
  }

  inline void set_fpsr(Register reg) {
    vmsr(reg);
  }

  inline void clear_fpsr() {
    mov(rscratch1, 0);
    set_fpsr(rscratch1);
  }

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);

  static address target_addr_for_insn(address insn_addr, unsigned insn);
  static address target_addr_for_insn(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn(insn_addr, insn);
  }

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target) {
    pd_patch_instruction_size(branch, target);
  }

#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  static int patch_oop(address insn_addr, address o);

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

  // Support for inc/dec with optimal instruction selection depending on value.
  // increment()/decrement() calls with an address destination will need to use
  // rscratch1 to load the value to be incremented. increment()/decrement()
  // calls which add or subtract a constant value greater than 2^12 will need
  // to use rscratch2 to hold the constant. So, a register increment()/
  // decrement() may trash rscratch2, and an address increment()/decrement()
  // may trash rscratch1 and rscratch2.
  void decrement(Register reg, int value = 1);
  void decrement(Address dst, int value = 1);
  void increment(Register reg, int value = 1);
  void increment(Address dst, int value = 1);

  // Alignment
  void align(int modulus);

  // Stack frame creation/removal
  void enter()
  {
    stmdb(sp, RegSet::of(rfp, lr).bits());
    add(rfp, sp, wordSize);
  }

  void leave()
  {
    sub(sp, rfp, wordSize);
    ldmia(sp, RegSet::of(rfp, lr).bits());
  }
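  // Sketch of the frame enter() builds (stack grows down; this assumes the
  // stmdb/ldmia forms above write back sp, which the surrounding add/sub of
  // wordSize implies):
  //   sp + 4 : saved lr    <- rfp points here
  //   sp + 0 : saved rfp
  // leave() reverses this by rewinding sp from rfp and reloading the pair.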

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double};
  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is setup correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.


  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result  (Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label &last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Register last_java_pc,
                           Register scratch);

  void reset_last_Java_frame(Register thread, bool clearfp, bool clear_pc);

  // thread in the default location (rthread)
  void reset_last_Java_frame(bool clear_fp, bool clear_pc);

  // Stores
  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)

#if INCLUDE_ALL_GCS

  void g1_write_barrier_pre(Register obj,
                            Register pre_val,
                            Register thread,
                            Register tmp,
                            bool tosca_live,
                            bool expand_call);

  void g1_write_barrier_post(Register store_addr,
                             Register new_val,
                             Register thread,
                             Register tmp,
                             Register tmp2);

#endif // INCLUDE_ALL_GCS

  // split store_check(Register obj) to enhance instruction interleaving
  void store_check_part_1(Register obj);
  void store_check_part_2(Register obj);

  // oop manipulations
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp);

  void load_heap_oop(Register dst, Address src);

  void load_heap_oop_not_null(Register dst, Address src);
  void store_heap_oop(Address dst, Register src);

  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  void load_prototype_header(Register dst, Register src);

  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void push_CPU_state();
  void pop_CPU_state();

  // Round up to a multiple of modulus
  void round_to(Register reg, int modulus);

  // allocation
  void eden_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );
  void tlab_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Register t2,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );

  void verify_tlab();

  void incr_allocated_bytes(Register thread,
                            Register var_size_in_bytes, int con_size_in_bytes,
                            Register t1 = noreg);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);
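  // Typical wiring (a sketch): emit the fast path with L_slow_path falling
  // through into check_klass_subtype_slow_path, and point both at a common
  // L_success; check_klass_subtype above packages that pattern for the
  // common case.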

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);


  // Debugging

  // only if +VerifyOops
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(Address addr, const char* s = "broken oop addr");

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  // prints msg and continues
  void warn(const char* msg);

  static void debug32(char* msg, int32_t pc, int32_t regs[]);

  void untested()                                { stop("untested"); }

  void unimplemented(const char* what = "") {
    char* b = new char[1024];
    jio_snprintf(b, 1024, "unimplemented: %s", what);
    stop(b);
  }

#define should_not_reach_here() should_not_reach_here_line(__FILE__, __LINE__)
  void should_not_reach_here_line(const char *file, int line) {
    mov(rscratch1, line);
    reg_printf_important(file);
    reg_printf_important(": %d", rscratch1);
    stop("should_not_reach_here");
  }

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    // bang with random value from r0
    if (operand_valid_for_add_sub_immediate(offset)) {
      sub(rscratch2, sp, offset);
      strb(r0, Address(rscratch2));
    } else {
      mov(rscratch2, offset);
      strb(r0, Address(sp, rscratch2, Assembler::lsl(), Address::SUB));
    }
  }

  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages.  Also, clobbers tmp
  void bang_stack_size(Register size, Register tmp);

  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
                                                Register tmp,
                                                int offset);

  // Support for serializing memory accesses between threads
  void serialize_memory(Register thread, Register tmp);

  // Arithmetics

  void addptr(Address dst, int32_t src) {
    lea(rscratch2, dst);
    ldr(rscratch1, Address(rscratch2));
    add(rscratch1, rscratch1, src);
    str(rscratch1, Address(rscratch2));
  }

  void cmpptr(Register src1, Address src2);

  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                  Label &succeed, Label *fail);

  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                Label &succeed, Label *fail);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);

  void orptr(Address adr, RegisterOrConstant src) {
    ldr(rscratch2, adr);
    if (src.is_register())
      orr(rscratch2, rscratch2, src.as_register());
    else
      orr(rscratch2, rscratch2, src.as_constant());
    str(rscratch2, adr);
  }

  // Calls

  void trampoline_call(Address entry, CodeBuffer *cbuf = NULL);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }
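  // Note: plain ARM B/BL branches reach only +/-32MB, so once the reserved
  // code cache can exceed branch_range a call target must be materialized in
  // a register and jumped to indirectly; far_call/far_jump below do that.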

  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);

  static int far_branch_size() {
    // TODO performance issue: always generate real far jumps
    if (far_branches()) {
      return 3 * 4;  // movw, movt, br
    } else {
      return 4;
    }
  }

  // Emit the CompiledIC call idiom
  void ic_call(address entry);

public:
  // Data
  void mov_metadata(Register dst, Metadata* obj);
  Address allocate_metadata_address(Metadata* obj);
  Address constant_oop_address(jobject obj);

  void movoop(Register dst, jobject obj, bool immediate = false);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp, Register tmp2, Register tmp3);

#undef VIRTUAL

  // Stack push and pop individual registers
  void push(Register src);
  void pop(Register dst);

  // push all registers onto the stack
  void pusha();
  void popa();

  void repne_scan(Register addr, Register value, Register count,
                  Register scratch);
  void repne_scanw(Register addr, Register value, Register count,
                   Register scratch);

  // Form an address from base + offset in Rd. Rd may or may not actually be
  // used: you must use the Address that is returned. It is up to you to ensure
  // that the shift provided matches the size of your data.
  Address form_address(Register Rd, Register base, long byte_offset, int shift);

 public:

  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
    } else {
      mov(dest, InternalAddress(const_addr.target()));
      ldr(dest, dest);
    }
  }

  address read_polling_page(Register r, address page, relocInfo::relocType rtype);
  address read_polling_page(Register r, relocInfo::relocType rtype);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_word_crc32(Register crc, Register v, Register tmp, Register tmp2,
        Register table0, Register table1, Register table2, Register table3);

  // Auto dispatch for barriers isb, dmb & dsb.
  void isb() {
    if (VM_Version::features() & FT_ARMV7) {
      Assembler::isb();
    } else {
      cp15isb();
    }
  }

  void dsb(enum barrier option) {
    if (VM_Version::features() & FT_ARMV7) {
      Assembler::dsb(option);
    } else {
      cp15dsb();
    }
  }

  void dmb(enum barrier option) {
    if (VM_Version::features() & FT_ARMV7) {
      Assembler::dmb(option);
    } else {
      cp15dmb();
    }
  }
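  // The ISB/DSB/DMB encodings only exist from ARMv7 on; the cp15* fallbacks
  // are assumed to issue the equivalent CP15 c7 operations on older cores
  // (e.g. MCR p15, 0, Rt, c7, c10, 4 for a data synchronization barrier on
  // ARMv6).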

  void membar(Membar_mask_bits order_constraint) {
    dmb(Assembler::barrier(order_constraint));
  }

  // ISB may be needed because of a safepoint
  void maybe_isb() { MacroAssembler::isb(); }

  // Helper functions for 64-bit multiplication, division and remainder
  // does <Rd+1:Rd> = <Rn+1:Rn> * <Rm+1:Rm>
  void mult_long(Register Rd, Register Rn, Register Rm);
  // does <Rdh:Rd> = <Rnh:Rn> * <Rmh:Rm>
  void mult_long(Register Rd, Register Rdh, Register Rn, Register Rnh, Register Rm, Register Rmh);

 private:
  void divide32(Register res, Register num, Register den, bool want_mod);
 public:
  // <Rd+1:Rd> = <Rn+1:Rn> / <Rm+1:Rm>
  // <Rd+1:Rd> = <Rn+1:Rn> % <Rm+1:Rm>
  // <Rd> = <Rn> / <Rm>
  // <Rd> = <Rn> % <Rm>
  void divide(Register Rd, Register Rn, Register Rm, int width, bool want_remainder);

  void extract_bits(Register dest, Register source, int lsb, int width);

  // These functions require that the src/dst register is an even register
  // and will emit LDREXD/STREXD if there are multiple cores and the processor
  // supports it. If there's only one core then LDRD/STRD will be emitted instead.
  // If the processor has multiple cores and doesn't support LDREXD/STREXD then
  // LDRD/STRD will be emitted and a warning message printed.
  void atomic_ldrd(Register Rt, Register RtII, Register Rbase);
  void atomic_strd(Register Rt, Register RtII, Register Rbase,
                   Register temp, Register tempII);

 private:
  // Generic fallback ldrd generator. May need to use a temporary register
  // when register collisions are found.
  //
  // Since double_ld_failed_dispatch can introduce address manipulation
  // instructions, it should return the offset of the first load/store
  // instruction that will be used while constructing the implicit null
  // check table.
  int double_ld_failed_dispatch(Register Rt, Register Rt2, const Address& adr,
                            void (Assembler::* mul)(unsigned, const Address&, Condition),
                            void (Assembler::* sgl)(Register, const Address&, Condition),
                            Register Rtmp, Condition cond);
  // ldrd/strd generator. Can handle all strd cases and those ldrd where
  // there are no register collisions.
  void double_ldst_failed_dispatch(Register Rt, Register Rt2, const Address& adr,
                            void (Assembler::* mul)(unsigned, const Address&, Condition),
                            void (Assembler::* sgl)(Register, const Address&, Condition),
                            Condition cond);
public:
  // Override ldrd/strd to fall back to a pair of ldr/str instructions when
  // Rt + 1 != Rt2 or any other condition prevents the use of a single
  // ldrd/strd insn.
  //
  // Since the ldrd/strd macros can introduce address manipulation
  // instructions, they should return the offset of the first load/store
  // instruction that will be used while constructing the implicit null
  // check table.
  using Assembler::ldrd;
  int ldrd(Register Rt, Register Rt2, const Address& adr, Register Rtmp = rscratch1, Condition cond = C_DFLT);
  using Assembler::strd;
  int strd(Register Rt, Register Rt2, const Address& adr, Condition cond = C_DFLT);

private:
  void bfc_impl(Register rd, int lsb, int width, Condition cond);
public:
  void bfc(Register Rd, int lsb, int width, Condition cond = C_DFLT) {
    if (VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7))
      Assembler::bfc(Rd, lsb, width, cond);
    else
      bfc_impl(Rd, lsb, width, cond);
  }

  void align_stack() {
    if (StackAlignmentInBytes > 4)
      bic(sp, sp, StackAlignmentInBytes-1);
  }

#ifdef ASSERT
  void verify_stack_alignment();
#endif

  // Debug helpers
  void save_machine_state();
  void restore_machine_state();

  static uint32_t bytecodes_until_print;
  static uint32_t bytecodes_executed;
  static int enable_debug;
  static int enable_method_debug;
  static int enable_debugging_static;


  void bytecode_seen(Register bc_reg, Register scratch);
  static void print_unseen_bytecodes();
  void reg_printf_internal(bool important, const char *fmt, Register a = r0, Register b = r0, Register c = r0);
  void reg_printf_important(const char *fmt, Register a = r0, Register b = r0, Register c = r0);
  void reg_printf(const char *fmt, Register a = r0, Register b = r0, Register c = r0);
  void print_method_entry(Register rmethod, bool native);
  void print_method_exit(bool normal = true);
  void get_bytecode(Register bc, Register dst);
  static void print_cpool(InstanceKlass *klass);

  void create_breakpoint();
};


#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
   ~SkipIfEqual();
};

struct tableswitch {
  Register _reg;
  int _insn_index;
  jint _first_key;
  jint _last_key;
  Label _after;
  Label _branches;
};

#endif // CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP