--- old/src/cpu/sparc/vm/stubGenerator_sparc.cpp
+++ new/src/cpu/sparc/vm/stubGenerator_sparc.cpp
1 1 /*
2 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
2 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "asm/assembler.hpp"
27 27 #include "assembler_sparc.inline.hpp"
28 28 #include "interpreter/interpreter.hpp"
29 29 #include "nativeInst_sparc.hpp"
30 30 #include "oops/instanceOop.hpp"
31 31 #include "oops/methodOop.hpp"
32 32 #include "oops/objArrayKlass.hpp"
33 33 #include "oops/oop.inline.hpp"
34 34 #include "prims/methodHandles.hpp"
35 35 #include "runtime/frame.inline.hpp"
36 36 #include "runtime/handles.inline.hpp"
37 37 #include "runtime/sharedRuntime.hpp"
38 38 #include "runtime/stubCodeGenerator.hpp"
39 39 #include "runtime/stubRoutines.hpp"
40 40 #include "utilities/top.hpp"
41 41 #ifdef TARGET_OS_FAMILY_linux
42 42 # include "thread_linux.inline.hpp"
43 43 #endif
44 44 #ifdef TARGET_OS_FAMILY_solaris
45 45 # include "thread_solaris.inline.hpp"
46 46 #endif
47 47 #ifdef COMPILER2
48 48 #include "opto/runtime.hpp"
49 49 #endif
50 50
51 51 // Declaration and definition of StubGenerator (no .hpp file).
52 52 // For a more detailed description of the stub routine structure
53 53 // see the comment in stubRoutines.hpp.
54 54
55 55 #define __ _masm->
56 56
57 57 #ifdef PRODUCT
58 58 #define BLOCK_COMMENT(str) /* nothing */
59 59 #else
60 60 #define BLOCK_COMMENT(str) __ block_comment(str)
61 61 #endif
62 62
63 63 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
64 64
65 65 // Note: The register L7 is used as L7_thread_cache, and may not be used
66 66 // any other way within this module.
67 67
68 68
69 69 static const Register& Lstub_temp = L2;
70 70
71 71 // -------------------------------------------------------------------------------------------------------------------------
72 72 // Stub Code definitions
73 73
74 74 static address handle_unsafe_access() {
75 75 JavaThread* thread = JavaThread::current();
76 76 address pc = thread->saved_exception_pc();
77 77 address npc = thread->saved_exception_npc();
78 78 // pc is the instruction which we must emulate
79 79 // doing a no-op is fine: return garbage from the load
80 80
81 81 // request an async exception
82 82 thread->set_pending_unsafe_access_error();
83 83
84 84 // return address of next instruction to execute
85 85 return npc;
86 86 }
87 87
88 88 class StubGenerator: public StubCodeGenerator {
89 89 private:
90 90
91 91 #ifdef PRODUCT
92 92 #define inc_counter_np(a,b,c) (0)
93 93 #else
94 94 #define inc_counter_np(counter, t1, t2) \
95 95 BLOCK_COMMENT("inc_counter " #counter); \
96 96 __ inc_counter(&counter, t1, t2);
97 97 #endif
98 98
99 99 //----------------------------------------------------------------------------------------------------
100 100 // Call stubs are used to call Java from C
101 101
102 102 address generate_call_stub(address& return_pc) {
103 103 StubCodeMark mark(this, "StubRoutines", "call_stub");
104 104 address start = __ pc();
105 105
106 106 // Incoming arguments:
107 107 //
108 108 // o0 : call wrapper address
109 109 // o1 : result (address)
110 110 // o2 : result type
111 111 // o3 : method
112 112 // o4 : (interpreter) entry point
113 113 // o5 : parameters (address)
114 114 // [sp + 0x5c]: parameter size (in words)
115 115 // [sp + 0x60]: thread
116 116 //
117 117 // +---------------+ <--- sp + 0
118 118 // | |
119 119 // . reg save area .
120 120 // | |
121 121 // +---------------+ <--- sp + 0x40
122 122 // | |
123 123 // . extra 7 slots .
124 124 // | |
125 125 // +---------------+ <--- sp + 0x5c
126 126 // | param. size |
127 127 // +---------------+ <--- sp + 0x60
128 128 // | thread |
129 129 // +---------------+
130 130 // | |
131 131
132 132 // note: if the link argument position changes, adjust
133 133 // the code in frame::entry_frame_call_wrapper()
134 134
135 135 const Argument link = Argument(0, false); // used only for GC
136 136 const Argument result = Argument(1, false);
137 137 const Argument result_type = Argument(2, false);
138 138 const Argument method = Argument(3, false);
139 139 const Argument entry_point = Argument(4, false);
140 140 const Argument parameters = Argument(5, false);
141 141 const Argument parameter_size = Argument(6, false);
142 142 const Argument thread = Argument(7, false);
143 143
144 144 // setup thread register
145 145 __ ld_ptr(thread.as_address(), G2_thread);
146 146 __ reinit_heapbase();
147 147
148 148 #ifdef ASSERT
149 149 // make sure we have no pending exceptions
150 150 { const Register t = G3_scratch;
151 151 Label L;
152 152 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
153 153 __ br_null(t, false, Assembler::pt, L);
154 154 __ delayed()->nop();
155 155 __ stop("StubRoutines::call_stub: entered with pending exception");
156 156 __ bind(L);
157 157 }
158 158 #endif
159 159
160 160 // create activation frame & allocate space for parameters
161 161 { const Register t = G3_scratch;
162 162 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
163 163 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
164 164 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
165 165 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
166 166 __ neg(t); // negate so it can be used with save
167 167 __ save(SP, t, SP); // setup new frame
168 168 }
169 169
170 170 // +---------------+ <--- sp + 0
171 171 // | |
172 172 // . reg save area .
173 173 // | |
174 174 // +---------------+ <--- sp + 0x40
175 175 // | |
176 176 // . extra 7 slots .
177 177 // | |
178 178 // +---------------+ <--- sp + 0x5c
179 179 // | empty slot | (only if parameter size is even)
180 180 // +---------------+
181 181 // | |
182 182 // . parameters .
183 183 // | |
184 184 // +---------------+ <--- fp + 0
185 185 // | |
186 186 // . reg save area .
187 187 // | |
188 188 // +---------------+ <--- fp + 0x40
189 189 // | |
190 190 // . extra 7 slots .
191 191 // | |
192 192 // +---------------+ <--- fp + 0x5c
193 193 // | param. size |
194 194 // +---------------+ <--- fp + 0x60
195 195 // | thread |
196 196 // +---------------+
197 197 // | |
198 198
199 199 // pass parameters if any
200 200 BLOCK_COMMENT("pass parameters if any");
201 201 { const Register src = parameters.as_in().as_register();
202 202 const Register dst = Lentry_args;
203 203 const Register tmp = G3_scratch;
204 204 const Register cnt = G4_scratch;
205 205
206 206 // test if any parameters & setup of Lentry_args
207 207 Label exit;
208 208 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
209 209 __ add( FP, STACK_BIAS, dst );
210 210 __ tst(cnt);
211 211 __ br(Assembler::zero, false, Assembler::pn, exit);
212 212 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
213 213
214 214 // copy parameters if any
215 215 Label loop;
216 216 __ BIND(loop);
217 217 // Store parameter value
218 218 __ ld_ptr(src, 0, tmp);
219 219 __ add(src, BytesPerWord, src);
220 220 __ st_ptr(tmp, dst, 0);
221 221 __ deccc(cnt);
222 222 __ br(Assembler::greater, false, Assembler::pt, loop);
223 223 __ delayed()->sub(dst, Interpreter::stackElementSize, dst);
224 224
225 225 // done
226 226 __ BIND(exit);
227 227 }
228 228
229 229 // setup parameters, method & call Java function
230 230 #ifdef ASSERT
231 231     // layout_activation_impl checks its notion of saved SP against
232 232     // this register, so if this changes update it as well.
233 233 const Register saved_SP = Lscratch;
234 234 __ mov(SP, saved_SP); // keep track of SP before call
235 235 #endif
236 236
237 237 // setup parameters
238 238 const Register t = G3_scratch;
239 239 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
240 240 __ sll(t, Interpreter::logStackElementSize, t); // compute number of bytes
241 241 __ sub(FP, t, Gargs); // setup parameter pointer
242 242 #ifdef _LP64
243 243 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
244 244 #endif
245 245 __ mov(SP, O5_savedSP);
246 246
247 247
248 248 // do the call
249 249 //
250 250     // the following registers must be set up:
251 251 //
252 252 // G2_thread
253 253 // G5_method
254 254 // Gargs
255 255 BLOCK_COMMENT("call Java function");
256 256 __ jmpl(entry_point.as_in().as_register(), G0, O7);
257 257 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
258 258
259 259 BLOCK_COMMENT("call_stub_return_address:");
260 260 return_pc = __ pc();
261 261
262 262     // The callee, if it wasn't interpreted, can return with SP changed so
263 263     // we can no longer assert on the change of SP.
264 264
265 265 // store result depending on type
266 266 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
267 267 // is treated as T_INT)
268 268 { const Register addr = result .as_in().as_register();
269 269 const Register type = result_type.as_in().as_register();
270 270 Label is_long, is_float, is_double, is_object, exit;
271 271 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
272 272 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
273 273 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
274 274 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
275 275 __ delayed()->nop();
276 276
277 277 // store int result
278 278 __ st(O0, addr, G0);
279 279
280 280 __ BIND(exit);
281 281 __ ret();
282 282 __ delayed()->restore();
283 283
284 284 __ BIND(is_object);
285 285 __ ba(false, exit);
286 286 __ delayed()->st_ptr(O0, addr, G0);
287 287
288 288 __ BIND(is_float);
289 289 __ ba(false, exit);
290 290 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
291 291
292 292 __ BIND(is_double);
293 293 __ ba(false, exit);
294 294 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
295 295
296 296 __ BIND(is_long);
297 297 #ifdef _LP64
298 298 __ ba(false, exit);
299 299 __ delayed()->st_long(O0, addr, G0); // store entire long
300 300 #else
301 301 #if defined(COMPILER2)
302 302 // All return values are where we want them, except for Longs. C2 returns
303 303 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
304 304     // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
305 305     // build, we simply always use G1.
306 306     // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
307 307     // do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
308 308     // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
309 309
310 310 __ ba(false, exit);
311 311 __ delayed()->stx(G1, addr, G0); // store entire long
312 312 #else
313 313 __ st(O1, addr, BytesPerInt);
314 314 __ ba(false, exit);
315 315 __ delayed()->st(O0, addr, G0);
316 316 #endif /* COMPILER2 */
317 317 #endif /* _LP64 */
318 318 }
319 319 return start;
320 320 }
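
For orientation, here is a minimal C++ sketch of how the generated call stub looks from the C side. The actual typedef in stubRoutines.hpp uses HotSpot types (BasicType, methodOopDesc*, TRAPS); this sketch substitutes simple stand-ins. The six register arguments land in o0..o5 and the last two in the stack slots at sp + 0x5c and sp + 0x60, as laid out above.

    #include <stdint.h>
    typedef unsigned char* address;   // simplified stand-in for HotSpot's address type

    // Hedged sketch of the C-side view of the generated call stub.
    typedef void (*CallStub)(address   link,            // o0: call wrapper (GC only)
                             intptr_t* result,          // o1: result slot
                             int       result_type,     // o2: BasicType tag
                             void*     method,          // o3: methodOop
                             address   entry_point,     // o4: interpreter entry point
                             intptr_t* parameters,      // o5: argument array
                             int       parameter_size,  // [sp + 0x5c], in words
                             void*     thread);         // [sp + 0x60]: JavaThread*
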
321 321
322 322
323 323 //----------------------------------------------------------------------------------------------------
324 324 // Return point for a Java call if there's an exception thrown in Java code.
325 325 // The exception is caught and transformed into a pending exception stored in
326 326 // JavaThread that can be tested from within the VM.
327 327 //
328 328 // Oexception: exception oop
329 329
330 330 address generate_catch_exception() {
331 331 StubCodeMark mark(this, "StubRoutines", "catch_exception");
332 332
333 333 address start = __ pc();
334 334 // verify that thread corresponds
335 335 __ verify_thread();
336 336
337 337 const Register& temp_reg = Gtemp;
338 338 Address pending_exception_addr (G2_thread, Thread::pending_exception_offset());
339 339 Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset ());
340 340 Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset ());
341 341
342 342 // set pending exception
343 343 __ verify_oop(Oexception);
344 344 __ st_ptr(Oexception, pending_exception_addr);
345 345 __ set((intptr_t)__FILE__, temp_reg);
346 346 __ st_ptr(temp_reg, exception_file_offset_addr);
347 347 __ set((intptr_t)__LINE__, temp_reg);
348 348 __ st(temp_reg, exception_line_offset_addr);
349 349
350 350 // complete return to VM
351 351 assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
352 352
353 353 AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
354 354 __ jump_to(stub_ret, temp_reg);
355 355 __ delayed()->nop();
356 356
357 357 return start;
358 358 }
359 359
360 360
361 361 //----------------------------------------------------------------------------------------------------
362 362 // Continuation point for runtime calls returning with a pending exception
363 363 // The pending exception check happened in the runtime or native call stub
364 364 // The pending exception in Thread is converted into a Java-level exception
365 365 //
366 366 // Contract with Java-level exception handler: O0 = exception
367 367 // O1 = throwing pc
368 368
369 369 address generate_forward_exception() {
370 370 StubCodeMark mark(this, "StubRoutines", "forward_exception");
371 371 address start = __ pc();
372 372
373 373 // Upon entry, O7 has the return address returning into Java
374 374 // (interpreted or compiled) code; i.e. the return address
375 375 // becomes the throwing pc.
376 376
377 377 const Register& handler_reg = Gtemp;
378 378
379 379 Address exception_addr(G2_thread, Thread::pending_exception_offset());
380 380
381 381 #ifdef ASSERT
382 382 // make sure that this code is only executed if there is a pending exception
383 383 { Label L;
384 384 __ ld_ptr(exception_addr, Gtemp);
385 385 __ br_notnull(Gtemp, false, Assembler::pt, L);
386 386 __ delayed()->nop();
387 387 __ stop("StubRoutines::forward exception: no pending exception (1)");
388 388 __ bind(L);
389 389 }
390 390 #endif
391 391
392 392 // compute exception handler into handler_reg
393 393 __ get_thread();
394 394 __ ld_ptr(exception_addr, Oexception);
395 395 __ verify_oop(Oexception);
396 396 __ save_frame(0); // compensates for compiler weakness
397 397 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
398 398 BLOCK_COMMENT("call exception_handler_for_return_address");
399 399 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
400 400 __ mov(O0, handler_reg);
401 401 __ restore(); // compensates for compiler weakness
402 402
403 403 __ ld_ptr(exception_addr, Oexception);
404 404 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
405 405
406 406 #ifdef ASSERT
407 407 // make sure exception is set
408 408 { Label L;
409 409 __ br_notnull(Oexception, false, Assembler::pt, L);
410 410 __ delayed()->nop();
411 411 __ stop("StubRoutines::forward exception: no pending exception (2)");
412 412 __ bind(L);
413 413 }
414 414 #endif
415 415 // jump to exception handler
416 416 __ jmp(handler_reg, 0);
417 417 // clear pending exception
418 418 __ delayed()->st_ptr(G0, exception_addr);
419 419
420 420 return start;
421 421 }
422 422
423 423
424 424 //------------------------------------------------------------------------------------------------------------------------
425 425 // Continuation point for throwing of implicit exceptions that are not handled in
426 426 // the current activation. Fabricates an exception oop and initiates normal
427 427 // exception dispatching in this frame. Only callee-saved registers are preserved
428 428 // (through the normal register window / RegisterMap handling).
429 429 // If the compiler needs all registers to be preserved between the fault
430 430 // point and the exception handler then it must assume responsibility for that in
431 431 // AbstractCompiler::continuation_for_implicit_null_exception or
432 432 // continuation_for_implicit_division_by_zero_exception. All other implicit
433 433 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
434 434 // either at call sites or otherwise assume that stack unwinding will be initiated,
435 435 // so caller saved registers were assumed volatile in the compiler.
436 436
437 437 // Note that we generate only this stub into a RuntimeStub, because it needs to be
438 438 // properly traversed and ignored during GC, so we change the meaning of the "__"
439 439 // macro within this method.
440 440 #undef __
441 441 #define __ masm->
442 442
443 443 address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
444 444 #ifdef ASSERT
445 445 int insts_size = VerifyThread ? 1 * K : 600;
446 446 #else
447 447 int insts_size = VerifyThread ? 1 * K : 256;
448 448 #endif /* ASSERT */
449 449 int locs_size = 32;
450 450
451 451 CodeBuffer code(name, insts_size, locs_size);
452 452 MacroAssembler* masm = new MacroAssembler(&code);
453 453
454 454 __ verify_thread();
455 455
456 456 // This is an inlined and slightly modified version of call_VM
457 457 // which has the ability to fetch the return PC out of thread-local storage
458 458 __ assert_not_delayed();
459 459
460 460 // Note that we always push a frame because on the SPARC
461 461 // architecture, for all of our implicit exception kinds at call
462 462 // sites, the implicit exception is taken before the callee frame
463 463 // is pushed.
464 464 __ save_frame(0);
465 465
466 466 int frame_complete = __ offset();
467 467
468 468 if (restore_saved_exception_pc) {
469 469 __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
470 470 __ sub(I7, frame::pc_return_offset, I7);
471 471 }
472 472
473 473 // Note that we always have a runtime stub frame on the top of stack by this point
474 474 Register last_java_sp = SP;
475 475 // 64-bit last_java_sp is biased!
476 476 __ set_last_Java_frame(last_java_sp, G0);
477 477 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early
478 478 __ save_thread(noreg);
479 479 // do the call
480 480 BLOCK_COMMENT("call runtime_entry");
481 481 __ call(runtime_entry, relocInfo::runtime_call_type);
482 482 if (!VerifyThread)
483 483 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
484 484 else
485 485 __ delayed()->nop(); // (thread already passed)
486 486 __ restore_thread(noreg);
487 487 __ reset_last_Java_frame();
488 488
489 489 // check for pending exceptions. use Gtemp as scratch register.
490 490 #ifdef ASSERT
491 491 Label L;
492 492
493 493 Address exception_addr(G2_thread, Thread::pending_exception_offset());
494 494 Register scratch_reg = Gtemp;
495 495 __ ld_ptr(exception_addr, scratch_reg);
496 496 __ br_notnull(scratch_reg, false, Assembler::pt, L);
497 497 __ delayed()->nop();
498 498 __ should_not_reach_here();
499 499 __ bind(L);
500 500 #endif // ASSERT
501 501 BLOCK_COMMENT("call forward_exception_entry");
502 502 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
503 503 // we use O7 linkage so that forward_exception_entry has the issuing PC
504 504 __ delayed()->restore();
505 505
506 506 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
507 507 return stub->entry_point();
508 508 }
509 509
510 510 #undef __
511 511 #define __ _masm->
512 512
513 513
514 514 // Generate a routine that sets all the registers so we
515 515 // can tell if the stop routine prints them correctly.
516 516 address generate_test_stop() {
517 517 StubCodeMark mark(this, "StubRoutines", "test_stop");
518 518 address start = __ pc();
519 519
520 520 int i;
521 521
522 522 __ save_frame(0);
523 523
524 524 static jfloat zero = 0.0, one = 1.0;
525 525
526 526 // put addr in L0, then load through L0 to F0
527 527 __ set((intptr_t)&zero, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F0);
528 528 __ set((intptr_t)&one, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1
529 529
530 530 // use add to put 2..18 in F2..F18
531 531 for ( i = 2; i <= 18; ++i ) {
532 532 __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
533 533 }
534 534
535 535 // Now put double 2 in F16, double 18 in F18
536 536 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2, F16 );
537 537 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );
538 538
539 539     // use add to put 20..30 in F20..F30
540 540 for (i = 20; i < 32; i += 2) {
541 541 __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
542 542 }
543 543
544 544 // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
545 545 for ( i = 0; i < 8; ++i ) {
546 546 if (i < 6) {
547 547 __ set( i, as_iRegister(i));
548 548 __ set(16 + i, as_oRegister(i));
549 549 __ set(24 + i, as_gRegister(i));
550 550 }
551 551 __ set( 8 + i, as_lRegister(i));
552 552 }
553 553
554 554 __ stop("testing stop");
555 555
556 556
557 557 __ ret();
558 558 __ delayed()->restore();
559 559
560 560 return start;
561 561 }
562 562
563 563
564 564 address generate_stop_subroutine() {
565 565 StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
566 566 address start = __ pc();
567 567
568 568 __ stop_subroutine();
569 569
570 570 return start;
571 571 }
572 572
573 573 address generate_flush_callers_register_windows() {
574 574 StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
575 575 address start = __ pc();
576 576
577 577 __ flush_windows();
578 578 __ retl(false);
579 579 __ delayed()->add( FP, STACK_BIAS, O0 );
580 580 // The returned value must be a stack pointer whose register save area
581 581 // is flushed, and will stay flushed while the caller executes.
582 582
583 583 return start;
584 584 }
585 585
586 586 // Helper functions for v8 atomic operations.
587 587 //
588 588 void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
589 589 if (mark_oop_reg == noreg) {
590 590 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
591 591 __ set((intptr_t)lock_ptr, lock_ptr_reg);
592 592 } else {
593 593 assert(scratch_reg != noreg, "just checking");
594 594 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
595 595 __ set((intptr_t)lock_ptr, lock_ptr_reg);
596 596 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
597 597 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
598 598 }
599 599 }
600 600
601 601 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
602 602
603 603 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
604 604 __ set(StubRoutines::Sparc::locked, lock_reg);
605 605 // Initialize yield counter
606 606 __ mov(G0,yield_reg);
607 607
608 608 __ BIND(retry);
609 609 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
610 610 __ br(Assembler::less, false, Assembler::pt, dontyield);
611 611 __ delayed()->nop();
612 612
613 613     // This code can only be called from inside the VM; this
614 614     // stub is only invoked from Atomic::add().  We do not
615 615     // want to use call_VM, because _last_java_sp and such
616 616     // must already be set.
617 617 //
618 618 // Save the regs and make space for a C call
619 619 __ save(SP, -96, SP);
620 620 __ save_all_globals_into_locals();
621 621 BLOCK_COMMENT("call os::naked_sleep");
622 622 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
623 623 __ delayed()->nop();
624 624 __ restore_globals_from_locals();
625 625 __ restore();
626 626 // reset the counter
627 627 __ mov(G0,yield_reg);
628 628
629 629 __ BIND(dontyield);
630 630
631 631 // try to get lock
632 632 __ swap(lock_ptr_reg, 0, lock_reg);
633 633
634 634 // did we get the lock?
635 635 __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
636 636 __ br(Assembler::notEqual, true, Assembler::pn, retry);
637 637 __ delayed()->add(yield_reg,1,yield_reg);
638 638
639 639 // yes, got lock. do the operation here.
640 640 }
641 641
642 642 void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
643 643 __ st(lock_reg, lock_ptr_reg, 0); // unlock
644 644 }
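
A sketch of the same lock protocol in C++ (illustrative only; the stub uses the SPARC swap instruction and os::naked_sleep()). The GCC builtin __sync_lock_test_and_set stands in for swap, and 'spin_count' plays the role of V8AtomicOperationUnderLockSpinCount.

    #include <stdint.h>

    enum { v8_unlocked = 0, v8_locked = 1 };

    void v8_lock(volatile int32_t* lock_ptr, int spin_count,
                 void (*naked_sleep)()) {
      int yield_counter = 0;
      for (;;) {
        if (yield_counter >= spin_count) {   // spun too long: sleep briefly
          naked_sleep();
          yield_counter = 0;                 // reset the counter
        }
        // try to get the lock (atomic exchange, like the swap above)
        if (__sync_lock_test_and_set(lock_ptr, (int32_t)v8_locked) == v8_unlocked)
          return;                            // got the lock
        ++yield_counter;                     // didn't get it: count and retry
      }
    }

    void v8_unlock(volatile int32_t* lock_ptr) {
      *lock_ptr = v8_unlocked;               // plain store, as in the epilogue
    }
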
645 645
646 646 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
647 647 //
648 648 // Arguments :
649 649 //
650 650 // exchange_value: O0
651 651 // dest: O1
652 652 //
653 653 // Results:
654 654 //
655 655 // O0: the value previously stored in dest
656 656 //
657 657 address generate_atomic_xchg() {
658 658 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
659 659 address start = __ pc();
660 660
661 661 if (UseCASForSwap) {
662 662 // Use CAS instead of swap, just in case the MP hardware
663 663 // prefers to work with just one kind of synch. instruction.
664 664 Label retry;
665 665 __ BIND(retry);
666 666 __ mov(O0, O3); // scratch copy of exchange value
667 667 __ ld(O1, 0, O2); // observe the previous value
668 668 // try to replace O2 with O3
669 669 __ cas_under_lock(O1, O2, O3,
670 670 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
671 671 __ cmp(O2, O3);
672 672 __ br(Assembler::notEqual, false, Assembler::pn, retry);
673 673 __ delayed()->nop();
674 674
675 675 __ retl(false);
676 676 __ delayed()->mov(O2, O0); // report previous value to caller
677 677
678 678 } else {
679 679 if (VM_Version::v9_instructions_work()) {
680 680 __ retl(false);
681 681 __ delayed()->swap(O1, 0, O0);
682 682 } else {
683 683 const Register& lock_reg = O2;
684 684 const Register& lock_ptr_reg = O3;
685 685 const Register& yield_reg = O4;
686 686
687 687 Label retry;
688 688 Label dontyield;
689 689
690 690 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
691 691 // got the lock, do the swap
692 692 __ swap(O1, 0, O0);
693 693
694 694 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
695 695 __ retl(false);
696 696 __ delayed()->nop();
697 697 }
698 698 }
699 699
700 700 return start;
701 701 }
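
A C++ sketch of the UseCASForSwap path above: emulate the swap with a compare-and-swap retry loop (illustrative; the stub emits cas via cas_under_lock). The GCC builtin __sync_val_compare_and_swap stands in for cas.

    #include <stdint.h>

    int32_t atomic_xchg_via_cas(int32_t exchange_value, volatile int32_t* dest) {
      for (;;) {
        int32_t observed = *dest;                     // observe the previous value
        // try to replace 'observed' with 'exchange_value'
        if (__sync_val_compare_and_swap(dest, observed, exchange_value) == observed)
          return observed;                            // report previous value to caller
        // cas lost a race: another thread changed *dest, retry
      }
    }
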
702 702
703 703
704 704 // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
705 705 //
706 706 // Arguments :
707 707 //
708 708 // exchange_value: O0
709 709 // dest: O1
710 710 // compare_value: O2
711 711 //
712 712 // Results:
713 713 //
714 714 // O0: the value previously stored in dest
715 715 //
716 716 // Overwrites (v8): O3,O4,O5
717 717 //
718 718 address generate_atomic_cmpxchg() {
719 719 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
720 720 address start = __ pc();
721 721
722 722 // cmpxchg(dest, compare_value, exchange_value)
723 723 __ cas_under_lock(O1, O2, O0,
724 724 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
725 725 __ retl(false);
726 726 __ delayed()->nop();
727 727
728 728 return start;
729 729 }
730 730
731 731 // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
732 732 //
733 733 // Arguments :
734 734 //
735 735 // exchange_value: O1:O0
736 736 // dest: O2
737 737 // compare_value: O4:O3
738 738 //
739 739 // Results:
740 740 //
741 741 // O1:O0: the value previously stored in dest
742 742 //
743 743   // This only works on V9; on V8 we don't generate any
744 744   // code and just return NULL.
745 745 //
746 746 // Overwrites: G1,G2,G3
747 747 //
748 748 address generate_atomic_cmpxchg_long() {
749 749 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
750 750 address start = __ pc();
751 751
752 752 if (!VM_Version::supports_cx8())
753 753 return NULL;;
754 754 __ sllx(O0, 32, O0);
755 755 __ srl(O1, 0, O1);
756 756     __ or3(O0,O1,O0);   // O0 holds 64-bit value from exchange_value
757 757 __ sllx(O3, 32, O3);
758 758 __ srl(O4, 0, O4);
759 759     __ or3(O3,O4,O3);   // O3 holds 64-bit value from compare_value
760 760 __ casx(O2, O3, O0);
761 761 __ srl(O0, 0, O1); // unpacked return value in O1:O0
762 762 __ retl(false);
763 763 __ delayed()->srlx(O0, 32, O0);
764 764
765 765 return start;
766 766 }
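
A sketch of the 32-bit argument marshalling above: each jlong arrives split across two registers (hi in the lower-numbered register), and the sllx/srl/or3 sequences rebuild the 64-bit values before the single casx. Names here are illustrative.

    #include <stdint.h>

    int64_t pack_hi_lo(uint32_t hi, uint32_t lo) {
      return ((int64_t)hi << 32) | lo;       // sllx hi, 32; srl lo, 0; or3
    }

    void unpack_hi_lo(int64_t v, uint32_t* hi, uint32_t* lo) {
      *hi = (uint32_t)((uint64_t)v >> 32);   // delayed()->srlx(O0, 32, O0)
      *lo = (uint32_t)v;                     // srl(O0, 0, O1)
    }
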
767 767
768 768
769 769 // Support for jint Atomic::add(jint add_value, volatile jint* dest).
770 770 //
771 771 // Arguments :
772 772 //
773 773 // add_value: O0 (e.g., +1 or -1)
774 774 // dest: O1
775 775 //
776 776 // Results:
777 777 //
778 778 // O0: the new value stored in dest
779 779 //
780 780 // Overwrites (v9): O3
781 781 // Overwrites (v8): O3,O4,O5
782 782 //
783 783 address generate_atomic_add() {
784 784 StubCodeMark mark(this, "StubRoutines", "atomic_add");
785 785 address start = __ pc();
786 786 __ BIND(_atomic_add_stub);
787 787
788 788 if (VM_Version::v9_instructions_work()) {
789 789       Label retry;
790 790 __ BIND(retry);
791 791
792 792 __ lduw(O1, 0, O2);
793 793 __ add(O0, O2, O3);
794 794 __ cas(O1, O2, O3);
795 795 __ cmp( O2, O3);
796 796 __ br(Assembler::notEqual, false, Assembler::pn, retry);
797 797 __ delayed()->nop();
798 798 __ retl(false);
799 799 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
800 800 } else {
801 801 const Register& lock_reg = O2;
802 802 const Register& lock_ptr_reg = O3;
803 803 const Register& value_reg = O4;
804 804 const Register& yield_reg = O5;
805 805
806 806       Label retry;
807 807       Label dontyield;
808 808
809 809 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
810 810 // got lock, do the increment
811 811 __ ld(O1, 0, value_reg);
812 812 __ add(O0, value_reg, value_reg);
813 813 __ st(value_reg, O1, 0);
814 814
815 815 // %%% only for RMO and PSO
816 816 __ membar(Assembler::StoreStore);
817 817
818 818 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
819 819
820 820 __ retl(false);
821 821 __ delayed()->mov(value_reg, O0);
822 822 }
823 823
824 824 return start;
825 825 }
826 826 Label _atomic_add_stub; // called from other stubs
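
A C++ sketch of the v9 path above: load, add, cas, and retry until the cas observes the value we loaded (illustrative; the stub emits cas and returns the *new* value in O0). __sync_val_compare_and_swap again stands in for cas.

    #include <stdint.h>

    int32_t atomic_add_via_cas(int32_t add_value, volatile int32_t* dest) {
      for (;;) {
        int32_t old_value = *dest;                    // lduw(O1, 0, O2)
        int32_t new_value = old_value + add_value;    // add(O0, O2, O3)
        if (__sync_val_compare_and_swap(dest, old_value, new_value) == old_value)
          return new_value;                           // note that cas made O2 == O3
      }
    }
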
827 827
828 828
829 829 //------------------------------------------------------------------------------------------------------------------------
830 830 // The following routine generates a subroutine to throw an asynchronous
831 831 // UnknownError when an unsafe access gets a fault that could not be
832 832 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
833 833 //
834 834 // Arguments :
835 835 //
836 836 // trapping PC: O7
837 837 //
838 838 // Results:
839 839 // posts an asynchronous exception, skips the trapping instruction
840 840 //
841 841
842 842 address generate_handler_for_unsafe_access() {
843 843 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
844 844 address start = __ pc();
845 845
846 846 const int preserve_register_words = (64 * 2);
847 847 Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);
848 848
849 849 Register Lthread = L7_thread_cache;
850 850 int i;
851 851
852 852 __ save_frame(0);
853 853 __ mov(G1, L1);
854 854 __ mov(G2, L2);
855 855 __ mov(G3, L3);
856 856 __ mov(G4, L4);
857 857 __ mov(G5, L5);
858 858 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
859 859 __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
860 860 }
861 861
862 862 address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
863 863 BLOCK_COMMENT("call handle_unsafe_access");
864 864 __ call(entry_point, relocInfo::runtime_call_type);
865 865 __ delayed()->nop();
866 866
867 867 __ mov(L1, G1);
868 868 __ mov(L2, G2);
869 869 __ mov(L3, G3);
870 870 __ mov(L4, G4);
871 871 __ mov(L5, G5);
872 872 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
873 873 __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
874 874 }
875 875
876 876 __ verify_thread();
877 877
878 878 __ jmp(O0, 0);
879 879 __ delayed()->restore();
880 880
881 881 return start;
882 882 }
883 883
884 884
885 885   // Support for uint StubRoutines::Sparc::partial_subtype_check( Klass sub, Klass super );
886 886 // Arguments :
887 887 //
888 888 // ret : O0, returned
889 889 // icc/xcc: set as O0 (depending on wordSize)
890 890 // sub : O1, argument, not changed
891 891 // super: O2, argument, not changed
892 892 // raddr: O7, blown by call
893 893 address generate_partial_subtype_check() {
894 894 __ align(CodeEntryAlignment);
895 895 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
896 896 address start = __ pc();
897 897 Label miss;
898 898
899 899 #if defined(COMPILER2) && !defined(_LP64)
900 900 // Do not use a 'save' because it blows the 64-bit O registers.
901 901 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
902 902 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
903 903 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
904 904 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
905 905 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
906 906 Register Rret = O0;
907 907 Register Rsub = O1;
908 908 Register Rsuper = O2;
909 909 #else
910 910 __ save_frame(0);
911 911 Register Rret = I0;
912 912 Register Rsub = I1;
913 913 Register Rsuper = I2;
914 914 #endif
915 915
916 916 Register L0_ary_len = L0;
917 917 Register L1_ary_ptr = L1;
918 918 Register L2_super = L2;
919 919 Register L3_index = L3;
920 920
921 921 __ check_klass_subtype_slow_path(Rsub, Rsuper,
922 922 L0, L1, L2, L3,
923 923 NULL, &miss);
924 924
925 925 // Match falls through here.
926 926 __ addcc(G0,0,Rret); // set Z flags, Z result
927 927
928 928 #if defined(COMPILER2) && !defined(_LP64)
929 929 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
930 930 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
931 931 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
932 932 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
933 933 __ retl(); // Result in Rret is zero; flags set to Z
934 934 __ delayed()->add(SP,4*wordSize,SP);
935 935 #else
936 936 __ ret(); // Result in Rret is zero; flags set to Z
937 937 __ delayed()->restore();
938 938 #endif
939 939
940 940 __ BIND(miss);
941 941 __ addcc(G0,1,Rret); // set NZ flags, NZ result
942 942
943 943 #if defined(COMPILER2) && !defined(_LP64)
944 944 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
945 945 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
946 946 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
947 947 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
948 948 __ retl(); // Result in Rret is != 0; flags set to NZ
949 949 __ delayed()->add(SP,4*wordSize,SP);
950 950 #else
951 951 __ ret(); // Result in Rret is != 0; flags set to NZ
952 952 __ delayed()->restore();
953 953 #endif
954 954
955 955 return start;
956 956 }
957 957
958 958
959 959 // Called from MacroAssembler::verify_oop
960 960 //
961 961 address generate_verify_oop_subroutine() {
962 962 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
963 963
964 964 address start = __ pc();
965 965
966 966 __ verify_oop_subroutine();
967 967
968 968 return start;
969 969 }
970 970
971 - static address disjoint_byte_copy_entry;
972 - static address disjoint_short_copy_entry;
973 - static address disjoint_int_copy_entry;
974 - static address disjoint_long_copy_entry;
975 - static address disjoint_oop_copy_entry;
976 -
977 - static address byte_copy_entry;
978 - static address short_copy_entry;
979 - static address int_copy_entry;
980 - static address long_copy_entry;
981 - static address oop_copy_entry;
982 -
983 - static address checkcast_copy_entry;
984 971
985 972 //
986 973 // Verify that a register contains clean 32-bits positive value
987 974 // (high 32-bits are 0) so it could be used in 64-bits shifts (sllx, srax).
988 975 //
989 976 // Input:
990 977 // Rint - 32-bits value
991 978 // Rtmp - scratch
992 979 //
993 980 void assert_clean_int(Register Rint, Register Rtmp) {
994 981 #if defined(ASSERT) && defined(_LP64)
995 982 __ signx(Rint, Rtmp);
996 983 __ cmp(Rint, Rtmp);
997 984 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
998 985 #endif
999 986 }
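
The signx/cmp pair above in C++ terms (a sketch): a register image is "clean" when it equals the sign extension of its own low 32 bits, which for the positive values used here means the high 32 bits are 0.

    #include <assert.h>
    #include <stdint.h>

    void assert_clean_int_c(int64_t reg_image) {
      // signx computes the sign extension; cmp/breakpoint_trap fire on mismatch
      assert((int64_t)(int32_t)reg_image == reg_image);
    }
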
1000 987
1001 988 //
1002 989 // Generate overlap test for array copy stubs
1003 990 //
1004 991 // Input:
1005 992 // O0 - array1
1006 993 // O1 - array2
1007 994 // O2 - element count
1008 995 //
1009 996 // Kills temps: O3, O4
1010 997 //
1011 998 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
1012 999 assert(no_overlap_target != NULL, "must be generated");
1013 1000 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
1014 1001 }
1015 1002 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
1016 1003 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
1017 1004 }
1018 1005 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
1019 1006 const Register from = O0;
1020 1007 const Register to = O1;
1021 1008 const Register count = O2;
1022 1009 const Register to_from = O3; // to - from
1023 1010 const Register byte_count = O4; // count << log2_elem_size
1024 1011
1025 1012 __ subcc(to, from, to_from);
1026 1013 __ sll_ptr(count, log2_elem_size, byte_count);
1027 1014 if (NOLp == NULL)
1028 1015 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
1029 1016 else
1030 1017 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
1031 1018 __ delayed()->cmp(to_from, byte_count);
1032 1019 if (NOLp == NULL)
1033 1020 __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, no_overlap_target);
1034 1021 else
1035 1022 __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, (*NOLp));
1036 1023 __ delayed()->nop();
1037 1024 }
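
The overlap test above in C++ terms (a sketch): a forward, disjoint-style copy is safe unless the destination starts strictly inside the source range [from, from + byte_count). Both conditions reduce to unsigned comparisons on 'to - from', exactly as the two brx branches do.

    #include <stddef.h>
    #include <stdint.h>

    bool no_overlap(uintptr_t from, uintptr_t to, size_t count, int log2_elem_size) {
      uintptr_t to_from    = to - from;                        // subcc(to, from, to_from)
      uintptr_t byte_count = (uintptr_t)count << log2_elem_size; // sll_ptr
      return to <= from                 // lessEqualUnsigned branch
          || to_from >= byte_count;     // greaterEqualUnsigned branch
    }
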
1038 1025
1039 1026 //
1040 1027 // Generate pre-write barrier for array.
1041 1028 //
1042 1029 // Input:
1043 1030 // addr - register containing starting address
1044 1031 // count - register containing element count
1045 1032 // tmp - scratch register
1046 1033 //
1047 1034 // The input registers are overwritten.
1048 1035 //
1049 1036 void gen_write_ref_array_pre_barrier(Register addr, Register count) {
1050 1037 BarrierSet* bs = Universe::heap()->barrier_set();
1051 1038 if (bs->has_write_ref_pre_barrier()) {
1052 1039 assert(bs->has_write_ref_array_pre_opt(),
1053 1040 "Else unsupported barrier set.");
1054 1041
1055 1042 __ save_frame(0);
1056 1043 // Save the necessary global regs... will be used after.
1057 1044 if (addr->is_global()) {
1058 1045 __ mov(addr, L0);
1059 1046 }
1060 1047 if (count->is_global()) {
1061 1048 __ mov(count, L1);
1062 1049 }
1063 1050 __ mov(addr->after_save(), O0);
1064 1051 // Get the count into O1
1065 1052 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
1066 1053 __ delayed()->mov(count->after_save(), O1);
1067 1054 if (addr->is_global()) {
1068 1055 __ mov(L0, addr);
1069 1056 }
1070 1057 if (count->is_global()) {
1071 1058 __ mov(L1, count);
1072 1059 }
1073 1060 __ restore();
1074 1061 }
1075 1062 }
1076 1063 //
1077 1064 // Generate post-write barrier for array.
1078 1065 //
1079 1066 // Input:
1080 1067 // addr - register containing starting address
1081 1068 // count - register containing element count
1082 1069 // tmp - scratch register
1083 1070 //
1084 1071 // The input registers are overwritten.
1085 1072 //
1086 1073 void gen_write_ref_array_post_barrier(Register addr, Register count,
1087 1074 Register tmp) {
1088 1075 BarrierSet* bs = Universe::heap()->barrier_set();
1089 1076
1090 1077 switch (bs->kind()) {
1091 1078 case BarrierSet::G1SATBCT:
1092 1079 case BarrierSet::G1SATBCTLogging:
1093 1080 {
1094 1081 // Get some new fresh output registers.
1095 1082 __ save_frame(0);
1096 1083 __ mov(addr->after_save(), O0);
1097 1084 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
1098 1085 __ delayed()->mov(count->after_save(), O1);
1099 1086 __ restore();
1100 1087 }
1101 1088 break;
1102 1089 case BarrierSet::CardTableModRef:
1103 1090 case BarrierSet::CardTableExtension:
1104 1091 {
1105 1092 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
1106 1093 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
1107 1094 assert_different_registers(addr, count, tmp);
1108 1095
1109 1096 Label L_loop;
1110 1097
1111 1098 __ sll_ptr(count, LogBytesPerHeapOop, count);
1112 1099 __ sub(count, BytesPerHeapOop, count);
1113 1100 __ add(count, addr, count);
1114 1101 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
1115 1102 __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
1116 1103 __ srl_ptr(count, CardTableModRefBS::card_shift, count);
1117 1104 __ sub(count, addr, count);
1118 1105 AddressLiteral rs(ct->byte_map_base);
1119 1106 __ set(rs, tmp);
1120 1107 __ BIND(L_loop);
1121 1108 __ stb(G0, tmp, addr);
1122 1109 __ subcc(count, 1, count);
1123 1110 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1124 1111 __ delayed()->add(addr, 1, addr);
1125 1112 }
1126 1113 break;
1127 1114 case BarrierSet::ModRef:
1128 1115 break;
1129 1116 default:
1130 1117 ShouldNotReachHere();
1131 1118 }
1132 1119 }
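
A sketch of the card-table branch above: dirty every card spanned by the stored oops in [addr, addr + count * oop_size). 'byte_map_base' and 'card_shift' mirror the CardTableModRefBS fields, and 0 marks a dirty card (the stb(G0, tmp, addr) in the loop).

    #include <stddef.h>
    #include <stdint.h>

    void dirty_cards(int8_t* byte_map_base, int card_shift,
                     uintptr_t addr, size_t count, size_t oop_size) {
      uintptr_t last       = addr + count * oop_size - oop_size; // address of last element
      uintptr_t first_card = addr >> card_shift;
      uintptr_t last_card  = last >> card_shift;
      for (uintptr_t card = first_card; card <= last_card; card++)
        byte_map_base[card] = 0;        // dirty the card
    }
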
1133 1120
1134 1121
1135 1122 // Copy big chunks forward with shift
1136 1123 //
1137 1124 // Inputs:
1138 1125 // from - source arrays
1139 1126 // to - destination array aligned to 8-bytes
1140 1127 // count - elements count to copy >= the count equivalent to 16 bytes
1141 1128 // count_dec - elements count's decrement equivalent to 16 bytes
1142 1129 // L_copy_bytes - copy exit label
1143 1130 //
1144 1131 void copy_16_bytes_forward_with_shift(Register from, Register to,
1145 1132 Register count, int count_dec, Label& L_copy_bytes) {
1146 1133 Label L_loop, L_aligned_copy, L_copy_last_bytes;
1147 1134
1148 1135 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
1149 1136 __ andcc(from, 7, G1); // misaligned bytes
1150 1137 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1151 1138 __ delayed()->nop();
1152 1139
1153 1140 const Register left_shift = G1; // left shift bit counter
1154 1141 const Register right_shift = G5; // right shift bit counter
1155 1142
1156 1143 __ sll(G1, LogBitsPerByte, left_shift);
1157 1144 __ mov(64, right_shift);
1158 1145 __ sub(right_shift, left_shift, right_shift);
1159 1146
1160 1147 //
1161 1148 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1162 1149 // to form 2 aligned 8-bytes chunks to store.
1163 1150 //
1164 1151 __ deccc(count, count_dec); // Pre-decrement 'count'
1165 1152 __ andn(from, 7, from); // Align address
1166 1153 __ ldx(from, 0, O3);
1167 1154 __ inc(from, 8);
1168 1155 __ align(OptoLoopAlignment);
1169 1156 __ BIND(L_loop);
1170 1157 __ ldx(from, 0, O4);
1171 1158 __ deccc(count, count_dec); // Can we do next iteration after this one?
1172 1159 __ ldx(from, 8, G4);
1173 1160 __ inc(to, 16);
1174 1161 __ inc(from, 16);
1175 1162 __ sllx(O3, left_shift, O3);
1176 1163 __ srlx(O4, right_shift, G3);
1177 1164 __ bset(G3, O3);
1178 1165 __ stx(O3, to, -16);
1179 1166 __ sllx(O4, left_shift, O4);
1180 1167 __ srlx(G4, right_shift, G3);
1181 1168 __ bset(G3, O4);
1182 1169 __ stx(O4, to, -8);
1183 1170 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1184 1171 __ delayed()->mov(G4, O3);
1185 1172
1186 1173 __ inccc(count, count_dec>>1 ); // + 8 bytes
1187 1174 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1188 1175 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1189 1176
1190 1177 // copy 8 bytes, part of them already loaded in O3
1191 1178 __ ldx(from, 0, O4);
1192 1179 __ inc(to, 8);
1193 1180 __ inc(from, 8);
1194 1181 __ sllx(O3, left_shift, O3);
1195 1182 __ srlx(O4, right_shift, G3);
1196 1183 __ bset(O3, G3);
1197 1184 __ stx(G3, to, -8);
1198 1185
1199 1186 __ BIND(L_copy_last_bytes);
1200 1187 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
1201 1188 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1202 1189 __ delayed()->sub(from, right_shift, from); // restore address
1203 1190
1204 1191 __ BIND(L_aligned_copy);
1205 1192 }
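
A sketch of the shifted-copy idea above: when 'from' and 'to' disagree mod 8, read only aligned 64-bit words and stitch each output word from two neighbouring input words. Shifts are big-endian as on SPARC; the stub never enters this path with zero misalignment, so both shift amounts stay in 1..63. Note the loop reads words + 1 input words for words output words.

    #include <stddef.h>
    #include <stdint.h>

    void copy_words_with_shift(const uint64_t* from_aligned, uint64_t* to,
                               size_t words, int misaligned_bytes /* 1..7 */) {
      int left_shift  = misaligned_bytes * 8;      // sll(G1, LogBitsPerByte, left_shift)
      int right_shift = 64 - left_shift;           // mov(64); sub(...)
      uint64_t prev = from_aligned[0];             // ldx(from, 0, O3)
      for (size_t i = 0; i < words; i++) {
        uint64_t next = from_aligned[i + 1];       // ldx of the next aligned chunk
        to[i] = (prev << left_shift) | (next >> right_shift); // sllx/srlx/bset
        prev = next;                               // delayed()->mov(G4, O3)
      }
    }
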
1206 1193
1207 1194 // Copy big chunks backward with shift
1208 1195 //
1209 1196 // Inputs:
1210 1197 // end_from - source arrays end address
1211 1198 // end_to - destination array end address aligned to 8-bytes
1212 1199 // count - elements count to copy >= the count equivalent to 16 bytes
1213 1200 // count_dec - elements count's decrement equivalent to 16 bytes
1214 1201 // L_aligned_copy - aligned copy exit label
1215 1202 // L_copy_bytes - copy exit label
1216 1203 //
1217 1204 void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
1218 1205 Register count, int count_dec,
1219 1206 Label& L_aligned_copy, Label& L_copy_bytes) {
1220 1207 Label L_loop, L_copy_last_bytes;
1221 1208
1222 1209 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
1223 1210 __ andcc(end_from, 7, G1); // misaligned bytes
1224 1211 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
1225 1212 __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'
1226 1213
1227 1214 const Register left_shift = G1; // left shift bit counter
1228 1215 const Register right_shift = G5; // right shift bit counter
1229 1216
1230 1217 __ sll(G1, LogBitsPerByte, left_shift);
1231 1218 __ mov(64, right_shift);
1232 1219 __ sub(right_shift, left_shift, right_shift);
1233 1220
1234 1221 //
1235 1222 // Load 2 aligned 8-bytes chunks and use one from previous iteration
1236 1223 // to form 2 aligned 8-bytes chunks to store.
1237 1224 //
1238 1225 __ andn(end_from, 7, end_from); // Align address
1239 1226 __ ldx(end_from, 0, O3);
1240 1227 __ align(OptoLoopAlignment);
1241 1228 __ BIND(L_loop);
1242 1229 __ ldx(end_from, -8, O4);
1243 1230 __ deccc(count, count_dec); // Can we do next iteration after this one?
1244 1231 __ ldx(end_from, -16, G4);
1245 1232 __ dec(end_to, 16);
1246 1233 __ dec(end_from, 16);
1247 1234 __ srlx(O3, right_shift, O3);
1248 1235 __ sllx(O4, left_shift, G3);
1249 1236 __ bset(G3, O3);
1250 1237 __ stx(O3, end_to, 8);
1251 1238 __ srlx(O4, right_shift, O4);
1252 1239 __ sllx(G4, left_shift, G3);
1253 1240 __ bset(G3, O4);
1254 1241 __ stx(O4, end_to, 0);
1255 1242 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
1256 1243 __ delayed()->mov(G4, O3);
1257 1244
1258 1245 __ inccc(count, count_dec>>1 ); // + 8 bytes
1259 1246 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1260 1247 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1261 1248
1262 1249 // copy 8 bytes, part of them already loaded in O3
1263 1250 __ ldx(end_from, -8, O4);
1264 1251 __ dec(end_to, 8);
1265 1252 __ dec(end_from, 8);
1266 1253 __ srlx(O3, right_shift, O3);
1267 1254 __ sllx(O4, left_shift, G3);
1268 1255 __ bset(O3, G3);
1269 1256 __ stx(G3, end_to, 0);
1270 1257
1271 1258 __ BIND(L_copy_last_bytes);
1272 1259 __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
1273 1260 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
1274 1261 __ delayed()->add(end_from, left_shift, end_from); // restore address
1275 1262 }
1276 1263
1277 1264 //
1278 1265 // Generate stub for disjoint byte copy. If "aligned" is true, the
1279 1266 // "from" and "to" addresses are assumed to be heapword aligned.
1280 1267 //
1281 1268 // Arguments for generated stub:
1282 1269 // from: O0
1283 1270 // to: O1
1284 1271 // count: O2 treated as signed
1285 1272 //
1286 - address generate_disjoint_byte_copy(bool aligned, const char * name) {
1273 + address generate_disjoint_byte_copy(bool aligned, address *entry, const char *name) {
1287 1274 __ align(CodeEntryAlignment);
1288 1275 StubCodeMark mark(this, "StubRoutines", name);
1289 1276 address start = __ pc();
1290 1277
1291 1278 Label L_skip_alignment, L_align;
1292 1279 Label L_copy_byte, L_copy_byte_loop, L_exit;
1293 1280
1294 1281 const Register from = O0; // source array address
1295 1282 const Register to = O1; // destination array address
1296 1283 const Register count = O2; // elements count
1297 1284 const Register offset = O5; // offset from start of arrays
1298 1285 // O3, O4, G3, G4 are used as temp registers
1299 1286
1300 1287 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1301 1288
1302 - if (!aligned) disjoint_byte_copy_entry = __ pc();
1303 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1304 - if (!aligned) BLOCK_COMMENT("Entry:");
1289 + if (entry != NULL) {
1290 + *entry = __ pc();
1291 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1292 + BLOCK_COMMENT("Entry:");
1293 + }
1305 1294
1306 1295 // for short arrays, just do single element copy
1307 1296 __ cmp(count, 23); // 16 + 7
1308 1297 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1309 1298 __ delayed()->mov(G0, offset);
1310 1299
1311 1300 if (aligned) {
1312 1301 // 'aligned' == true when it is known statically during compilation
1313 1302 // of this arraycopy call site that both 'from' and 'to' addresses
1314 1303 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1315 1304 //
1316 1305       //   Aligned arrays have 4-byte alignment in the 32-bit VM
1317 1306       //   and 8-byte alignment in the 64-bit VM, so we do it only for the 32-bit VM
1318 1307 //
1319 1308 #ifndef _LP64
1320 1309 // copy a 4-bytes word if necessary to align 'to' to 8 bytes
1321 1310 __ andcc(to, 7, G0);
1322 1311 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
1323 1312 __ delayed()->ld(from, 0, O3);
1324 1313 __ inc(from, 4);
1325 1314 __ inc(to, 4);
1326 1315 __ dec(count, 4);
1327 1316 __ st(O3, to, -4);
1328 1317 __ BIND(L_skip_alignment);
1329 1318 #endif
1330 1319 } else {
1331 1320 // copy bytes to align 'to' on 8 byte boundary
1332 1321 __ andcc(to, 7, G1); // misaligned bytes
1333 1322 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1334 1323 __ delayed()->neg(G1);
1335 1324 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment
1336 1325 __ sub(count, G1, count);
1337 1326 __ BIND(L_align);
1338 1327 __ ldub(from, 0, O3);
1339 1328 __ deccc(G1);
1340 1329 __ inc(from);
1341 1330 __ stb(O3, to, 0);
1342 1331 __ br(Assembler::notZero, false, Assembler::pt, L_align);
1343 1332 __ delayed()->inc(to);
1344 1333 __ BIND(L_skip_alignment);
1345 1334 }
1346 1335 #ifdef _LP64
1347 1336 if (!aligned)
1348 1337 #endif
1349 1338 {
1350 1339 // Copy with shift 16 bytes per iteration if arrays do not have
1351 1340 // the same alignment mod 8, otherwise fall through to the next
1352 1341 // code for aligned copy.
1353 1342       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1354 1343 // Also jump over aligned copy after the copy with shift completed.
1355 1344
1356 1345 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
1357 1346 }
1358 1347
1359 1348 // Both array are 8 bytes aligned, copy 16 bytes at a time
1360 1349 __ and3(count, 7, G4); // Save count
1361 1350 __ srl(count, 3, count);
1362 1351 generate_disjoint_long_copy_core(aligned);
1363 1352 __ mov(G4, count); // Restore count
1364 1353
1365 1354     // copy trailing bytes
1366 1355 __ BIND(L_copy_byte);
1367 1356 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1368 1357 __ delayed()->nop();
1369 1358 __ align(OptoLoopAlignment);
1370 1359 __ BIND(L_copy_byte_loop);
1371 1360 __ ldub(from, offset, O3);
1372 1361 __ deccc(count);
1373 1362 __ stb(O3, to, offset);
1374 1363 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
1375 1364 __ delayed()->inc(offset);
1376 1365
1377 1366 __ BIND(L_exit);
1378 1367 // O3, O4 are used as temp registers
1379 1368 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1380 1369 __ retl();
1381 1370 __ delayed()->mov(G0, O0); // return 0
1382 1371 return start;
1383 1372 }
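
The new 'entry' out-parameter replaces the static disjoint_byte_copy_entry deleted above. A hypothetical registration sequence, as a sketch (this fragment assumes the enclosing StubGenerator context; the actual wiring lives in the stub-generation code elsewhere in this change):

    // Hypothetical caller-side wiring (names follow the surrounding HotSpot code).
    address entry = NULL;
    StubRoutines::_jbyte_disjoint_arraycopy =
        generate_disjoint_byte_copy(false, &entry, "jbyte_disjoint_arraycopy");
    // 'entry' is the no-overlap entry point, which the conjoint stub takes
    // as its 'nooverlap_target':
    StubRoutines::_jbyte_arraycopy =
        generate_conjoint_byte_copy(false, entry, NULL, "jbyte_arraycopy");
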
1384 1373
1385 1374 //
1386 1375 // Generate stub for conjoint byte copy. If "aligned" is true, the
1387 1376 // "from" and "to" addresses are assumed to be heapword aligned.
1388 1377 //
1389 1378 // Arguments for generated stub:
1390 1379 // from: O0
1391 1380 // to: O1
1392 1381 // count: O2 treated as signed
1393 1382 //
1394 - address generate_conjoint_byte_copy(bool aligned, const char * name) {
1383 + address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
1384 + address *entry, const char *name) {
1395 1385 // Do reverse copy.
1396 1386
1397 1387 __ align(CodeEntryAlignment);
1398 1388 StubCodeMark mark(this, "StubRoutines", name);
1399 1389 address start = __ pc();
1400 - address nooverlap_target = aligned ?
1401 - StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
1402 - disjoint_byte_copy_entry;
1403 1390
1404 1391 Label L_skip_alignment, L_align, L_aligned_copy;
1405 1392 Label L_copy_byte, L_copy_byte_loop, L_exit;
1406 1393
1407 1394 const Register from = O0; // source array address
1408 1395 const Register to = O1; // destination array address
1409 1396 const Register count = O2; // elements count
1410 1397 const Register end_from = from; // source array end address
1411 1398 const Register end_to = to; // destination array end address
1412 1399
1413 1400 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1414 1401
1415 - if (!aligned) byte_copy_entry = __ pc();
1416 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1417 - if (!aligned) BLOCK_COMMENT("Entry:");
1402 + if (entry != NULL) {
1403 + *entry = __ pc();
1404 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1405 + BLOCK_COMMENT("Entry:");
1406 + }
1418 1407
1419 1408 array_overlap_test(nooverlap_target, 0);
1420 1409
1421 1410 __ add(to, count, end_to); // offset after last copied element
1422 1411
1423 1412 // for short arrays, just do single element copy
1424 1413 __ cmp(count, 23); // 16 + 7
1425 1414 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
1426 1415 __ delayed()->add(from, count, end_from);
1427 1416
1428 1417 {
1429 1418       // Align the ends of the arrays since they may not be aligned even
1430 1419       // when the arrays themselves are aligned.
1431 1420
1432 1421 // copy bytes to align 'end_to' on 8 byte boundary
1433 1422 __ andcc(end_to, 7, G1); // misaligned bytes
1434 1423 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1435 1424 __ delayed()->nop();
1436 1425 __ sub(count, G1, count);
1437 1426 __ BIND(L_align);
1438 1427 __ dec(end_from);
1439 1428 __ dec(end_to);
1440 1429 __ ldub(end_from, 0, O3);
1441 1430 __ deccc(G1);
1442 1431 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
1443 1432 __ delayed()->stb(O3, end_to, 0);
1444 1433 __ BIND(L_skip_alignment);
1445 1434 }
1446 1435 #ifdef _LP64
1447 1436 if (aligned) {
1448 1437 // Both arrays are aligned to 8-bytes in 64-bits VM.
1449 1438 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1450 1439 // in unaligned case.
1451 1440 __ dec(count, 16);
1452 1441 } else
1453 1442 #endif
1454 1443 {
1455 1444 // Copy with shift 16 bytes per iteration if arrays do not have
1456 1445 // the same alignment mod 8, otherwise jump to the next
1457 1446       // code for aligned copy (and subtracting 16 from 'count' before jump).
1458 1447       // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
1459 1448 // Also jump over aligned copy after the copy with shift completed.
1460 1449
1461 1450 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
1462 1451 L_aligned_copy, L_copy_byte);
1463 1452 }
1464 1453 // copy 4 elements (16 bytes) at a time
1465 1454 __ align(OptoLoopAlignment);
1466 1455 __ BIND(L_aligned_copy);
1467 1456 __ dec(end_from, 16);
1468 1457 __ ldx(end_from, 8, O3);
1469 1458 __ ldx(end_from, 0, O4);
1470 1459 __ dec(end_to, 16);
1471 1460 __ deccc(count, 16);
1472 1461 __ stx(O3, end_to, 8);
1473 1462 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1474 1463 __ delayed()->stx(O4, end_to, 0);
1475 1464 __ inc(count, 16);
1476 1465
1477 1466 // copy 1 element (1 byte) at a time
1478 1467 __ BIND(L_copy_byte);
1479 1468 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1480 1469 __ delayed()->nop();
1481 1470 __ align(OptoLoopAlignment);
1482 1471 __ BIND(L_copy_byte_loop);
1483 1472 __ dec(end_from);
1484 1473 __ dec(end_to);
1485 1474 __ ldub(end_from, 0, O4);
1486 1475 __ deccc(count);
1487 1476 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
1488 1477 __ delayed()->stb(O4, end_to, 0);
1489 1478
1490 1479 __ BIND(L_exit);
1491 1480 // O3, O4 are used as temp registers
1492 1481 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
1493 1482 __ retl();
1494 1483 __ delayed()->mov(G0, O0); // return 0
1495 1484 return start;
1496 1485 }
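Note on the refactoring visible above: the old file-scope entry variables (byte_copy_entry, disjoint_byte_copy_entry, and so on) are replaced by explicit arguments. A minimal sketch, assuming the caller shape used by generate_arraycopy_stubs() in an elided part of this file, of how the disjoint and conjoint stubs are now wired together:

    address entry;                  // filled in by the disjoint stub
    address entry_jbyte_arraycopy;  // filled in by the conjoint stub
    StubRoutines::_jbyte_disjoint_arraycopy =
        generate_disjoint_byte_copy(false, &entry, "jbyte_disjoint_arraycopy");
    StubRoutines::_jbyte_arraycopy =
        generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
                                    "jbyte_arraycopy");

The conjoint stub receives the disjoint stub's entry as its nooverlap_target, and passing NULL for 'entry' simply suppresses publication of the no-overlap-check entry point.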
... 69 lines elided ...
1497 1486
1498 1487 //
1499 1488 // Generate stub for disjoint short copy. If "aligned" is true, the
1500 1489 // "from" and "to" addresses are assumed to be heapword aligned.
1501 1490 //
1502 1491 // Arguments for generated stub:
1503 1492 // from: O0
1504 1493 // to: O1
1505 1494 // count: O2 treated as signed
1506 1495 //
1507 - address generate_disjoint_short_copy(bool aligned, const char * name) {
1496 + address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {
1508 1497 __ align(CodeEntryAlignment);
1509 1498 StubCodeMark mark(this, "StubRoutines", name);
1510 1499 address start = __ pc();
1511 1500
1512 1501 Label L_skip_alignment, L_skip_alignment2;
1513 1502 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1514 1503
1515 1504 const Register from = O0; // source array address
1516 1505 const Register to = O1; // destination array address
1517 1506 const Register count = O2; // elements count
1518 1507 const Register offset = O5; // offset from start of arrays
1519 1508 // O3, O4, G3, G4 are used as temp registers
1520 1509
1521 1510 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1522 1511
1523 - if (!aligned) disjoint_short_copy_entry = __ pc();
1524 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1525 - if (!aligned) BLOCK_COMMENT("Entry:");
1512 + if (entry != NULL) {
1513 + *entry = __ pc();
1514 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1515 + BLOCK_COMMENT("Entry:");
1516 + }
1526 1517
1527 1518 // for short arrays, just do single element copy
1528 1519 __ cmp(count, 11); // 8 + 3 (22 bytes)
1529 1520 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1530 1521 __ delayed()->mov(G0, offset);
1531 1522
1532 1523 if (aligned) {
1533 1524 // 'aligned' == true when it is known statically during compilation
1534 1525 // of this arraycopy call site that both 'from' and 'to' addresses
1535 1526 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1536 1527 //
1537 1528 // Aligned arrays have 4-byte alignment in the 32-bit VM
1538 1529 // and 8-byte alignment in the 64-bit VM.
1539 1530 //
1540 1531 #ifndef _LP64
1541 1532 // copy a 2-elements word if necessary to align 'to' to 8 bytes
1542 1533 __ andcc(to, 7, G0);
1543 1534 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1544 1535 __ delayed()->ld(from, 0, O3);
1545 1536 __ inc(from, 4);
1546 1537 __ inc(to, 4);
1547 1538 __ dec(count, 2);
1548 1539 __ st(O3, to, -4);
1549 1540 __ BIND(L_skip_alignment);
1550 1541 #endif
1551 1542 } else {
1552 1543 // copy 1 element if necessary to align 'to' on a 4-byte boundary
1553 1544 __ andcc(to, 3, G0);
1554 1545 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1555 1546 __ delayed()->lduh(from, 0, O3);
1556 1547 __ inc(from, 2);
1557 1548 __ inc(to, 2);
1558 1549 __ dec(count);
1559 1550 __ sth(O3, to, -2);
1560 1551 __ BIND(L_skip_alignment);
1561 1552
1562 1553 // copy 2 elements to align 'to' on an 8 byte boundary
1563 1554 __ andcc(to, 7, G0);
1564 1555 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1565 1556 __ delayed()->lduh(from, 0, O3);
1566 1557 __ dec(count, 2);
1567 1558 __ lduh(from, 2, O4);
1568 1559 __ inc(from, 4);
1569 1560 __ inc(to, 4);
1570 1561 __ sth(O3, to, -4);
1571 1562 __ sth(O4, to, -2);
1572 1563 __ BIND(L_skip_alignment2);
1573 1564 }
1574 1565 #ifdef _LP64
1575 1566 if (!aligned)
1576 1567 #endif
1577 1568 {
1578 1569 // Copy with shift 16 bytes per iteration if arrays do not have
1579 1570 // the same alignment mod 8, otherwise fall through to the next
1580 1571 // code for aligned copy.
1581 1572 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1582 1573 // Also jump over aligned copy after the copy with shift completed.
1583 1574
1584 1575 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
1585 1576 }
1586 1577
1587 1578 // Both arrays are 8-byte aligned, copy 16 bytes at a time
1588 1579 __ and3(count, 3, G4); // Save
1589 1580 __ srl(count, 2, count);
1590 1581 generate_disjoint_long_copy_core(aligned);
1591 1582 __ mov(G4, count); // restore
1592 1583
1593 1584 // copy 1 element at a time
1594 1585 __ BIND(L_copy_2_bytes);
1595 1586 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1596 1587 __ delayed()->nop();
1597 1588 __ align(OptoLoopAlignment);
1598 1589 __ BIND(L_copy_2_bytes_loop);
1599 1590 __ lduh(from, offset, O3);
1600 1591 __ deccc(count);
1601 1592 __ sth(O3, to, offset);
1602 1593 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
1603 1594 __ delayed()->inc(offset, 2);
1604 1595
1605 1596 __ BIND(L_exit);
1606 1597 // O3, O4 are used as temp registers
1607 1598 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1608 1599 __ retl();
1609 1600 __ delayed()->mov(G0, O0); // return 0
1610 1601 return start;
1611 1602 }
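The and3/srl/mov sequence above hands the bulk of the work to the 8-byte copy core; as a C-style sketch of the emitted logic (names illustrative):

    int tail = count & 3;          // shorts left over after the 8-byte chunks
    count >>= 2;                   // 4 shorts per 8-byte long
    copy_longs(from, to, count);   // long-copy core; also leaves 'offset' (O5) set
    count = tail;                  // fall back into the 1-element loop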
1612 1603
1613 1604 //
1614 1605 // Generate stub for array fill (byte, short, or int). If "aligned" is true, the
1615 1606 // "to" address is assumed to be heapword aligned.
1616 1607 //
1617 1608 // Arguments for generated stub:
1618 1609 // to: O0
1619 1610 // value: O1
1620 1611 // count: O2 treated as signed
1621 1612 //
1622 1613 address generate_fill(BasicType t, bool aligned, const char* name) {
1623 1614 __ align(CodeEntryAlignment);
1624 1615 StubCodeMark mark(this, "StubRoutines", name);
1625 1616 address start = __ pc();
1626 1617
1627 1618 const Register to = O0; // destination array address
1628 1619 const Register value = O1; // fill value
1629 1620 const Register count = O2; // elements count
1630 1621 // O3 is used as a temp register
1631 1622
1632 1623 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1633 1624
1634 1625 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
1635 1626 Label L_fill_2_bytes, L_fill_elements, L_fill_32_bytes;
1636 1627
1637 1628 int shift = -1;
1638 1629 switch (t) {
1639 1630 case T_BYTE:
1640 1631 shift = 2;
1641 1632 break;
1642 1633 case T_SHORT:
1643 1634 shift = 1;
1644 1635 break;
1645 1636 case T_INT:
1646 1637 shift = 0;
1647 1638 break;
1648 1639 default: ShouldNotReachHere();
1649 1640 }
1650 1641
1651 1642 BLOCK_COMMENT("Entry:");
1652 1643
1653 1644 if (t == T_BYTE) {
1654 1645 // Zero extend value
1655 1646 __ and3(value, 0xff, value);
1656 1647 __ sllx(value, 8, O3);
1657 1648 __ or3(value, O3, value);
1658 1649 }
1659 1650 if (t == T_SHORT) {
1660 1651 // Zero extend value
1661 1652 __ sllx(value, 48, value);
1662 1653 __ srlx(value, 48, value);
1663 1654 }
1664 1655 if (t == T_BYTE || t == T_SHORT) {
1665 1656 __ sllx(value, 16, O3);
1666 1657 __ or3(value, O3, value);
1667 1658 }
1668 1659
1669 1660 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
1670 1661 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
1671 1662 __ delayed()->andcc(count, 1, G0);
1672 1663
1673 1664 if (!aligned && (t == T_BYTE || t == T_SHORT)) {
1674 1665 // align destination address on a 4-byte boundary
1675 1666 if (t == T_BYTE) {
1676 1667 // One byte misalignment happens only for byte arrays
1677 1668 __ andcc(to, 1, G0);
1678 1669 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1679 1670 __ delayed()->nop();
1680 1671 __ stb(value, to, 0);
1681 1672 __ inc(to, 1);
1682 1673 __ dec(count, 1);
1683 1674 __ BIND(L_skip_align1);
1684 1675 }
1685 1676 // Two bytes misalignment happens only for byte and short (char) arrays
1686 1677 __ andcc(to, 2, G0);
1687 1678 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1688 1679 __ delayed()->nop();
1689 1680 __ sth(value, to, 0);
1690 1681 __ inc(to, 2);
1691 1682 __ dec(count, 1 << (shift - 1));
1692 1683 __ BIND(L_skip_align2);
1693 1684 }
1694 1685 #ifdef _LP64
1695 1686 if (!aligned) {
1696 1687 #endif
1697 1688 // align to 8 bytes, we know we are 4 byte aligned to start
1698 1689 __ andcc(to, 7, G0);
1699 1690 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1700 1691 __ delayed()->nop();
1701 1692 __ stw(value, to, 0);
1702 1693 __ inc(to, 4);
1703 1694 __ dec(count, 1 << shift);
1704 1695 __ BIND(L_fill_32_bytes);
1705 1696 #ifdef _LP64
1706 1697 }
1707 1698 #endif
1708 1699
1709 1700 if (t == T_INT) {
1710 1701 // Zero extend value
1711 1702 __ srl(value, 0, value);
1712 1703 }
1713 1704 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1714 1705 __ sllx(value, 32, O3);
1715 1706 __ or3(value, O3, value);
1716 1707 }
1717 1708
1718 1709 Label L_check_fill_8_bytes;
1719 1710 // Fill 32-byte chunks
1720 1711 __ subcc(count, 8 << shift, count);
1721 1712 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1722 1713 __ delayed()->nop();
1723 1714
1724 1715 Label L_fill_32_bytes_loop, L_fill_4_bytes;
1725 1716 __ align(16);
1726 1717 __ BIND(L_fill_32_bytes_loop);
1727 1718
1728 1719 __ stx(value, to, 0);
1729 1720 __ stx(value, to, 8);
1730 1721 __ stx(value, to, 16);
1731 1722 __ stx(value, to, 24);
1732 1723
1733 1724 __ subcc(count, 8 << shift, count);
1734 1725 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
1735 1726 __ delayed()->add(to, 32, to);
1736 1727
1737 1728 __ BIND(L_check_fill_8_bytes);
1738 1729 __ addcc(count, 8 << shift, count);
1739 1730 __ brx(Assembler::zero, false, Assembler::pn, L_exit);
1740 1731 __ delayed()->subcc(count, 1 << (shift + 1), count);
1741 1732 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
1742 1733 __ delayed()->andcc(count, 1<<shift, G0);
1743 1734
1744 1735 //
1745 1736 // length is too short, just fill 8 bytes at a time
1746 1737 //
1747 1738 Label L_fill_8_bytes_loop;
1748 1739 __ BIND(L_fill_8_bytes_loop);
1749 1740 __ stx(value, to, 0);
1750 1741 __ subcc(count, 1 << (shift + 1), count);
1751 1742 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
1752 1743 __ delayed()->add(to, 8, to);
1753 1744
1754 1745 // fill trailing 4 bytes
1755 1746 __ andcc(count, 1<<shift, G0); // in delay slot of branches
1756 1747 if (t == T_INT) {
1757 1748 __ BIND(L_fill_elements);
1758 1749 }
1759 1750 __ BIND(L_fill_4_bytes);
1760 1751 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
1761 1752 if (t == T_BYTE || t == T_SHORT) {
1762 1753 __ delayed()->andcc(count, 1<<(shift-1), G0);
1763 1754 } else {
1764 1755 __ delayed()->nop();
1765 1756 }
1766 1757 __ stw(value, to, 0);
1767 1758 if (t == T_BYTE || t == T_SHORT) {
1768 1759 __ inc(to, 4);
1769 1760 // fill trailing 2 bytes
1770 1761 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
1771 1762 __ BIND(L_fill_2_bytes);
1772 1763 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
1773 1764 __ delayed()->andcc(count, 1, count);
1774 1765 __ sth(value, to, 0);
1775 1766 if (t == T_BYTE) {
1776 1767 __ inc(to, 2);
1777 1768 // fill trailing byte
1778 1769 __ andcc(count, 1, count); // in delay slot of branches
1779 1770 __ BIND(L_fill_byte);
1780 1771 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1781 1772 __ delayed()->nop();
1782 1773 __ stb(value, to, 0);
1783 1774 } else {
1784 1775 __ BIND(L_fill_byte);
1785 1776 }
1786 1777 } else {
1787 1778 __ BIND(L_fill_2_bytes);
1788 1779 }
1789 1780 __ BIND(L_exit);
1790 1781 __ retl();
1791 1782 __ delayed()->nop();
1792 1783
1793 1784 // Handle fills of less than 8 bytes. Int is handled elsewhere.
1794 1785 if (t == T_BYTE) {
1795 1786 __ BIND(L_fill_elements);
1796 1787 Label L_fill_2, L_fill_4;
1797 1788 // in delay slot __ andcc(count, 1, G0);
1798 1789 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
1799 1790 __ delayed()->andcc(count, 2, G0);
1800 1791 __ stb(value, to, 0);
1801 1792 __ inc(to, 1);
1802 1793 __ BIND(L_fill_2);
1803 1794 __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
1804 1795 __ delayed()->andcc(count, 4, G0);
1805 1796 __ stb(value, to, 0);
1806 1797 __ stb(value, to, 1);
1807 1798 __ inc(to, 2);
1808 1799 __ BIND(L_fill_4);
1809 1800 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1810 1801 __ delayed()->nop();
1811 1802 __ stb(value, to, 0);
1812 1803 __ stb(value, to, 1);
1813 1804 __ stb(value, to, 2);
1814 1805 __ retl();
1815 1806 __ delayed()->stb(value, to, 3);
1816 1807 }
1817 1808
1818 1809 if (t == T_SHORT) {
1819 1810 Label L_fill_2;
1820 1811 __ BIND(L_fill_elements);
1821 1812 // in delay slot __ andcc(count, 1, G0);
1822 1813 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
1823 1814 __ delayed()->andcc(count, 2, G0);
1824 1815 __ sth(value, to, 0);
1825 1816 __ inc(to, 2);
1826 1817 __ BIND(L_fill_2);
1827 1818 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1828 1819 __ delayed()->nop();
1829 1820 __ sth(value, to, 0);
1830 1821 __ retl();
1831 1822 __ delayed()->sth(value, to, 2);
1832 1823 }
1833 1824 return start;
1834 1825 }
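The zero-extend/replicate sequence at the top of generate_fill() widens the fill value into a 64-bit store pattern, so each stx fills eight bytes regardless of element type. The same computation as a self-contained C++ sketch (t_log2 is an illustrative stand-in for the type encoding):

    #include <stdint.h>
    // t_log2: 0 = byte, 1 = short, 2 = int
    uint64_t fill_pattern(uint64_t v, int t_log2) {
      if (t_log2 == 0) { v &= 0xff;   v |= v << 8;  }   //  8 -> 16 bits
      if (t_log2 <= 1) { v &= 0xffff; v |= v << 16; }   // 16 -> 32 bits
      v &= 0xffffffff;
      v |= v << 32;                                     // 32 -> 64 bits
      return v;
    }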
... 299 lines elided ...
1835 1826
1836 1827 //
1837 1828 // Generate stub for conjoint short copy. If "aligned" is true, the
1838 1829 // "from" and "to" addresses are assumed to be heapword aligned.
1839 1830 //
1840 1831 // Arguments for generated stub:
1841 1832 // from: O0
1842 1833 // to: O1
1843 1834 // count: O2 treated as signed
1844 1835 //
1845 - address generate_conjoint_short_copy(bool aligned, const char * name) {
1836 + address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
1837 + address *entry, const char *name) {
1846 1838 // Do reverse copy.
1847 1839
1848 1840 __ align(CodeEntryAlignment);
1849 1841 StubCodeMark mark(this, "StubRoutines", name);
1850 1842 address start = __ pc();
1851 - address nooverlap_target = aligned ?
1852 - StubRoutines::arrayof_jshort_disjoint_arraycopy() :
1853 - disjoint_short_copy_entry;
1854 1843
1855 1844 Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
1856 1845 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
1857 1846
1858 1847 const Register from = O0; // source array address
1859 1848 const Register to = O1; // destination array address
1860 1849 const Register count = O2; // elements count
1861 1850 const Register end_from = from; // source array end address
1862 1851 const Register end_to = to; // destination array end address
1863 1852
1864 1853 const Register byte_count = O3; // bytes count to copy
1865 1854
1866 1855 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1867 1856
1868 - if (!aligned) short_copy_entry = __ pc();
1869 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1870 - if (!aligned) BLOCK_COMMENT("Entry:");
1857 + if (entry != NULL) {
1858 + *entry = __ pc();
1859 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1860 + BLOCK_COMMENT("Entry:");
1861 + }
1871 1862
1872 1863 array_overlap_test(nooverlap_target, 1);
1873 1864
1874 1865 __ sllx(count, LogBytesPerShort, byte_count);
1875 1866 __ add(to, byte_count, end_to); // offset after last copied element
1876 1867
1877 1868 // for short arrays, just do single element copy
1878 1869 __ cmp(count, 11); // 8 + 3 (22 bytes)
1879 1870 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
1880 1871 __ delayed()->add(from, byte_count, end_from);
1881 1872
1882 1873 {
1883 1874 // Align the ends of the arrays since they could be unaligned even
1884 1875 // when the arrays themselves are aligned.
1885 1876
1886 1877 // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
1887 1878 __ andcc(end_to, 3, G0);
1888 1879 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
1889 1880 __ delayed()->lduh(end_from, -2, O3);
1890 1881 __ dec(end_from, 2);
1891 1882 __ dec(end_to, 2);
1892 1883 __ dec(count);
1893 1884 __ sth(O3, end_to, 0);
1894 1885 __ BIND(L_skip_alignment);
1895 1886
1896 1887 // copy 2 elements to align 'end_to' on an 8 byte boundary
1897 1888 __ andcc(end_to, 7, G0);
1898 1889 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
1899 1890 __ delayed()->lduh(end_from, -2, O3);
1900 1891 __ dec(count, 2);
1901 1892 __ lduh(end_from, -4, O4);
1902 1893 __ dec(end_from, 4);
1903 1894 __ dec(end_to, 4);
1904 1895 __ sth(O3, end_to, 2);
1905 1896 __ sth(O4, end_to, 0);
1906 1897 __ BIND(L_skip_alignment2);
1907 1898 }
1908 1899 #ifdef _LP64
1909 1900 if (aligned) {
1910 1901 // Both arrays are aligned to 8 bytes in the 64-bit VM.
1911 1902 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
1912 1903 // in the unaligned case.
1913 1904 __ dec(count, 8);
1914 1905 } else
1915 1906 #endif
1916 1907 {
1917 1908 // Copy with shift 16 bytes per iteration if arrays do not have
1918 1909 // the same alignment mod 8, otherwise jump to the next
1919 1910 // code for aligned copy (and subtracting 8 from 'count' before the jump).
1920 1911 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
1921 1912 // Also jump over aligned copy after the copy with shift completed.
1922 1913
1923 1914 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
1924 1915 L_aligned_copy, L_copy_2_bytes);
1925 1916 }
1926 1917 // copy 4 elements (16 bytes) at a time
1927 1918 __ align(OptoLoopAlignment);
1928 1919 __ BIND(L_aligned_copy);
1929 1920 __ dec(end_from, 16);
1930 1921 __ ldx(end_from, 8, O3);
1931 1922 __ ldx(end_from, 0, O4);
1932 1923 __ dec(end_to, 16);
1933 1924 __ deccc(count, 8);
1934 1925 __ stx(O3, end_to, 8);
1935 1926 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
1936 1927 __ delayed()->stx(O4, end_to, 0);
1937 1928 __ inc(count, 8);
1938 1929
1939 1930 // copy 1 element (2 bytes) at a time
1940 1931 __ BIND(L_copy_2_bytes);
1941 1932 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
1942 1933 __ delayed()->nop();
1943 1934 __ BIND(L_copy_2_bytes_loop);
1944 1935 __ dec(end_from, 2);
1945 1936 __ dec(end_to, 2);
1946 1937 __ lduh(end_from, 0, O4);
1947 1938 __ deccc(count);
1948 1939 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
1949 1940 __ delayed()->sth(O4, end_to, 0);
1950 1941
1951 1942 __ BIND(L_exit);
1952 1943 // O3, O4 are used as temp registers
1953 1944 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
1954 1945 __ retl();
1955 1946 __ delayed()->mov(G0, O0); // return 0
1956 1947 return start;
1957 1948 }
1958 1949
1959 1950 //
1960 1951 // Generate core code for disjoint int copy (and oop copy on 32-bit).
1961 1952 // If "aligned" is true, the "from" and "to" addresses are assumed
1962 1953 // to be heapword aligned.
1963 1954 //
1964 1955 // Arguments:
1965 1956 // from: O0
1966 1957 // to: O1
1967 1958 // count: O2 treated as signed
1968 1959 //
1969 1960 void generate_disjoint_int_copy_core(bool aligned) {
1970 1961
1971 1962 Label L_skip_alignment, L_aligned_copy;
1972 1963 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
1973 1964
1974 1965 const Register from = O0; // source array address
1975 1966 const Register to = O1; // destination array address
1976 1967 const Register count = O2; // elements count
1977 1968 const Register offset = O5; // offset from start of arrays
1978 1969 // O3, O4, G3, G4 are used as temp registers
1979 1970
1980 1971 // 'aligned' == true when it is known statically during compilation
1981 1972 // of this arraycopy call site that both 'from' and 'to' addresses
1982 1973 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
1983 1974 //
1984 1975 // Aligned arrays have 4-byte alignment in the 32-bit VM
1985 1976 // and 8-byte alignment in the 64-bit VM.
1986 1977 //
1987 1978 #ifdef _LP64
1988 1979 if (!aligned)
1989 1980 #endif
1990 1981 {
1991 1982 // The next check could be put under 'ifndef' since the code in
1992 1983 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
1993 1984
1994 1985 // for short arrays, just do single element copy
1995 1986 __ cmp(count, 5); // 4 + 1 (20 bytes)
1996 1987 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
1997 1988 __ delayed()->mov(G0, offset);
1998 1989
1999 1990 // copy 1 element to align 'to' on an 8 byte boundary
2000 1991 __ andcc(to, 7, G0);
2001 1992 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
2002 1993 __ delayed()->ld(from, 0, O3);
2003 1994 __ inc(from, 4);
2004 1995 __ inc(to, 4);
2005 1996 __ dec(count);
2006 1997 __ st(O3, to, -4);
2007 1998 __ BIND(L_skip_alignment);
2008 1999
2009 2000 // if arrays have same alignment mod 8, do 4 elements copy
2010 2001 __ andcc(from, 7, G0);
2011 2002 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
2012 2003 __ delayed()->ld(from, 0, O3);
2013 2004
2014 2005 //
2015 2006 // Load 2 aligned 8-bytes chunks and use one from previous iteration
2016 2007 // to form 2 aligned 8-bytes chunks to store.
2017 2008 //
2018 2009 // copy_16_bytes_forward_with_shift() is not used here since this
2019 2010 // code is faster.
2020 2011
2021 2012 // copy with shift 4 elements (16 bytes) at a time
2022 2013 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4
2023 2014
2024 2015 __ align(OptoLoopAlignment);
2025 2016 __ BIND(L_copy_16_bytes);
2026 2017 __ ldx(from, 4, O4);
2027 2018 __ deccc(count, 4); // Can we do next iteration after this one?
2028 2019 __ ldx(from, 12, G4);
2029 2020 __ inc(to, 16);
2030 2021 __ inc(from, 16);
2031 2022 __ sllx(O3, 32, O3);
2032 2023 __ srlx(O4, 32, G3);
2033 2024 __ bset(G3, O3);
2034 2025 __ stx(O3, to, -16);
2035 2026 __ sllx(O4, 32, O4);
2036 2027 __ srlx(G4, 32, G3);
2037 2028 __ bset(G3, O4);
2038 2029 __ stx(O4, to, -8);
2039 2030 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2040 2031 __ delayed()->mov(G4, O3);
2041 2032
2042 2033 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2043 2034 __ delayed()->inc(count, 4); // restore 'count'
2044 2035
2045 2036 __ BIND(L_aligned_copy);
2046 2037 }
2047 2038 // copy 4 elements (16 bytes) at a time
2048 2039 __ and3(count, 1, G4); // Save
2049 2040 __ srl(count, 1, count);
2050 2041 generate_disjoint_long_copy_core(aligned);
2051 2042 __ mov(G4, count); // Restore
2052 2043
2053 2044 // copy 1 element at a time
2054 2045 __ BIND(L_copy_4_bytes);
2055 2046 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2056 2047 __ delayed()->nop();
2057 2048 __ BIND(L_copy_4_bytes_loop);
2058 2049 __ ld(from, offset, O3);
2059 2050 __ deccc(count);
2060 2051 __ st(O3, to, offset);
2061 2052 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
2062 2053 __ delayed()->inc(offset, 4);
2063 2054 __ BIND(L_exit);
2064 2055 }
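The sllx/srlx/bset cluster above merges two aligned 8-byte loads into each aligned 8-byte store when 'from' is only 4-byte aligned. A hedged C++ sketch of the emitted loop, written for a big-endian machine like SPARC (pointer casts and names illustrative):

    #include <stdint.h>
    #include <stddef.h>
    // Assumes: to % 8 == 0, from % 8 == 4, count >= 4 ints, big-endian host.
    static void copy_ints_shifted(const char* from, char* to, ptrdiff_t count) {
      uint32_t head = *(const uint32_t*)from;       // O3, preloaded in a delay slot
      count -= 4;
      do {
        uint64_t a = *(const uint64_t*)(from + 4);  // O4, aligned load
        uint64_t b = *(const uint64_t*)(from + 12); // G4, aligned load
        *(uint64_t*)(to + 0) = ((uint64_t)head << 32) | (a >> 32);
        *(uint64_t*)(to + 8) = (a << 32) | (b >> 32);
        head = (uint32_t)b;                         // carried into the next pass
        from += 16; to += 16; count -= 4;
      } while (count >= 0);
      // count + 4 elements remain for the 1-element tail loop
    }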
... 184 lines elided ...
2065 2056
2066 2057 //
2067 2058 // Generate stub for disjoint int copy. If "aligned" is true, the
2068 2059 // "from" and "to" addresses are assumed to be heapword aligned.
2069 2060 //
2070 2061 // Arguments for generated stub:
2071 2062 // from: O0
2072 2063 // to: O1
2073 2064 // count: O2 treated as signed
2074 2065 //
2075 - address generate_disjoint_int_copy(bool aligned, const char * name) {
2066 + address generate_disjoint_int_copy(bool aligned, address *entry, const char *name) {
2076 2067 __ align(CodeEntryAlignment);
2077 2068 StubCodeMark mark(this, "StubRoutines", name);
2078 2069 address start = __ pc();
2079 2070
2080 2071 const Register count = O2;
2081 2072 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2082 2073
2083 - if (!aligned) disjoint_int_copy_entry = __ pc();
2084 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2085 - if (!aligned) BLOCK_COMMENT("Entry:");
2074 + if (entry != NULL) {
2075 + *entry = __ pc();
2076 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2077 + BLOCK_COMMENT("Entry:");
2078 + }
2086 2079
2087 2080 generate_disjoint_int_copy_core(aligned);
2088 2081
2089 2082 // O3, O4 are used as temp registers
2090 2083 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
2091 2084 __ retl();
2092 2085 __ delayed()->mov(G0, O0); // return 0
2093 2086 return start;
2094 2087 }
2095 2088
2096 2089 //
2097 2090 // Generate core code for conjoint int copy (and oop copy on 32-bit).
2098 2091 // If "aligned" is true, the "from" and "to" addresses are assumed
2099 2092 // to be heapword aligned.
2100 2093 //
2101 2094 // Arguments:
2102 2095 // from: O0
2103 2096 // to: O1
2104 2097 // count: O2 treated as signed
2105 2098 //
2106 2099 void generate_conjoint_int_copy_core(bool aligned) {
2107 2100 // Do reverse copy.
2108 2101
2109 2102 Label L_skip_alignment, L_aligned_copy;
2110 2103 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
2111 2104
2112 2105 const Register from = O0; // source array address
2113 2106 const Register to = O1; // destination array address
2114 2107 const Register count = O2; // elements count
2115 2108 const Register end_from = from; // source array end address
2116 2109 const Register end_to = to; // destination array end address
2117 2110 // O3, O4, O5, G3 are used as temp registers
2118 2111
2119 2112 const Register byte_count = O3; // bytes count to copy
2120 2113
2121 2114 __ sllx(count, LogBytesPerInt, byte_count);
2122 2115 __ add(to, byte_count, end_to); // offset after last copied element
2123 2116
2124 2117 __ cmp(count, 5); // for short arrays, just do single element copy
2125 2118 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
2126 2119 __ delayed()->add(from, byte_count, end_from);
2127 2120
2128 2121 // copy 1 element to align 'to' on an 8 byte boundary
2129 2122 __ andcc(end_to, 7, G0);
2130 2123 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
2131 2124 __ delayed()->nop();
2132 2125 __ dec(count);
2133 2126 __ dec(end_from, 4);
2134 2127 __ dec(end_to, 4);
2135 2128 __ ld(end_from, 0, O4);
2136 2129 __ st(O4, end_to, 0);
2137 2130 __ BIND(L_skip_alignment);
2138 2131
2139 2132 // Check if 'end_from' and 'end_to' have the same alignment.
2140 2133 __ andcc(end_from, 7, G0);
2141 2134 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
2142 2135 __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4
2143 2136
2144 2137 // copy with shift 4 elements (16 bytes) at a time
2145 2138 //
2146 2139 // Load 2 aligned 8-bytes chunks and use one from previous iteration
2147 2140 // to form 2 aligned 8-bytes chunks to store.
2148 2141 //
2149 2142 __ ldx(end_from, -4, O3);
2150 2143 __ align(OptoLoopAlignment);
2151 2144 __ BIND(L_copy_16_bytes);
2152 2145 __ ldx(end_from, -12, O4);
2153 2146 __ deccc(count, 4);
2154 2147 __ ldx(end_from, -20, O5);
2155 2148 __ dec(end_to, 16);
2156 2149 __ dec(end_from, 16);
2157 2150 __ srlx(O3, 32, O3);
2158 2151 __ sllx(O4, 32, G3);
2159 2152 __ bset(G3, O3);
2160 2153 __ stx(O3, end_to, 8);
2161 2154 __ srlx(O4, 32, O4);
2162 2155 __ sllx(O5, 32, G3);
2163 2156 __ bset(O4, G3);
2164 2157 __ stx(G3, end_to, 0);
2165 2158 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2166 2159 __ delayed()->mov(O5, O3);
2167 2160
2168 2161 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2169 2162 __ delayed()->inc(count, 4);
2170 2163
2171 2164 // copy 4 elements (16 bytes) at a time
2172 2165 __ align(OptoLoopAlignment);
2173 2166 __ BIND(L_aligned_copy);
2174 2167 __ dec(end_from, 16);
2175 2168 __ ldx(end_from, 8, O3);
2176 2169 __ ldx(end_from, 0, O4);
2177 2170 __ dec(end_to, 16);
2178 2171 __ deccc(count, 4);
2179 2172 __ stx(O3, end_to, 8);
2180 2173 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
2181 2174 __ delayed()->stx(O4, end_to, 0);
2182 2175 __ inc(count, 4);
2183 2176
2184 2177 // copy 1 element (4 bytes) at a time
2185 2178 __ BIND(L_copy_4_bytes);
2186 2179 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
2187 2180 __ delayed()->nop();
2188 2181 __ BIND(L_copy_4_bytes_loop);
2189 2182 __ dec(end_from, 4);
2190 2183 __ dec(end_to, 4);
2191 2184 __ ld(end_from, 0, O4);
2192 2185 __ deccc(count);
2193 2186 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
2194 2187 __ delayed()->st(O4, end_to, 0);
2195 2188 __ BIND(L_exit);
2196 2189 }
... 101 lines elided ...
2197 2190
2198 2191 //
2199 2192 // Generate stub for conjoint int copy. If "aligned" is true, the
2200 2193 // "from" and "to" addresses are assumed to be heapword aligned.
2201 2194 //
2202 2195 // Arguments for generated stub:
2203 2196 // from: O0
2204 2197 // to: O1
2205 2198 // count: O2 treated as signed
2206 2199 //
2207 - address generate_conjoint_int_copy(bool aligned, const char * name) {
2200 + address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
2201 + address *entry, const char *name) {
2208 2202 __ align(CodeEntryAlignment);
2209 2203 StubCodeMark mark(this, "StubRoutines", name);
2210 2204 address start = __ pc();
2211 2205
2212 - address nooverlap_target = aligned ?
2213 - StubRoutines::arrayof_jint_disjoint_arraycopy() :
2214 - disjoint_int_copy_entry;
2215 -
2216 2206 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2217 2207
2218 - if (!aligned) int_copy_entry = __ pc();
2219 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2220 - if (!aligned) BLOCK_COMMENT("Entry:");
2208 + if (entry != NULL) {
2209 + *entry = __ pc();
2210 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2211 + BLOCK_COMMENT("Entry:");
2212 + }
2221 2213
2222 2214 array_overlap_test(nooverlap_target, 2);
2223 2215
2224 2216 generate_conjoint_int_copy_core(aligned);
2225 2217
2226 2218 // O3, O4 are used as temp registers
2227 2219 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
2228 2220 __ retl();
2229 2221 __ delayed()->mov(G0, O0); // return 0
2230 2222 return start;
2231 2223 }
2232 2224
2233 2225 //
2234 2226 // Generate core code for disjoint long copy (and oop copy on 64-bit).
2235 2227 // "aligned" is ignored, because we must make the stronger
2236 2228 // assumption that both addresses are always 64-bit aligned.
2237 2229 //
2238 2230 // Arguments:
2239 2231 // from: O0
2240 2232 // to: O1
2241 2233 // count: O2 treated as signed
2242 2234 //
2243 2235 // count -= 2;
2244 2236 // if ( count >= 0 ) { // >= 2 elements
2245 2237 // if ( count > 6) { // >= 8 elements
2246 2238 // count -= 6; // original count - 8
2247 2239 // do {
2248 2240 // copy_8_elements;
2249 2241 // count -= 8;
2250 2242 // } while ( count >= 0 );
2251 2243 // count += 6;
2252 2244 // }
2253 2245 // if ( count >= 0 ) { // >= 2 elements
2254 2246 // do {
2255 2247 // copy_2_elements;
2256 2248 // } while ( (count=count-2) >= 0 );
2257 2249 // }
2258 2250 // }
2259 2251 // count += 2;
2260 2252 // if ( count != 0 ) { // 1 element left
2261 2253 // copy_1_element;
2262 2254 // }
2263 2255 //
2264 2256 void generate_disjoint_long_copy_core(bool aligned) {
2265 2257 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
2266 2258 const Register from = O0; // source array address
2267 2259 const Register to = O1; // destination array address
2268 2260 const Register count = O2; // elements count
2269 2261 const Register offset0 = O4; // element offset
2270 2262 const Register offset8 = O5; // next element offset
2271 2263
2272 2264 __ deccc(count, 2);
2273 2265 __ mov(G0, offset0); // offset from start of arrays (0)
2274 2266 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2275 2267 __ delayed()->add(offset0, 8, offset8);
2276 2268
2277 2269 // Copy by 64 bytes chunks
2278 2270 Label L_copy_64_bytes;
2279 2271 const Register from64 = O3; // source address
2280 2272 const Register to64 = G3; // destination address
2281 2273 __ subcc(count, 6, O3);
2282 2274 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
2283 2275 __ delayed()->mov(to, to64);
2284 2276 // Now we can use O4(offset0), O5(offset8) as temps
2285 2277 __ mov(O3, count);
2286 2278 __ mov(from, from64);
2287 2279
2288 2280 __ align(OptoLoopAlignment);
2289 2281 __ BIND(L_copy_64_bytes);
2290 2282 for( int off = 0; off < 64; off += 16 ) {
2291 2283 __ ldx(from64, off+0, O4);
2292 2284 __ ldx(from64, off+8, O5);
2293 2285 __ stx(O4, to64, off+0);
2294 2286 __ stx(O5, to64, off+8);
2295 2287 }
2296 2288 __ deccc(count, 8);
2297 2289 __ inc(from64, 64);
2298 2290 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
2299 2291 __ delayed()->inc(to64, 64);
2300 2292
2301 2293 // Restore O4(offset0), O5(offset8)
2302 2294 __ sub(from64, from, offset0);
2303 2295 __ inccc(count, 6);
2304 2296 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2305 2297 __ delayed()->add(offset0, 8, offset8);
2306 2298
2307 2299 // Copy by 16 bytes chunks
2308 2300 __ align(OptoLoopAlignment);
2309 2301 __ BIND(L_copy_16_bytes);
2310 2302 __ ldx(from, offset0, O3);
2311 2303 __ ldx(from, offset8, G3);
2312 2304 __ deccc(count, 2);
2313 2305 __ stx(O3, to, offset0);
2314 2306 __ inc(offset0, 16);
2315 2307 __ stx(G3, to, offset8);
2316 2308 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2317 2309 __ delayed()->inc(offset8, 16);
2318 2310
2319 2311 // Copy last 8 bytes
2320 2312 __ BIND(L_copy_8_bytes);
2321 2313 __ inccc(count, 2);
2322 2314 __ brx(Assembler::zero, true, Assembler::pn, L_exit );
2323 2315 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
2324 2316 __ ldx(from, offset0, O3);
2325 2317 __ stx(O3, to, offset0);
2326 2318 __ BIND(L_exit);
2327 2319 }
2328 2320
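Note that the C++ for-loop over 'off' above executes when the stub is generated, not when it runs: it emits a fully unrolled body of eight ldx/stx pairs. The emitted main loop behaves like this C sketch (64 bytes per pass; count in 8-byte elements):

    do {
      for (int off = 0; off < 64; off += 8)          // fully unrolled in the stub
        *(uint64_t*)(to64 + off) = *(const uint64_t*)(from64 + off);
      from64 += 64; to64 += 64; count -= 8;
    } while (count >= 0);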
... 98 lines elided ...
2329 2321 //
2330 2322 // Generate stub for disjoint long copy.
2331 2323 // "aligned" is ignored, because we must make the stronger
2332 2324 // assumption that both addresses are always 64-bit aligned.
2333 2325 //
2334 2326 // Arguments for generated stub:
2335 2327 // from: O0
2336 2328 // to: O1
2337 2329 // count: O2 treated as signed
2338 2330 //
2339 - address generate_disjoint_long_copy(bool aligned, const char * name) {
2331 + address generate_disjoint_long_copy(bool aligned, address *entry, const char *name) {
2340 2332 __ align(CodeEntryAlignment);
2341 2333 StubCodeMark mark(this, "StubRoutines", name);
2342 2334 address start = __ pc();
2343 2335
2344 2336 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2345 2337
2346 - if (!aligned) disjoint_long_copy_entry = __ pc();
2347 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2348 - if (!aligned) BLOCK_COMMENT("Entry:");
2338 + if (entry != NULL) {
2339 + *entry = __ pc();
2340 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2341 + BLOCK_COMMENT("Entry:");
2342 + }
2349 2343
2350 2344 generate_disjoint_long_copy_core(aligned);
2351 2345
2352 2346 // O3, O4 are used as temp registers
2353 2347 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2354 2348 __ retl();
2355 2349 __ delayed()->mov(G0, O0); // return 0
2356 2350 return start;
2357 2351 }
2358 2352
2359 2353 //
2360 2354 // Generate core code for conjoint long copy (and oop copy on 64-bit).
2361 2355 // "aligned" is ignored, because we must make the stronger
2362 2356 // assumption that both addresses are always 64-bit aligned.
2363 2357 //
2364 2358 // Arguments:
2365 2359 // from: O0
2366 2360 // to: O1
2367 2361 // count: O2 treated as signed
2368 2362 //
2369 2363 void generate_conjoint_long_copy_core(bool aligned) {
2370 2364 // Do reverse copy.
2371 2365 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
2372 2366 const Register from = O0; // source array address
2373 2367 const Register to = O1; // destination array address
2374 2368 const Register count = O2; // elements count
2375 2369 const Register offset8 = O4; // element offset
2376 2370 const Register offset0 = O5; // previous element offset
2377 2371
2378 2372 __ subcc(count, 1, count);
2379 2373 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
2380 2374 __ delayed()->sllx(count, LogBytesPerLong, offset8);
2381 2375 __ sub(offset8, 8, offset0);
2382 2376 __ align(OptoLoopAlignment);
2383 2377 __ BIND(L_copy_16_bytes);
2384 2378 __ ldx(from, offset8, O2);
2385 2379 __ ldx(from, offset0, O3);
2386 2380 __ stx(O2, to, offset8);
2387 2381 __ deccc(offset8, 16); // use offset8 as counter
2388 2382 __ stx(O3, to, offset0);
2389 2383 __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
2390 2384 __ delayed()->dec(offset0, 16);
2391 2385
2392 2386 __ BIND(L_copy_8_bytes);
2393 2387 __ brx(Assembler::negative, false, Assembler::pn, L_exit );
2394 2388 __ delayed()->nop();
2395 2389 __ ldx(from, 0, O3);
2396 2390 __ stx(O3, to, 0);
2397 2391 __ BIND(L_exit);
2398 2392 }
... 40 lines elided ...
2399 2393
2400 2394 // Generate stub for conjoint long copy.
2401 2395 // "aligned" is ignored, because we must make the stronger
2402 2396 // assumption that both addresses are always 64-bit aligned.
2403 2397 //
2404 2398 // Arguments for generated stub:
2405 2399 // from: O0
2406 2400 // to: O1
2407 2401 // count: O2 treated as signed
2408 2402 //
2409 - address generate_conjoint_long_copy(bool aligned, const char * name) {
2403 + address generate_conjoint_long_copy(bool aligned, address nooverlap_target,
2404 + address *entry, const char *name) {
2410 2405 __ align(CodeEntryAlignment);
2411 2406 StubCodeMark mark(this, "StubRoutines", name);
2412 2407 address start = __ pc();
2413 2408
2414 2409 assert(!aligned, "usage");
2415 - address nooverlap_target = disjoint_long_copy_entry;
2416 2410
2417 2411 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2418 2412
2419 - if (!aligned) long_copy_entry = __ pc();
2420 - // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2421 - if (!aligned) BLOCK_COMMENT("Entry:");
2413 + if (entry != NULL) {
2414 + *entry = __ pc();
2415 + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2416 + BLOCK_COMMENT("Entry:");
2417 + }
2422 2418
2423 2419 array_overlap_test(nooverlap_target, 3);
2424 2420
2425 2421 generate_conjoint_long_copy_core(aligned);
2426 2422
2427 2423 // O3, O4 are used as temp registers
2428 2424 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
2429 2425 __ retl();
2430 2426 __ delayed()->mov(G0, O0); // return 0
2431 2427 return start;
2432 2428 }
2433 2429
2434 2430 // Generate stub for disjoint oop copy. If "aligned" is true, the
2435 2431 // "from" and "to" addresses are assumed to be heapword aligned.
2436 2432 //
2437 2433 // Arguments for generated stub:
2438 2434 // from: O0
2439 2435 // to: O1
2440 2436 // count: O2 treated as signed
2441 2437 //
2442 - address generate_disjoint_oop_copy(bool aligned, const char * name) {
2438 + address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name) {
2443 2439
2444 2440 const Register from = O0; // source array address
2445 2441 const Register to = O1; // destination array address
2446 2442 const Register count = O2; // elements count
2447 2443
2448 2444 __ align(CodeEntryAlignment);
2449 2445 StubCodeMark mark(this, "StubRoutines", name);
2450 2446 address start = __ pc();
2451 2447
2452 2448 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2453 2449
2454 - if (!aligned) disjoint_oop_copy_entry = __ pc();
2455 - // caller can pass a 64-bit byte count here
2456 - if (!aligned) BLOCK_COMMENT("Entry:");
2450 + if (entry != NULL) {
2451 + *entry = __ pc();
2452 + // caller can pass a 64-bit byte count here
2453 + BLOCK_COMMENT("Entry:");
2454 + }
2457 2455
2458 2456 // save arguments for barrier generation
2459 2457 __ mov(to, G1);
2460 2458 __ mov(count, G5);
2461 2459 gen_write_ref_array_pre_barrier(G1, G5);
2462 2460 #ifdef _LP64
2463 2461 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2464 2462 if (UseCompressedOops) {
2465 2463 generate_disjoint_int_copy_core(aligned);
2466 2464 } else {
2467 2465 generate_disjoint_long_copy_core(aligned);
2468 2466 }
2469 2467 #else
2470 2468 generate_disjoint_int_copy_core(aligned);
2471 2469 #endif
2472 2470 // O0 is used as temp register
2473 2471 gen_write_ref_array_post_barrier(G1, G5, O0);
2474 2472
2475 2473 // O3, O4 are used as temp registers
2476 2474 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2477 2475 __ retl();
2478 2476 __ delayed()->mov(G0, O0); // return 0
2479 2477 return start;
... 13 lines elided ...
2480 2478 }
2481 2479
2482 2480 // Generate stub for conjoint oop copy. If "aligned" is true, the
2483 2481 // "from" and "to" addresses are assumed to be heapword aligned.
2484 2482 //
2485 2483 // Arguments for generated stub:
2486 2484 // from: O0
2487 2485 // to: O1
2488 2486 // count: O2 treated as signed
2489 2487 //
2490 - address generate_conjoint_oop_copy(bool aligned, const char * name) {
2488 + address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2489 + address *entry, const char *name) {
2491 2490
2492 2491 const Register from = O0; // source array address
2493 2492 const Register to = O1; // destination array address
2494 2493 const Register count = O2; // elements count
2495 2494
2496 2495 __ align(CodeEntryAlignment);
2497 2496 StubCodeMark mark(this, "StubRoutines", name);
2498 2497 address start = __ pc();
2499 2498
2500 2499 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2501 2500
2502 - if (!aligned) oop_copy_entry = __ pc();
2503 - // caller can pass a 64-bit byte count here
2504 - if (!aligned) BLOCK_COMMENT("Entry:");
2501 + if (entry != NULL) {
2502 + *entry = __ pc();
2503 + // caller can pass a 64-bit byte count here
2504 + BLOCK_COMMENT("Entry:");
2505 + }
2506 +
2507 + array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2505 2508
2506 2509 // save arguments for barrier generation
2507 2510 __ mov(to, G1);
2508 2511 __ mov(count, G5);
2509 -
2510 2512 gen_write_ref_array_pre_barrier(G1, G5);
2511 2513
2512 - address nooverlap_target = aligned ?
2513 - StubRoutines::arrayof_oop_disjoint_arraycopy() :
2514 - disjoint_oop_copy_entry;
2515 -
2516 - array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2517 -
2518 2514 #ifdef _LP64
2519 2515 if (UseCompressedOops) {
2520 2516 generate_conjoint_int_copy_core(aligned);
2521 2517 } else {
2522 2518 generate_conjoint_long_copy_core(aligned);
2523 2519 }
2524 2520 #else
2525 2521 generate_conjoint_int_copy_core(aligned);
2526 2522 #endif
2527 2523
2528 2524 // O0 is used as temp register
2529 2525 gen_write_ref_array_post_barrier(G1, G5, O0);
2530 2526
2531 2527 // O3, O4 are used as temp registers
2532 2528 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
2533 2529 __ retl();
2534 2530 __ delayed()->mov(G0, O0); // return 0
2535 2531 return start;
2536 2532 }
2537 2533
2538 2534
2539 2535 // Helper for generating a dynamic type check.
2540 2536 // Smashes only the given temp registers.
2541 2537 void generate_type_check(Register sub_klass,
2542 2538 Register super_check_offset,
2543 2539 Register super_klass,
2544 2540 Register temp,
2545 2541 Label& L_success) {
2546 2542 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2547 2543
2548 2544 BLOCK_COMMENT("type_check:");
2549 2545
2550 2546 Label L_miss, L_pop_to_miss;
2551 2547
2552 2548 assert_clean_int(super_check_offset, temp);
2553 2549
2554 2550 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2555 2551 &L_success, &L_miss, NULL,
2556 2552 super_check_offset);
2557 2553
2558 2554 BLOCK_COMMENT("type_check_slow_path:");
2559 2555 __ save_frame(0);
2560 2556 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2561 2557 super_klass->after_save(),
2562 2558 L0, L1, L2, L4,
2563 2559 NULL, &L_pop_to_miss);
2564 2560 __ ba(false, L_success);
2565 2561 __ delayed()->restore();
2566 2562
2567 2563 __ bind(L_pop_to_miss);
2568 2564 __ restore();
2569 2565
2570 2566 // Fall through on failure!
2571 2567 __ BIND(L_miss);
2572 2568 }
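For orientation, the split between check_klass_subtype_fast_path() and the frame-saving slow path corresponds roughly to this hedged C++ sketch (field access and helper names illustrative; the real fast path also special-cases the secondary-supers cache):

    bool is_subtype(Klass* sub, Klass* super, int super_check_offset) {
      if (sub == super) return true;                        // trivial hit
      Klass* probe = *(Klass**)((char*)sub + super_check_offset);
      if (probe == super) return true;                      // primary-supers or cache hit
      return scan_secondary_supers(sub, super);             // slow path (saves a frame here)
    }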
2573 2569
2574 2570
... 47 lines elided ...
2575 2571 // Generate stub for checked oop copy.
2576 2572 //
2577 2573 // Arguments for generated stub:
2578 2574 // from: O0
2579 2575 // to: O1
2580 2576 // count: O2 treated as signed
2581 2577 // ckoff: O3 (super_check_offset)
2582 2578 // ckval: O4 (super_klass)
2583 2579 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2584 2580 //
2585 - address generate_checkcast_copy(const char* name) {
2581 + address generate_checkcast_copy(const char *name, address *entry) {
2586 2582
2587 2583 const Register O0_from = O0; // source array address
2588 2584 const Register O1_to = O1; // destination array address
2589 2585 const Register O2_count = O2; // elements count
2590 2586 const Register O3_ckoff = O3; // super_check_offset
2591 2587 const Register O4_ckval = O4; // super_klass
2592 2588
2593 2589 const Register O5_offset = O5; // loop var, with stride wordSize
2594 2590 const Register G1_remain = G1; // loop var, with stride -1
2595 2591 const Register G3_oop = G3; // actual oop copied
2596 2592 const Register G4_klass = G4; // oop._klass
2597 2593 const Register G5_super = G5; // oop._klass._primary_supers[ckval]
2598 2594
2599 2595 __ align(CodeEntryAlignment);
2600 2596 StubCodeMark mark(this, "StubRoutines", name);
2601 2597 address start = __ pc();
2602 2598
2603 - gen_write_ref_array_pre_barrier(O1, O2);
2604 -
2605 2599 #ifdef ASSERT
2606 2600 // We sometimes save a frame (see generate_type_check below).
2607 2601 // If this will cause trouble, let's fail now instead of later.
2608 2602 __ save_frame(0);
2609 2603 __ restore();
2610 2604 #endif
2611 2605
2612 2606 assert_clean_int(O2_count, G1); // Make sure 'count' is clean int.
2613 2607
2614 2608 #ifdef ASSERT
2615 2609 // caller guarantees that the arrays really are different
2616 2610 // otherwise, we would have to make conjoint checks
2617 2611 { Label L;
... 3 lines elided ...
2618 2612 __ mov(O3, G1); // spill: overlap test smashes O3
2619 2613 __ mov(O4, G4); // spill: overlap test smashes O4
2620 2614 array_overlap_test(L, LogBytesPerHeapOop);
2621 2615 __ stop("checkcast_copy within a single array");
2622 2616 __ bind(L);
2623 2617 __ mov(G1, O3);
2624 2618 __ mov(G4, O4);
2625 2619 }
2626 2620 #endif //ASSERT
2627 2621
2628 - checkcast_copy_entry = __ pc();
2629 - // caller can pass a 64-bit byte count here (from generic stub)
2630 - BLOCK_COMMENT("Entry:");
2622 + if (entry != NULL) {
2623 + *entry = __ pc();
2624 + // caller can pass a 64-bit byte count here (from generic stub)
2625 + BLOCK_COMMENT("Entry:");
2626 + }
2627 +
2628 + gen_write_ref_array_pre_barrier(O1_to, O2_count);
2631 2629
2632 2630 Label load_element, store_element, do_card_marks, fail, done;
2633 2631 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it
2634 2632 __ brx(Assembler::notZero, false, Assembler::pt, load_element);
2635 2633 __ delayed()->mov(G0, O5_offset); // offset from start of arrays
2636 2634
2637 2635 // Empty array: Nothing to do.
2638 2636 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
2639 2637 __ retl();
2640 2638 __ delayed()->set(0, O0); // return 0 on (trivial) success
2641 2639
2642 2640 // ======== begin loop ========
2643 2641 // (Loop is rotated; its entry is load_element.)
2644 2642 // Loop variables:
2645 2643 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2646 2644 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2647 2645 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2648 2646 __ align(OptoLoopAlignment);
2649 2647
2650 2648 __ BIND(store_element);
2651 2649 __ deccc(G1_remain); // decrement the count
2652 2650 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2653 2651 __ inc(O5_offset, heapOopSize); // step to next offset
2654 2652 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2655 2653 __ delayed()->set(0, O0); // return 0 on success
2656 2654
2657 2655 // ======== loop entry is here ========
2658 2656 __ BIND(load_element);
2659 2657 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2660 2658 __ br_null(G3_oop, true, Assembler::pt, store_element);
2661 2659 __ delayed()->nop();
2662 2660
2663 2661 __ load_klass(G3_oop, G4_klass); // query the object klass
2664 2662
2665 2663 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2666 2664 // branch to this on success:
2667 2665 store_element);
2668 2666 // ======== end loop ========
2669 2667
2670 2668 // It was a real error; we must depend on the caller to finish the job.
2671 2669 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2672 2670 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2673 2671 // and report their number to the caller.
2674 2672 __ BIND(fail);
2675 2673 __ subcc(O2_count, G1_remain, O2_count);
2676 2674 __ brx(Assembler::zero, false, Assembler::pt, done);
2677 2675 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2678 2676
2679 2677 __ BIND(do_card_marks);
2680 2678 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2681 2679
2682 2680 __ BIND(done);
2683 2681 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
2684 2682 __ retl();
2685 2683 __ delayed()->nop(); // return value in O0
2686 2684
2687 2685 return start;
2688 2686 }
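Element by element, the rotated loop above implements what this Java-like sketch describes (names illustrative; 'copied' corresponds to count minus G1_remain):

    int copied = 0;
    for (; copied < count; copied++) {
      oop el = from[copied];
      if (el != null && !is_subtype(el.klass(), ckval)) break;  // stop at first bad element
      to[copied] = el;                                          // nulls always pass
    }
    post_barrier(to, copied);                 // card marks only for what was stored
    return (copied == count) ? 0 : ~copied;   // (-1 ^ K), K = elements copied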
2689 2687
2690 2688
2691 2689 // Generate 'unsafe' array copy stub
2692 2690 // Though just as safe as the other stubs, it takes an unscaled
... 52 lines elided ...
2693 2691 // size_t argument instead of an element count.
2694 2692 //
2695 2693 // Arguments for generated stub:
2696 2694 // from: O0
2697 2695 // to: O1
2698 2696 // count: O2 byte count, treated as ssize_t, can be zero
2699 2697 //
2700 2698 // Examines the alignment of the operands and dispatches
2701 2699 // to a long, int, short, or byte copy loop.
2702 2700 //
2703 - address generate_unsafe_copy(const char* name) {
2701 + address generate_unsafe_copy(const char* name,
2702 + address byte_copy_entry,
2703 + address short_copy_entry,
2704 + address int_copy_entry,
2705 + address long_copy_entry) {
2704 2706
2705 2707 const Register O0_from = O0; // source array address
2706 2708 const Register O1_to = O1; // destination array address
2707 2709 const Register O2_count = O2; // elements count
2708 2710
2709 2711 const Register G1_bits = G1; // test copy of low bits
2710 2712
2711 2713 __ align(CodeEntryAlignment);
2712 2714 StubCodeMark mark(this, "StubRoutines", name);
2713 2715 address start = __ pc();
2714 2716
2715 2717 // bump this on entry, not on exit:
2716 2718 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);
2717 2719
2718 2720 __ or3(O0_from, O1_to, G1_bits);
2719 2721 __ or3(O2_count, G1_bits, G1_bits);
2720 2722
2721 2723 __ btst(BytesPerLong-1, G1_bits);
2722 2724 __ br(Assembler::zero, true, Assembler::pt,
2723 2725 long_copy_entry, relocInfo::runtime_call_type);
2724 2726 // scale the count on the way out:
2725 2727 __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);
2726 2728
2727 2729 __ btst(BytesPerInt-1, G1_bits);
2728 2730 __ br(Assembler::zero, true, Assembler::pt,
2729 2731 int_copy_entry, relocInfo::runtime_call_type);
2730 2732 // scale the count on the way out:
2731 2733 __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);
2732 2734
2733 2735 __ btst(BytesPerShort-1, G1_bits);
2734 2736 __ br(Assembler::zero, true, Assembler::pt,
2735 2737 short_copy_entry, relocInfo::runtime_call_type);
2736 2738 // scale the count on the way out:
2737 2739 __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);
2738 2740
2739 2741 __ br(Assembler::always, false, Assembler::pt,
2740 2742 byte_copy_entry, relocInfo::runtime_call_type);
2741 2743 __ delayed()->nop();
2742 2744
2743 2745 return start;
2744 2746 }
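The or3/btst chain above picks the widest element width that divides the source address, the destination address, and the byte count, scaling the count on the way into the chosen stub. In C terms (the real stub tail-branches instead of calling, and the srax preserves the sign of a negative count):

    uintptr_t bits = (uintptr_t)from | (uintptr_t)to | (uintptr_t)count;
    if      ((bits & 7) == 0) return long_copy (from, to, count >> 3);
    else if ((bits & 3) == 0) return int_copy  (from, to, count >> 2);
    else if ((bits & 1) == 0) return short_copy(from, to, count >> 1);
    else                      return byte_copy (from, to, count);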
2745 2747
2746 2748
2747 2749 // Perform range checks on the proposed arraycopy.
2748 2750 // Kills the two temps, but nothing else.
2749 2751 // Also, clean the sign bits of src_pos and dst_pos.
2750 2752 void arraycopy_range_checks(Register src, // source array oop (O0)
2751 2753 Register src_pos, // source position (O1)
2752 2754 Register dst, // destination array oop (O2)
2753 2755 Register dst_pos, // destination position (O3)
2754 2756 Register length, // length of copy (O4)
2755 2757 Register temp1, Register temp2,
2756 2758 Label& L_failed) {
2757 2759 BLOCK_COMMENT("arraycopy_range_checks:");
2758 2760
2759 2761 // if (src_pos + length > arrayOop(src)->length() ) FAIL;
2760 2762
2761 2763 const Register array_length = temp1; // scratch
2762 2764 const Register end_pos = temp2; // scratch
2763 2765
2764 2766 // Note: This next instruction may be in the delay slot of a branch:
2765 2767 __ add(length, src_pos, end_pos); // src_pos + length
2766 2768 __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
2767 2769 __ cmp(end_pos, array_length);
2768 2770 __ br(Assembler::greater, false, Assembler::pn, L_failed);
2769 2771
2770 2772 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
2771 2773 __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
2772 2774 __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
2773 2775 __ cmp(end_pos, array_length);
2774 2776 __ br(Assembler::greater, false, Assembler::pn, L_failed);
2775 2777
2776 2778 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
2777 2779 // Move with sign extension can be used since they are positive.
2778 2780 __ delayed()->signx(src_pos, src_pos);
2779 2781 __ signx(dst_pos, dst_pos);
2780 2782
2781 2783 BLOCK_COMMENT("arraycopy_range_checks done");
2782 2784 }
2783 2785
2784 2786
2785 2787 //
2786 2788 // Generate generic array copy stubs
2787 2789 //
2788 2790 // Input:
... 75 lines elided ...
2789 2791 // O0 - src oop
2790 2792 // O1 - src_pos
2791 2793 // O2 - dst oop
2792 2794 // O3 - dst_pos
2793 2795 // O4 - element count
2794 2796 //
2795 2797 // Output:
2796 2798 // O0 == 0 - success
2797 2799 // O0 == -1 - need to call System.arraycopy
2798 2800 //
2799 - address generate_generic_copy(const char *name) {
2800 -
2801 + address generate_generic_copy(const char *name,
2802 + address entry_jbyte_arraycopy,
2803 + address entry_jshort_arraycopy,
2804 + address entry_jint_arraycopy,
2805 + address entry_oop_arraycopy,
2806 + address entry_jlong_arraycopy,
2807 + address entry_checkcast_arraycopy) {
2801 2808 Label L_failed, L_objArray;
2802 2809
2803 2810 // Input registers
2804 2811 const Register src = O0; // source array oop
2805 2812 const Register src_pos = O1; // source position
2806 2813 const Register dst = O2; // destination array oop
2807 2814 const Register dst_pos = O3; // destination position
2808 2815 const Register length = O4; // elements count
2809 2816
2810 2817 // registers used as temp
2811 2818 const Register G3_src_klass = G3; // source array klass
2812 2819 const Register G4_dst_klass = G4; // destination array klass
2813 2820 const Register G5_lh = G5; // layout handler
2814 2821 const Register O5_temp = O5;
2815 2822
2816 2823 __ align(CodeEntryAlignment);
2817 2824 StubCodeMark mark(this, "StubRoutines", name);
2818 2825 address start = __ pc();
2819 2826
2820 2827 // bump this on entry, not on exit:
2821 2828 inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);
2822 2829
2823 2830 // In principle, the int arguments could be dirty.
2824 2831 //assert_clean_int(src_pos, G1);
2825 2832 //assert_clean_int(dst_pos, G1);
2826 2833 //assert_clean_int(length, G1);
2827 2834
2828 2835 //-----------------------------------------------------------------------
2829 2836 // Assembler stubs will be used for this call to arraycopy
2830 2837 // if the following conditions are met:
2831 2838 //
2832 2839 // (1) src and dst must not be null.
2833 2840 // (2) src_pos must not be negative.
2834 2841 // (3) dst_pos must not be negative.
2835 2842 // (4) length must not be negative.
2836 2843 // (5) src klass and dst klass should be the same and not NULL.
2837 2844 // (6) src and dst should be arrays.
2838 2845 // (7) src_pos + length must not exceed length of src.
2839 2846 // (8) dst_pos + length must not exceed length of dst.
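
Conditions (1)-(4) are handled first by the br_null/tst sequences below; as a plain C++ sketch (hypothetical helper, ignoring registers and delay slots):

  static int early_argument_checks(void* src, int src_pos,
                                   void* dst, int dst_pos, int length) {
    if (src == NULL || dst == NULL)               return -1;  // (1)
    if (src_pos < 0 || dst_pos < 0 || length < 0) return -1;  // (2)-(4)
    return 0;  // (5)-(8) are checked later against the klasses and lengths
  }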
2840 2847 BLOCK_COMMENT("arraycopy initial argument checks");
2841 2848
2842 2849 // if (src == NULL) return -1;
2843 2850 __ br_null(src, false, Assembler::pn, L_failed);
2844 2851
2845 2852 // if (src_pos < 0) return -1;
2846 2853 __ delayed()->tst(src_pos);
2847 2854 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2848 2855 __ delayed()->nop();
2849 2856
2850 2857 // if (dst == NULL) return -1;
2851 2858 __ br_null(dst, false, Assembler::pn, L_failed);
2852 2859
2853 2860 // if (dst_pos < 0) return -1;
2854 2861 __ delayed()->tst(dst_pos);
2855 2862 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2856 2863
2857 2864 // if (length < 0) return -1;
2858 2865 __ delayed()->tst(length);
2859 2866 __ br(Assembler::negative, false, Assembler::pn, L_failed);
2860 2867
2861 2868 BLOCK_COMMENT("arraycopy argument klass checks");
2862 2869 // get src->klass()
2863 2870 if (UseCompressedOops) {
2864 2871 __ delayed()->nop(); // ??? not good: load_klass expands to several instructions, so the delay slot is wasted
2865 2872 __ load_klass(src, G3_src_klass);
2866 2873 } else {
2867 2874 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
2868 2875 }
2869 2876
2870 2877 #ifdef ASSERT
2871 2878 // assert(src->klass() != NULL);
2872 2879 BLOCK_COMMENT("assert klasses not null");
2873 2880 { Label L_a, L_b;
2874 2881 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
2875 2882 __ delayed()->nop();
2876 2883 __ bind(L_a);
2877 2884 __ stop("broken null klass");
2878 2885 __ bind(L_b);
2879 2886 __ load_klass(dst, G4_dst_klass);
2880 2887 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
2881 2888 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
2882 2889 BLOCK_COMMENT("assert done");
2883 2890 }
2884 2891 #endif
2885 2892
2886 2893 // Load layout helper
2887 2894 //
2888 2895 //   |array_tag|     | header_size | element_type |     |log2_element_size|
2889 2896 //    32        30    24            16              8     2                0
2890 2897 //
2891 2898 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
2892 2899 //
2893 2900
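
Decoding that word in plain C++ (a sketch mirroring the Klass::layout_helper_*() accessors and the srl/and3 sequence used further down):

  static inline void decode_layout_helper(jint lh, int& tag, int& header_size, int& log2_elsize) {
    tag         = (int)(((juint)lh) >> Klass::_lh_array_tag_shift);           // 0x3, 0x2, or 0x0
    header_size = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
    log2_elsize =  lh & Klass::_lh_log2_element_size_mask;                    // 0..3
  }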
2894 2901 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2895 2902 Klass::layout_helper_offset_in_bytes();
2896 2903
2897 2904 // Load the 32-bit signed value. Use the br() instruction with it to check icc.
2898 2905 __ lduw(G3_src_klass, lh_offset, G5_lh);
2899 2906
2900 2907 if (UseCompressedOops) {
2901 2908 __ load_klass(dst, G4_dst_klass);
2902 2909 }
2903 2910 // Handle objArrays completely differently...
2904 2911 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2905 2912 __ set(objArray_lh, O5_temp);
2906 2913 __ cmp(G5_lh, O5_temp);
2907 2914 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
2908 2915 if (UseCompressedOops) {
2909 2916 __ delayed()->nop();
2910 2917 } else {
2911 2918 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
2912 2919 }
2913 2920
2914 2921 // if (src->klass() != dst->klass()) return -1;
2915 2922 __ cmp(G3_src_klass, G4_dst_klass);
2916 2923 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
2917 2924 __ delayed()->nop();
2918 2925
2919 2926 // if (!src->is_Array()) return -1;
2920 2927 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
2921 2928 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
2922 2929
2923 2930 // At this point, it is known to be a typeArray (array_tag 0x3).
2924 2931 #ifdef ASSERT
2925 2932 __ delayed()->nop();
2926 2933 { Label L;
2927 2934 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
2928 2935 __ set(lh_prim_tag_in_place, O5_temp);
2929 2936 __ cmp(G5_lh, O5_temp);
2930 2937 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
2931 2938 __ delayed()->nop();
2932 2939 __ stop("must be a primitive array");
2933 2940 __ bind(L);
2934 2941 }
2935 2942 #else
2936 2943 __ delayed(); // match next insn to prev branch
2937 2944 #endif
2938 2945
2939 2946 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
2940 2947 O5_temp, G4_dst_klass, L_failed);
2941 2948
2942 2949 // typeArrayKlass
2943 2950 //
2944 2951 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
2945 2952 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
2946 2953 //
2947 2954
2948 2955 const Register G4_offset = G4_dst_klass; // array offset
2949 2956 const Register G3_elsize = G3_src_klass; // log2 element size
2950 2957
2951 2958 __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
2952 2959 __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
2953 2960 __ add(src, G4_offset, src); // src array offset
2954 2961 __ add(dst, G4_offset, dst); // dst array offset
2955 2962 __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size
2956 2963
2957 2964 // The next registers must be set before the jump to the corresponding stub:
2958 2965 const Register from = O0; // source array address
2959 2966 const Register to = O1; // destination array address
2960 2967 const Register count = O2; // elements count
2961 2968
2962 2969 // The 'from', 'to', 'count' registers must be set in this order,
2963 2970 // since they alias 'src', 'src_pos', 'dst'.
2964 2971
2965 2972 BLOCK_COMMENT("scale indexes to element size");
2966 2973 __ sll_ptr(src_pos, G3_elsize, src_pos);
2967 2974 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
2968 2975 __ add(src, src_pos, from); // src_addr
2969 2976 __ add(dst, dst_pos, to); // dst_addr
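
In C terms this computes (a sketch; 'array_offset' is the header size extracted from the layout helper, already folded into 'src'/'dst' above):

  char* from = (char*)src + array_offset + ((size_t)src_pos << log2_elsize);  // src_addr
  char* to   = (char*)dst + array_offset + ((size_t)dst_pos << log2_elsize);  // dst_addr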
2970 2977
2971 2978 BLOCK_COMMENT("choose copy loop based on element size");
2972 2979 __ cmp(G3_elsize, 0);
2973 - __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jbyte_arraycopy);
2980 + __ br(Assembler::equal, true, Assembler::pt, entry_jbyte_arraycopy);
2974 2981 __ delayed()->signx(length, count); // length
2975 2982
2976 2983 __ cmp(G3_elsize, LogBytesPerShort);
2977 - __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jshort_arraycopy);
2984 + __ br(Assembler::equal, true, Assembler::pt, entry_jshort_arraycopy);
2978 2985 __ delayed()->signx(length, count); // length
2979 2986
2980 2987 __ cmp(G3_elsize, LogBytesPerInt);
2981 - __ br(Assembler::equal,true,Assembler::pt,StubRoutines::_jint_arraycopy);
2988 + __ br(Assembler::equal, true, Assembler::pt, entry_jint_arraycopy);
2982 2989 __ delayed()->signx(length, count); // length
2983 2990 #ifdef ASSERT
2984 2991 { Label L;
2985 2992 __ cmp(G3_elsize, LogBytesPerLong);
2986 2993 __ br(Assembler::equal, false, Assembler::pt, L);
2987 2994 __ delayed()->nop();
2988 2995 __ stop("must be long copy, but elsize is wrong");
2989 2996 __ bind(L);
2990 2997 }
2991 2998 #endif
2992 - __ br(Assembler::always,false,Assembler::pt,StubRoutines::_jlong_arraycopy);
2999 + __ br(Assembler::always, false, Assembler::pt, entry_jlong_arraycopy);
2993 3000 __ delayed()->signx(length, count); // length
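
The four annulled branches above amount to a switch on the log2 element size; as a sketch (jbyte_copy and friends are hypothetical C-callable stand-ins for the stub entries):

  switch (log2_elsize) {
    case 0:                return jbyte_copy (from, to, count);
    case LogBytesPerShort: return jshort_copy(from, to, count);
    case LogBytesPerInt:   return jint_copy  (from, to, count);
    default:               return jlong_copy (from, to, count);  // asserted above
  }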
2994 3001
2995 3002 // objArrayKlass
2996 3003 __ BIND(L_objArray);
2997 3004 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
2998 3005
2999 3006 Label L_plain_copy, L_checkcast_copy;
3000 3007 // test array classes for subtyping
3001 3008 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
3002 3009 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
3003 3010 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
3004 3011
3005 3012 // Identically typed arrays can be copied without element-wise checks.
3006 3013 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3007 3014 O5_temp, G5_lh, L_failed);
3008 3015
3009 3016 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
3010 3017 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
3011 3018 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
3012 3019 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
3013 3020 __ add(src, src_pos, from); // src_addr
3014 3021 __ add(dst, dst_pos, to); // dst_addr
3015 3022 __ BIND(L_plain_copy);
3016 - __ br(Assembler::always, false, Assembler::pt,StubRoutines::_oop_arraycopy);
3023 + __ br(Assembler::always, false, Assembler::pt, entry_oop_arraycopy);
3017 3024 __ delayed()->signx(length, count); // length
3018 3025
3019 3026 __ BIND(L_checkcast_copy);
3020 3027 // live at this point: G3_src_klass, G4_dst_klass
3021 3028 {
3022 3029 // Before looking at dst.length, make sure dst is also an objArray.
3023 3030 // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
3024 3031 __ cmp(G5_lh, O5_temp);
3025 3032 __ br(Assembler::notEqual, false, Assembler::pn, L_failed);
3026 3033
3027 3034 // It is safe to examine both src.length and dst.length.
3028 3035 __ delayed(); // match next insn to prev branch
3029 3036 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
3030 3037 O5_temp, G5_lh, L_failed);
3031 3038
3032 3039 // Marshal the base address arguments now, freeing registers.
3033 3040 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
3034 3041 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
3035 3042 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
3036 3043 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
3037 3044 __ add(src, src_pos, from); // src_addr
3038 3045 __ add(dst, dst_pos, to); // dst_addr
3039 3046 __ signx(length, count); // length (reloaded)
3040 3047
3041 3048 Register sco_temp = O3; // this register is free now
3042 3049 assert_different_registers(from, to, count, sco_temp,
3043 3050 G4_dst_klass, G3_src_klass);
3044 3051
3045 3052 // Generate the type check.
3046 3053 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
3047 3054 Klass::super_check_offset_offset_in_bytes());
3048 3055 __ lduw(G4_dst_klass, sco_offset, sco_temp);
3049 3056 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
3050 3057 O5_temp, L_plain_copy);
3051 3058
3052 3059 // Fetch destination element klass from the objArrayKlass header.
3053 3060 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
3054 3061 objArrayKlass::element_klass_offset_in_bytes());
3055 3062
3056 3063 // the checkcast_copy loop needs two extra arguments:
3057 3064 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass
3058 3065 // lduw(O4, sco_offset, O3); // sco of elem klass
3059 3066
3060 - __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
3067 + __ br(Assembler::always, false, Assembler::pt, entry_checkcast_arraycopy);
3061 3068 __ delayed()->lduw(O4, sco_offset, O3);
3062 3069 }
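
At the jump above, the checkcast stub's assumed C-level contract looks like this (a sketch, not the actual declaration; arguments are passed in O0-O4 as set up by the code in this block):

  // int checkcast_copy(oop* from /*O0*/, oop* to /*O1*/, size_t count /*O2*/,
  //                    int super_check_offset /*O3*/, Klass* dest_elem_klass /*O4*/);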
3063 3070
3064 3071 __ BIND(L_failed);
3065 3072 __ retl();
3066 3073 __ delayed()->sub(G0, 1, O0); // return -1
3067 3074 return start;
3068 3075 }
3069 3076
3070 3077 void generate_arraycopy_stubs() {
3078 + address entry;
3079 + address entry_jbyte_arraycopy;
3080 + address entry_jshort_arraycopy;
3081 + address entry_jint_arraycopy;
3082 + address entry_oop_arraycopy;
3083 + address entry_jlong_arraycopy;
3084 + address entry_checkcast_arraycopy;
3085 +
3086 + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
3087 + "jbyte_disjoint_arraycopy");
3088 + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
3089 + "jbyte_arraycopy");
3090 + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3091 + "jshort_disjoint_arraycopy");
3092 + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
3093 + "jshort_arraycopy");
3094 + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
3095 + "jint_disjoint_arraycopy");
3096 + StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, &entry_jint_arraycopy,
3097 + "jint_arraycopy");
3098 + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, &entry,
3099 + "jlong_disjoint_arraycopy");
3100 + StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, entry, &entry_jlong_arraycopy,
3101 + "jlong_arraycopy");
3102 + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
3103 + "oop_disjoint_arraycopy");
3104 + StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3105 + "oop_arraycopy");
3106 +
3107 +
3108 + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
3109 + "arrayof_jbyte_disjoint_arraycopy");
3110 + StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
3111 + "arrayof_jbyte_arraycopy");
3112 +
3113 + StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3114 + "arrayof_jshort_disjoint_arraycopy");
3115 + StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
3116 + "arrayof_jshort_arraycopy");
3071 3117
3072 - // Note: the disjoint stubs must be generated first, some of
3073 - // the conjoint stubs use them.
3074 - StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
3075 - StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
3076 - StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
3077 - StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
3078 - StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
3079 - StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
3080 - StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
3081 - StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
3082 - StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
3083 - StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");
3084 -
3085 - StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
3086 - StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
3087 - StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy");
3088 - StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
3089 - StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy");
3090 - StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
3091 - StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
3118 + StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3119 + "arrayof_jint_disjoint_arraycopy");
3092 3120 #ifdef _LP64
3093 3121 // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
3094 - StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
3122 + StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, NULL, "arrayof_jint_arraycopy");
3095 3123 #else
3096 3124 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
3097 3125 #endif
3126 +
3127 + StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, NULL,
3128 + "arrayof_jlong_disjoint_arraycopy");
3129 + StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, NULL,
3130 + "arrayof_oop_disjoint_arraycopy");
3131 +
3098 3132 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
3099 3133 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
3100 3134
3101 - StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
3102 - StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
3103 - StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
3135 + StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3136 + StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3137 + entry_jbyte_arraycopy,
3138 + entry_jshort_arraycopy,
3139 + entry_jint_arraycopy,
3140 + entry_jlong_arraycopy);
3141 + StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3142 + entry_jbyte_arraycopy,
3143 + entry_jshort_arraycopy,
3144 + entry_jint_arraycopy,
3145 + entry_oop_arraycopy,
3146 + entry_jlong_arraycopy,
3147 + entry_checkcast_arraycopy);
3104 3148
3105 3149 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
3106 3150 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
3107 3151 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
3108 3152 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
3109 3153 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
3110 3154 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
3111 3155 }
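
The interleaving above preserves the invariant stated in the deleted comment: each disjoint stub is generated before its conjoint counterpart, because the conjoint generator receives the disjoint stub's no-overlap entry through the 'entry' out-parameter. Conceptually, the generated conjoint stub does (a sketch, assuming the nooverlap_target convention used by these generators):

  // if the ranges cannot overlap backwards, defer to the faster disjoint copy:
  //   if (to <= from || to >= from + (count << log2_elsize))
  //     goto nooverlap_target;     // the matching disjoint stub's entry
  //   ... otherwise copy from high addresses down ...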
3112 3156
3113 3157 void generate_initial() {
3114 3158 // Generates all stubs and initializes the entry points
3115 3159
3116 3160 //------------------------------------------------------------------------------------------------------------------------
3117 3161 // entry points that exist in all platforms
3118 3162 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
3119 3163 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
3120 3164 StubRoutines::_forward_exception_entry = generate_forward_exception();
3121 3165
3122 3166 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
3123 3167 StubRoutines::_catch_exception_entry = generate_catch_exception();
3124 3168
3125 3169 //------------------------------------------------------------------------------------------------------------------------
3126 3170 // entry points that are platform specific
3127 3171 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
3128 3172
3129 3173 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
3130 3174 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
3131 3175
3132 3176 #if !defined(COMPILER2) && !defined(_LP64)
3133 3177 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
3134 3178 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
3135 3179 StubRoutines::_atomic_add_entry = generate_atomic_add();
3136 3180 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry;
3137 3181 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
3138 3182 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
3139 3183 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
3140 3184 #endif // !COMPILER2 && !_LP64
3141 3185 }
3142 3186
3143 3187
3144 3188 void generate_all() {
3145 3189 // Generates all stubs and initializes the entry points
3146 3190
3147 3191 // Generate partial_subtype_check first here since its code depends on
3148 3192 // UseZeroBaseCompressedOops which is defined after heap initialization.
3149 3193 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
3150 3194 // These entry points require SharedInfo::stack0 to be set up in non-core builds
3151 3195 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
3152 3196 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
3153 3197 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
3154 3198 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
3155 3199 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
3156 3200 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
3157 3201
3158 3202 StubRoutines::_handler_for_unsafe_access_entry =
3159 3203 generate_handler_for_unsafe_access();
3160 3204
3161 3205 // support for verify_oop (must happen after universe_init)
3162 3206 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();
3163 3207
3164 3208 // arraycopy stubs used by compilers
3165 3209 generate_arraycopy_stubs();
3166 3210
3167 3211 // Don't initialize the platform math functions since sparc
3168 3212 // doesn't have intrinsics for these operations.
3169 3213 }
3170 3214
3171 3215
3172 3216 public:
3173 3217 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3174 3218 // replace the standard masm with a special one:
3175 3219 _masm = new MacroAssembler(code);
3176 3220
3177 3221 _stub_count = !all ? 0x100 : 0x200;
3178 3222 if (all) {
3179 3223 generate_all();
3180 3224 } else {
3181 3225 generate_initial();
3182 3226 }
3183 3227
3184 3228 // make sure this stub is available for all local calls
3185 3229 if (_atomic_add_stub.is_unbound()) {
3186 3230 // generate a second time, if necessary
3187 3231 (void) generate_atomic_add();
3188 3232 }
3189 3233 }
3190 3234
3191 3235
3192 3236 private:
3193 3237 int _stub_count;
3194 3238 void stub_prolog(StubCodeDesc* cdesc) {
3195 3239 # ifdef ASSERT
3196 3240 // put extra information in the stub code, to make it more readable
3197 3241 #ifdef _LP64
3198 3242 // Write the high part of the address
3199 3243 // [RGV] Check if there is a dependency on the size of this prolog
3200 3244 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
3201 3245 #endif
3202 3246 __ emit_data((intptr_t)cdesc, relocInfo::none);
3203 3247 __ emit_data(++_stub_count, relocInfo::none);
3204 3248 # endif
3205 3249 align(true);
3206 3250 }
3207 3251
3208 3252 void align(bool at_header = false) {
3209 3253 // %%%%% move this constant somewhere else
3210 3254 // UltraSPARC cache line size is 8 instructions:
3211 3255 const unsigned int icache_line_size = 32;
3212 3256 const unsigned int icache_half_line_size = 16;
3213 3257
3214 3258 if (at_header) {
3215 3259 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
3216 3260 __ emit_data(0, relocInfo::none);
3217 3261 }
3218 3262 } else {
3219 3263 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
3220 3264 __ nop();
3221 3265 }
3222 3266 }
3223 3267 }
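
For a power-of-two boundary, the padding count can also be computed directly instead of looping (equivalent arithmetic, shown for reference; 'pc_value' is a hypothetical stand-in for __ pc()):

  uintptr_t addr = (uintptr_t)pc_value;                  // current code pointer
  uintptr_t pad  = (0 - addr) & (icache_line_size - 1);  // bytes to the next boundary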
3224 3268
3225 3269 }; // end class declaration
3226 3270
3227 -
3228 -address StubGenerator::disjoint_byte_copy_entry = NULL;
3229 -address StubGenerator::disjoint_short_copy_entry = NULL;
3230 -address StubGenerator::disjoint_int_copy_entry = NULL;
3231 -address StubGenerator::disjoint_long_copy_entry = NULL;
3232 -address StubGenerator::disjoint_oop_copy_entry = NULL;
3233 -
3234 -address StubGenerator::byte_copy_entry = NULL;
3235 -address StubGenerator::short_copy_entry = NULL;
3236 -address StubGenerator::int_copy_entry = NULL;
3237 -address StubGenerator::long_copy_entry = NULL;
3238 -address StubGenerator::oop_copy_entry = NULL;
3239 -
3240 -address StubGenerator::checkcast_copy_entry = NULL;
3241 -
3242 3271 void StubGenerator_generate(CodeBuffer* code, bool all) {
3243 3272 StubGenerator g(code, all);
3244 3273 }