
src/hotspot/cpu/arm/stubGenerator_arm.cpp

rename things

8198949_arraycopy

7  *                                                                                                                                   
8  * This code is distributed in the hope that it will be useful, but WITHOUT                                                          
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or                                                             
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License                                                             
11  * version 2 for more details (a copy is included in the LICENSE file that                                                           
12  * accompanied this code).                                                                                                           
13  *                                                                                                                                   
14  * You should have received a copy of the GNU General Public License version                                                         
15  * 2 along with this work; if not, write to the Free Software Foundation,                                                            
16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.                                                                     
17  *                                                                                                                                   
18  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA                                                           
19  * or visit www.oracle.com if you need additional information or have any                                                            
20  * questions.                                                                                                                        
21  *                                                                                                                                   
22  */                                                                                                                                  
23 
24 #include "precompiled.hpp"                                                                                                           
25 #include "asm/assembler.hpp"                                                                                                         
26 #include "assembler_arm.inline.hpp"                                                                                                  
27 #include "gc/shared/cardTable.hpp"                                                                                                   
28 #include "gc/shared/cardTableBarrierSet.hpp"                                                                                         
29 #include "interpreter/interpreter.hpp"                                                                                               
30 #include "nativeInst_arm.hpp"                                                                                                        
31 #include "oops/instanceOop.hpp"                                                                                                      
32 #include "oops/method.hpp"                                                                                                           
33 #include "oops/objArrayKlass.hpp"                                                                                                    
34 #include "oops/oop.inline.hpp"                                                                                                       
35 #include "prims/methodHandles.hpp"                                                                                                   
36 #include "runtime/frame.inline.hpp"                                                                                                  
37 #include "runtime/handles.inline.hpp"                                                                                                
38 #include "runtime/sharedRuntime.hpp"                                                                                                 
39 #include "runtime/stubCodeGenerator.hpp"                                                                                             
40 #include "runtime/stubRoutines.hpp"                                                                                                  
41 #include "utilities/align.hpp"                                                                                                       
42 #ifdef COMPILER2                                                                                                                     
43 #include "opto/runtime.hpp"                                                                                                          
44 #endif                                                                                                                               
45 
46 // Declaration and definition of StubGenerator (no .hpp file).                                                                       
47 // For a more detailed description of the stub routine structure                                                                     

7  *
8  * This code is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11  * version 2 for more details (a copy is included in the LICENSE file that
12  * accompanied this code).
13  *
14  * You should have received a copy of the GNU General Public License version
15  * 2 along with this work; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17  *
18  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19  * or visit www.oracle.com if you need additional information or have any
20  * questions.
21  *
22  */
23 
24 #include "precompiled.hpp"
25 #include "asm/assembler.hpp"
26 #include "assembler_arm.inline.hpp"
27 #include "gc/shared/barrierSet.hpp"
28 #include "gc/shared/barrierSetAssembler.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_arm.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/align.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #endif
45 
46 // Declaration and definition of StubGenerator (no .hpp file).
47 // For a more detailed description of the stub routine structure

2837       }                                                                                                                              
2838 
2839       __ ret();                                                                                                                      
2840     }                                                                                                                                
2841 
2842     if (! to_is_aligned) {                                                                                                           
2843       __ BIND(L_unaligned_dst);                                                                                                      
2844       int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);                    
2845       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");                     
2846 
2847       if (status) {                                                                                                                  
2848         __ mov(R0, 0); // OK                                                                                                         
2849       }                                                                                                                              
2850 
2851       __ ret();                                                                                                                      
2852     }                                                                                                                                
2853 
2854     return start;                                                                                                                    
2855   }                                                                                                                                  
2856 
2857 #if INCLUDE_ALL_GCS                                                                                                                  
2858   //                                                                                                                                 
2859   //  Generate pre-write barrier for array.                                                                                          
2860   //                                                                                                                                 
2861   //  Input:                                                                                                                         
2862   //     addr     - register containing starting address                                                                             
2863   //     count    - register containing element count, 32-bit int                                                                    
2864   //     callee_saved_regs -                                                                                                         
2865   //                the call must preserve this number of registers: R0, R1, ..., R[callee_saved_regs-1]                             
2866   //                                                                                                                                 
2867   //  callee_saved_regs must include addr and count                                                                                  
2868   //  Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) except for callee_saved_regs.                 
2869   void gen_write_ref_array_pre_barrier(Register addr, Register count, int callee_saved_regs) {                                       
2870     BarrierSet* bs = Universe::heap()->barrier_set();                                                                                
2871     switch (bs->kind()) {                                                                                                            
2872     case BarrierSet::G1BarrierSet:                                                                                                   
2873       {                                                                                                                              
2874         assert( addr->encoding() < callee_saved_regs, "addr must be saved");                                                         
2875         assert(count->encoding() < callee_saved_regs, "count must be saved");                                                        
2876                                                                                                                                      
2877         BLOCK_COMMENT("PreBarrier");                                                                                                 
2878                                                                                                                                      
2879 #ifdef AARCH64                                                                                                                       
2880         callee_saved_regs = align_up(callee_saved_regs, 2);                                                                          
2881         for (int i = 0; i < callee_saved_regs; i += 2) {                                                                             
2882           __ raw_push(as_Register(i), as_Register(i+1));                                                                             
2883         }                                                                                                                            
2884 #else                                                                                                                                
2885         RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));                                                  
2886         __ push(saved_regs | R9ifScratched);                                                                                         
2887 #endif // AARCH64                                                                                                                    
2888                                                                                                                                      
2889         if (addr != R0) {                                                                                                            
2890           assert_different_registers(count, R0);                                                                                     
2891           __ mov(R0, addr);                                                                                                          
2892         }                                                                                                                            
2893 #ifdef AARCH64                                                                                                                       
2894         __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_pre takes size_t                                        
2895 #else                                                                                                                                
2896         if (count != R1) {                                                                                                           
2897           __ mov(R1, count);                                                                                                         
2898         }                                                                                                                            
2899 #endif // AARCH64                                                                                                                    
2900                                                                                                                                      
2901         __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));                                                  
2902                                                                                                                                      
2903 #ifdef AARCH64                                                                                                                       
2904         for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {                                                                        
2905           __ raw_pop(as_Register(i), as_Register(i+1));                                                                              
2906         }                                                                                                                            
2907 #else                                                                                                                                
2908         __ pop(saved_regs | R9ifScratched);                                                                                          
2909 #endif // AARCH64                                                                                                                    
2910       }                                                                                                                              
2911     case BarrierSet::CardTableBarrierSet:                                                                                            
2912       break;                                                                                                                         
2913     default:                                                                                                                         
2914       ShouldNotReachHere();                                                                                                          
2915     }                                                                                                                                
2916   }                                                                                                                                  
2917 #endif // INCLUDE_ALL_GCS                                                                                                            
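For readers not familiar with the G1 pre-write barrier emitted above: the stub ends in a call to BarrierSet::static_write_ref_array_pre, which for G1 records the references that the copy is about to overwrite so the concurrent marker still visits them (snapshot-at-the-beginning). The C++ below is a simplified sketch of that idea only, not HotSpot code; marking_active() and satb_enqueue() are hypothetical stand-ins.

    #include <cstddef>

    typedef void* oop;               // stand-in for HotSpot's oop type
    bool marking_active();           // hypothetical: is concurrent marking running?
    void satb_enqueue(oop pre_val);  // hypothetical: push onto the SATB mark queue

    // Record every reference in the destination range before it is overwritten.
    void write_ref_array_pre_sketch(oop* dst, size_t count) {
      if (!marking_active()) return;      // the barrier is only needed while marking
      for (size_t i = 0; i < count; i++) {
        oop pre_val = dst[i];             // value the copy will clobber
        if (pre_val != NULL) {
          satb_enqueue(pre_val);
        }
      }
    }

This is also why the stub only needs to hand the destination address and the element count to the runtime call.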
2918                                                                                                                                      
2919   //                                                                                                                                 
2920   //  Generate post-write barrier for array.                                                                                         
2921   //                                                                                                                                 
2922   //  Input:                                                                                                                         
2923   //     addr     - register containing starting address (can be scratched)                                                          
2924   //     count    - register containing element count, 32-bit int (can be scratched)                                                 
2925   //     tmp      - scratch register                                                                                                 
2926   //                                                                                                                                 
2927   //  Note: LR can be scratched but might be equal to addr, count or tmp                                                             
2928   //  Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).                                              
2929   void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {                                               
2930     assert_different_registers(addr, count, tmp);                                                                                    
2931     BarrierSet* bs = Universe::heap()->barrier_set();                                                                                
2932                                                                                                                                      
2933     switch (bs->kind()) {                                                                                                            
2934     case BarrierSet::G1BarrierSet:                                                                                                   
2935       {                                                                                                                              
2936         BLOCK_COMMENT("G1PostBarrier");                                                                                              
2937         if (addr != R0) {                                                                                                            
2938           assert_different_registers(count, R0);                                                                                     
2939           __ mov(R0, addr);                                                                                                          
2940         }                                                                                                                            
2941 #ifdef AARCH64                                                                                                                       
2942         __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_post takes size_t                                       
2943 #else                                                                                                                                
2944         if (count != R1) {                                                                                                           
2945           __ mov(R1, count);                                                                                                         
2946         }                                                                                                                            
2947 #if R9_IS_SCRATCHED                                                                                                                  
2948         // Safer to save R9 here since callers may have been written                                                                 
2949         // assuming R9 survives. This is suboptimal but is not in                                                                    
2950         // general worth optimizing for the few platforms where R9                                                                   
2951         // is scratched. Note that the optimization might not be too
2952         // difficult for this particular call site.                                                                                  
2953         __ push(R9);                                                                                                                 
2954 #endif                                                                                                                               
2955 #endif // !AARCH64                                                                                                                   
2956         __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));                                                 
2957 #ifndef AARCH64                                                                                                                      
2958 #if R9_IS_SCRATCHED                                                                                                                  
2959         __ pop(R9);                                                                                                                  
2960 #endif                                                                                                                               
2961 #endif // !AARCH64                                                                                                                   
2962       }                                                                                                                              
2963       break;                                                                                                                         
2964     case BarrierSet::CardTableBarrierSet:                                                                                            
2965       {                                                                                                                              
2966         BLOCK_COMMENT("CardTablePostBarrier");                                                                                       
2967         CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);                                                       
2968         CardTable* ct = ctbs->card_table();                                                                                          
2969         assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");                                                   
2970                                                                                                                                      
2971         Label L_cardtable_loop, L_done;                                                                                              
2972                                                                                                                                      
2973         __ cbz_32(count, L_done); // zero count - nothing to do                                                                      
2974                                                                                                                                      
2975         __ add_ptr_scaled_int32(count, addr, count, LogBytesPerHeapOop);                                                             
2976         __ sub(count, count, BytesPerHeapOop);                            // last addr                                               
2977                                                                                                                                      
2978         __ logical_shift_right(addr, addr, CardTable::card_shift);                                                                   
2979         __ logical_shift_right(count, count, CardTable::card_shift);                                                                 
2980         __ sub(count, count, addr); // nb of cards                                                                                   
2981                                                                                                                                      
2982         // warning: Rthread has not been preserved                                                                                   
2983         __ mov_address(tmp, (address) ct->byte_map_base(), symbolic_Relocation::card_table_reference);                               
2984         __ add(addr,tmp, addr);                                                                                                      
2985                                                                                                                                      
2986         Register zero = __ zero_register(tmp);                                                                                       
2987                                                                                                                                      
2988         __ BIND(L_cardtable_loop);                                                                                                   
2989         __ strb(zero, Address(addr, 1, post_indexed));                                                                               
2990         __ subs(count, count, 1);                                                                                                    
2991         __ b(L_cardtable_loop, ge);                                                                                                  
2992         __ BIND(L_done);                                                                                                             
2993       }                                                                                                                              
2994       break;                                                                                                                         
2995     default:                                                                                                                         
2996       ShouldNotReachHere();                                                                                                          
2997     }                                                                                                                                
2998   }                                                                                                                                  
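The CardTableBarrierSet branch above computes the first and last card covered by the copied region and then dirties one card byte per iteration (the strb of zero with post-indexed addressing). A plain C++ sketch of the same arithmetic follows; the card size, the dirty value, and uncompressed oops are assumptions made for the illustration, and byte_map_base is taken to be the biased base pointer that the mov_address/add sequence above materializes.

    #include <cstddef>
    #include <cstdint>

    const int     kCardShift       = 9;              // assumed 512-byte cards
    const size_t  kBytesPerHeapOop = sizeof(void*);  // assumed uncompressed oops
    const uint8_t kDirtyCard       = 0;              // assumed: the stub stores zero per card

    // Dirty every card spanned by [dst, dst + count elements).
    void dirty_cards(uint8_t* byte_map_base, uintptr_t dst, size_t count) {
      if (count == 0) return;                                              // mirrors the cbz_32 fast path
      uintptr_t last = dst + count * kBytesPerHeapOop - kBytesPerHeapOop;  // address of the last element
      for (uintptr_t card = dst >> kCardShift; card <= (last >> kCardShift); card++) {
        byte_map_base[card] = kDirtyCard;                                  // one strb per covered card
      }
    }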
2999 
3000   // Generates pattern of code to be placed after raw data copying in generate_oop_copy                                              
3001   // Includes return from arraycopy stub.                                                                                            
3002   //                                                                                                                                 
3003   // Arguments:                                                                                                                      
3004   //     to:       destination pointer after copying.                                                                                
3005   //               if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region                              
3006   //     count:    total number of copied elements, 32-bit int                                                                       
3007   //                                                                                                                                 
3008   // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.                      
3009   void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward) {                    
3010     assert_different_registers(to, count, tmp);                                                                                      
3011 
3012     if (forward) {                                                                                                                   
3013       // 'to' is upper bound of the modified region                                                                                  
3014       // restore initial dst:                                                                                                        
3015       __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);                                                                    
3016     }                                                                                                                                
3017 
3018     // 'to' is the beginning of the region                                                                                           
3019 
3020     gen_write_ref_array_post_barrier(to, count, tmp);                                                                                
3021 
3022     if (status) {                                                                                                                    
3023       __ mov(R0, 0); // OK                                                                                                           
3024     }                                                                                                                                
3025 
3026 #ifdef AARCH64                                                                                                                       
3027     __ raw_pop(LR, ZR);                                                                                                              
3028     __ ret();                                                                                                                        
3029 #else                                                                                                                                
3030     __ pop(PC);                                                                                                                      
3031 #endif // AARCH64                                                                                                                    
3032   }                                                                                                                                  
3033 
3034 
3035   //  Generate stub for assign-compatible oop copy.  If "aligned" is true, the                                                       
3036   //  "from" and "to" addresses are assumed to be heapword aligned.                                                                  
3037   //                                                                                                                                 
3038   //  If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and                                       
3039  //  "nooverlap_target" must be specified as the address to jump to if they don't overlap.

2837       }
2838 
2839       __ ret();
2840     }
2841 
2842     if (! to_is_aligned) {
2843       __ BIND(L_unaligned_dst);
2844       int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
2845       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
2846 
2847       if (status) {
2848         __ mov(R0, 0); // OK
2849       }
2850 
2851       __ ret();
2852     }
2853 
2854     return start;
2855   }
2856 
2857 
2858   // Generates pattern of code to be placed after raw data copying in generate_oop_copy
2859   // Includes return from arraycopy stub.
2860   //
2861   // Arguments:
2862   //     to:       destination pointer after copying.
2863   //               if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
2864   //     count:    total number of copied elements, 32-bit int
2865   //
2866   // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
2867   void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward, DecoratorSet decorators) {
2868     assert_different_registers(to, count, tmp);
2869 
2870     if (forward) {
2871       // 'to' is upper bound of the modified region
2872       // restore initial dst:
2873       __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
2874     }
2875 
2876     // 'to' is the beginning of the region
2877 
2878     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2879     bs->arraycopy_epilogue(this, decorators, true, to, count, tmp);
2880 
2881     if (status) {
2882       __ mov(R0, 0); // OK
2883     }
2884 
2885 #ifdef AARCH64
2886     __ raw_pop(LR, ZR);
2887     __ ret();
2888 #else
2889     __ pop(PC);
2890 #endif // AARCH64
2891   }
2892 
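One detail worth spelling out in the helper above: when copying forward, the raw copy loop leaves 'to' just past the last copied element, so the sub_ptr_scaled_int32 rewinds it before the epilogue barrier runs. A minimal sketch of that rewind, assuming 'count' elements of BytesPerHeapOop bytes each were copied:

    #include <cstddef>
    #include <cstdint>

    // What sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop) computes.
    char* rewind_dst(char* to_end, uint32_t count, size_t bytes_per_heap_oop) {
      return to_end - (size_t)count * bytes_per_heap_oop;  // back to the start of the modified region
    }

For a backward copy no rewind is needed, since 'to' already points at the beginning of the modified region.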
2893 
2894   //  Generate stub for assign-compatible oop copy.  If "aligned" is true, the
2895   //  "from" and "to" addresses are assumed to be heapword aligned.
2896   //
2897   //  If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
2898  //  "nooverlap_target" must be specified as the address to jump to if they don't overlap.

3068 
3069     inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);                                                                  
3070 
3071     // Conjoint case: since execution reaches this point, the arrays overlap, so perform a backward copy
3072     // Disjoint case: perform a forward copy
3073     bool forward = disjoint;                                                                                                         
3074 
3075     const int bytes_per_count = BytesPerHeapOop;                                                                                     
3076     const int log_bytes_per_count = LogBytesPerHeapOop;                                                                              
3077 
3078     const Register saved_count = LR;                                                                                                 
3079     const int callee_saved_regs = 3; // R0-R2                                                                                        
3080 
3081     // LR is used later to save barrier args                                                                                         
3082 #ifdef AARCH64                                                                                                                       
3083     __ raw_push(LR, ZR);                                                                                                             
3084 #else                                                                                                                                
3085     __ push(LR);                                                                                                                     
3086 #endif // AARCH64                                                                                                                    
3087 
3088 #if INCLUDE_ALL_GCS                                                                                                                  
3089     gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);                                                                   
3090 #endif // INCLUDE_ALL_GCS                                                                                                            
3091 
3092     // save arguments for barrier generation (after the pre barrier)                                                                 
3093     __ mov(saved_count, count);                                                                                                      
3094 
3095     if (!forward) {                                                                                                                  
3096       __ add_ptr_scaled_int32(to,   to,   count, log_bytes_per_count);                                                               
3097       __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);                                                               
3098     }                                                                                                                                
3099 
3100     // for short arrays, just do single element copy                                                                                 
3101     Label L_small_array;                                                                                                             
3102     const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?                             
3103     __ cmp_32(count, small_copy_limit);                                                                                              
3104     __ b(L_small_array, le);                                                                                                         
3105 
3106     bool from_is_aligned = (bytes_per_count >= 8);                                                                                   
3107     if (aligned && forward && (HeapWordSize % 8 == 0)) {                                                                             
3108         // if 'from' is heapword aligned and HeapWordSize is divisible by 8,                                                         
3109         //  then from is aligned by 8                                                                                                

2927 
2928     inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);
2929 
2930     // Conjoint case: since execution reaches this point, the arrays overlap, so perform a backward copy
2931     // Disjoint case: perform a forward copy
2932     bool forward = disjoint;
2933 
2934     const int bytes_per_count = BytesPerHeapOop;
2935     const int log_bytes_per_count = LogBytesPerHeapOop;
2936 
2937     const Register saved_count = LR;
2938     const int callee_saved_regs = 3; // R0-R2
2939 
2940     // LR is used later to save barrier args
2941 #ifdef AARCH64
2942     __ raw_push(LR, ZR);
2943 #else
2944     __ push(LR);
2945 #endif // AARCH64
2946 
2947     DecoratorSet decorators = 0;
2948     if (disjoint) {
2949       decorators |= ARRAYCOPY_DISJOINT;
2950     }
2951     if (aligned) {
2952       decorators |= ARRAYCOPY_ALIGNED;
2953     }
2954 
2955     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2956     bs->arraycopy_prologue(this, decorators, true, to, count, callee_saved_regs);
2957 
2958     // save arguments for barrier generation (after the pre barrier)
2959     __ mov(saved_count, count);
2960 
2961     if (!forward) {
2962       __ add_ptr_scaled_int32(to,   to,   count, log_bytes_per_count);
2963       __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
2964     }
2965 
2966     // for short arrays, just do single element copy
2967     Label L_small_array;
2968     const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?
2969     __ cmp_32(count, small_copy_limit);
2970     __ b(L_small_array, le);
2971 
2972     bool from_is_aligned = (bytes_per_count >= 8);
2973     if (aligned && forward && (HeapWordSize % 8 == 0)) {
2974         // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
2975         //  then from is aligned by 8
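As a worked example of the threshold above: small_copy_limit is the element count that covers roughly eight machine words, rounded up to whole elements. The figures below assume the usual values of wordSize and BytesPerHeapOop; the real constants come from the build configuration.

    // small_copy_limit = (8*wordSize + 7) / bytes_per_count
    //   32-bit ARM  (wordSize = 4, oop = 4 bytes):  (32 + 7) / 4 =  9 elements
    //   AArch64     (wordSize = 8, oop = 8 bytes):  (64 + 7) / 8 =  8 elements
    //   AArch64 with compressed oops (oop = 4):     (64 + 7) / 4 = 17 elements

Arrays at or below this limit take the single-element path, which is why the asserts in these stubs can require small_copy_limit >= count_required_to_align + min_copy: anything that reaches the aligned loops is guaranteed not to be exhausted by the alignment prologue.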

3128         to_is_aligned = true;                                                                                                        
3129       }                                                                                                                              
3130     }                                                                                                                                
3131 
3132     Label L_unaligned_dst;                                                                                                           
3133 
3134     if (!to_is_aligned) {                                                                                                            
3135       BLOCK_COMMENT("Check dst alignment:");                                                                                         
3136       __ tst(to, wordSize - 1);                                                                                                      
3137       __ b(L_unaligned_dst, ne); // 'to' is not aligned                                                                              
3138     }                                                                                                                                
3139 
3140     int min_copy;                                                                                                                    
3141     if (forward) {                                                                                                                   
3142       min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);                                               
3143     } else {                                                                                                                         
3144       min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);                                              
3145     }                                                                                                                                
3146     assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");                                
3147 
3148     oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);                                            
3149 
3150     {                                                                                                                                
3151       copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);                                       
3152 
3153       oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);                                          
3154     }                                                                                                                                
3155 
3156     if (!to_is_aligned) {                                                                                                            
3157       // !to_is_aligned <=> UseCompressedOops && AArch64                                                                             
3158       __ BIND(L_unaligned_dst);                                                                                                      
3159 #ifdef AARCH64                                                                                                                       
3160       assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");                           
3161 #else                                                                                                                                
3162       ShouldNotReachHere();                                                                                                          
3163 #endif // AARCH64                                                                                                                    
3164       int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);                    
3165       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");                     
3166 
3167       oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);                                          
3168     }                                                                                                                                
3169 
3170     return start;                                                                                                                    
3171   }                                                                                                                                  
3172 
3173   //  Generate 'unsafe' array copy stub                                                                                              
3174   //  Though just as safe as the other stubs, it takes an unscaled                                                                   
3175   //  size_t argument instead of an element count.                                                                                   
3176   //                                                                                                                                 
3177   // Arguments for generated stub:                                                                                                   
3178   //      from:  R0                                                                                                                  
3179   //      to:    R1                                                                                                                  
3180   //      count: R2 byte count, treated as ssize_t, can be zero                                                                      
3181   //                                                                                                                                 
3182   // Examines the alignment of the operands and dispatches                                                                           
3183   // to a long, int, short, or byte copy loop.                                                                                       
3184   //                                                                                                                                 
3185   address generate_unsafe_copy(const char* name) {                                                                                   
3186 

2994         to_is_aligned = true;
2995       }
2996     }
2997 
2998     Label L_unaligned_dst;
2999 
3000     if (!to_is_aligned) {
3001       BLOCK_COMMENT("Check dst alignment:");
3002       __ tst(to, wordSize - 1);
3003       __ b(L_unaligned_dst, ne); // 'to' is not aligned
3004     }
3005 
3006     int min_copy;
3007     if (forward) {
3008       min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3009     } else {
3010       min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3011     }
3012     assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
3013 
3014     oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3015 
3016     {
3017       copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3018 
3019       oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3020     }
3021 
3022     if (!to_is_aligned) {
3023       // !to_is_aligned <=> UseCompressedOops && AArch64
3024       __ BIND(L_unaligned_dst);
3025 #ifdef AARCH64
3026       assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
3027 #else
3028       ShouldNotReachHere();
3029 #endif // AARCH64
3030       int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3031       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
3032 
3033       oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3034     }
3035 
3036     return start;
3037   }
3038 
3039   //  Generate 'unsafe' array copy stub
3040   //  Though just as safe as the other stubs, it takes an unscaled
3041   //  size_t argument instead of an element count.
3042   //
3043   // Arguments for generated stub:
3044   //      from:  R0
3045   //      to:    R1
3046   //      count: R2 byte count, treated as ssize_t, can be zero
3047   //
3048   // Examines the alignment of the operands and dispatches
3049   // to a long, int, short, or byte copy loop.
3050   //
3051   address generate_unsafe_copy(const char* name) {
3052 
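The dispatch described in the comment above (picking a long, int, short, or byte copy from the operands' alignment) can be pictured with the simplified C++ below. It illustrates the technique only, not the stub's actual register-level code, and the copy_* helpers are hypothetical stand-ins for the generated copy loops.

    #include <cstddef>
    #include <cstdint>

    void copy_longs (void* to, const void* from, size_t elements);  // hypothetical helpers,
    void copy_ints  (void* to, const void* from, size_t elements);  // standing in for the
    void copy_shorts(void* to, const void* from, size_t elements);  // generated copy loops
    void copy_bytes (void* to, const void* from, size_t elements);

    // Pick the widest element size that both pointers and the byte count allow.
    void unsafe_copy_dispatch(void* to, const void* from, size_t byte_count) {
      uintptr_t bits = (uintptr_t)to | (uintptr_t)from | (uintptr_t)byte_count;
      if      ((bits & 7) == 0) copy_longs (to, from, byte_count / 8);
      else if ((bits & 3) == 0) copy_ints  (to, from, byte_count / 4);
      else if ((bits & 1) == 0) copy_shorts(to, from, byte_count / 2);
      else                      copy_bytes (to, from, byte_count);
    }

ORing the two addresses and the byte count lets a single test of the low bits establish the alignment of all three at once.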

3318   //      to:    R1                                                                                                                  
3319   //      count: R2 treated as signed 32-bit int                                                                                     
3320   //      ckoff: R3 (super_check_offset)                                                                                             
3321   //      ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)                                                                     
3322   //      ret:   R0 zero for success; (-1^K) where K is partial transfer count (32-bit)                                              
3323   //                                                                                                                                 
3324   address generate_checkcast_copy(const char * name) {                                                                               
3325     __ align(CodeEntryAlignment);                                                                                                    
3326     StubCodeMark mark(this, "StubRoutines", name);                                                                                   
3327     address start = __ pc();                                                                                                         
3328 
3329     const Register from  = R0;  // source array address                                                                              
3330     const Register to    = R1;  // destination array address                                                                         
3331     const Register count = R2;  // elements count                                                                                    
3332 
3333     const Register R3_ckoff  = R3;      // super_check_offset                                                                        
3334     const Register R4_ckval  = R4;      // super_klass                                                                               
3335 
3336     const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently                                            
3337 
3338     Label load_element, store_element, do_card_marks, fail;                                                                          
3339 
3340     BLOCK_COMMENT("Entry:");                                                                                                         
3341 
3342     __ zap_high_non_significant_bits(R2);                                                                                            
3343 
3344 #ifdef AARCH64                                                                                                                       
3345     __ raw_push(LR, ZR);                                                                                                             
3346     __ raw_push(R19, R20);                                                                                                           
3347 #else                                                                                                                                
3348     int pushed = 0;                                                                                                                  
3349     __ push(LR);                                                                                                                     
3350     pushed+=1;                                                                                                                       
3351 #endif // AARCH64                                                                                                                    
3352 
3353 #if INCLUDE_ALL_GCS                                                                                                                  
3354     gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);                                                                   
3355 #endif // INCLUDE_ALL_GCS                                                                                                            
3356 
3357 #ifndef AARCH64                                                                                                                      
3358     const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;                                      
3359     __ push(caller_saved_regs);                                                                                                      
3360     assert(caller_saved_regs.size() == 6, "check the count");                                                                        
3361     pushed+=6;                                                                                                                       
3362 
3363     __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack                                        
3364 #endif // !AARCH64                                                                                                                   
3365 
3366     // Save arguments for barrier generation (after the pre barrier):                                                                
3367     // - must be a caller saved register and not LR                                                                                  
3368     // - ARM32: avoid R10 in case RThread is needed                                                                                  
3369     const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);                                                          
3370 #ifdef AARCH64                                                                                                                       
3371     __ mov_w(saved_count, count);                                                                                                    
3372     __ cbnz_w(count, load_element); // and test count                                                                                
3373 #else                                                                                                                                
3374     __ movs(saved_count, count); // and test count                                                                                   
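
One detail that is easy to miss in both the old and new prologues: on 32-bit ARM the super_klass argument arrives on the stack (SP[0] at entry), and by the time the stub loads it into R4_ckval it has already pushed LR plus the six caller-saved registers, so the load uses an offset of wordSize*pushed. A quick standalone check of that arithmetic, assuming the 4-byte wordSize of 32-bit ARM (the variable names just mirror the stub):

    #include <cassert>

    int main() {
      const int wordSize = 4;   // assumption: 32-bit ARM word size
      int pushed = 0;
      pushed += 1;              // push(LR)
      pushed += 6;              // push(R4-R6, R8, R9, altFP_7_11)
      // ckval sat at SP[0] on entry, so it is now 'pushed' words above SP.
      assert(wordSize * pushed == 28);
      return 0;
    }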

3184   //      to:    R1
3185   //      count: R2 treated as signed 32-bit int
3186   //      ckoff: R3 (super_check_offset)
3187   //      ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
3188   //      ret:   R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
3189   //
3190   address generate_checkcast_copy(const char * name) {
3191     __ align(CodeEntryAlignment);
3192     StubCodeMark mark(this, "StubRoutines", name);
3193     address start = __ pc();
3194 
3195     const Register from  = R0;  // source array address
3196     const Register to    = R1;  // destination array address
3197     const Register count = R2;  // elements count
3198 
3199     const Register R3_ckoff  = R3;      // super_check_offset
3200     const Register R4_ckval  = R4;      // super_klass
3201 
3202     const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently
3203 
3204     Label load_element, store_element, do_epilogue, fail;
3205 
3206     BLOCK_COMMENT("Entry:");
3207 
3208     __ zap_high_non_significant_bits(R2);
3209 
3210 #ifdef AARCH64
3211     __ raw_push(LR, ZR);
3212     __ raw_push(R19, R20);
3213 #else
3214     int pushed = 0;
3215     __ push(LR);
3216     pushed+=1;
3217 #endif // AARCH64
3218 
3219     DecoratorSet decorators = ARRAYCOPY_CHECKCAST;
3220 
3221     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
3222     bs->arraycopy_prologue(this, decorators, true, to, count, callee_saved_regs);
3223 
3224 #ifndef AARCH64
3225     const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
3226     __ push(caller_saved_regs);
3227     assert(caller_saved_regs.size() == 6, "check the count");
3228     pushed+=6;
3229 
3230     __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack
3231 #endif // !AARCH64
3232 
3233     // Save arguments for barrier generation (after the pre barrier):
3234     // - must be a caller saved register and not LR
3235     // - ARM32: avoid R10 in case RThread is needed
3236     const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
3237 #ifdef AARCH64
3238     __ mov_w(saved_count, count);
3239     __ cbnz_w(count, load_element); // and test count
3240 #else
3241     __ movs(saved_count, count); // and test count
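
The substantive change in this hunk is the barrier interface: instead of calling the GC-specific gen_write_ref_array_pre_barrier under an INCLUDE_ALL_GCS guard, the new code asks the current BarrierSet for its BarrierSetAssembler and calls the generic arraycopy_prologue hook, passing a DecoratorSet (ARRAYCOPY_CHECKCAST) that describes the copy; the matching arraycopy_epilogue call appears further down. As a rough illustration of that dispatch pattern only, here is a minimal self-contained C++ sketch; the class and method names echo the HotSpot ones, but the signatures and bodies are simplified stand-ins, not the real API.

    #include <cstdio>

    // Simplified stand-ins for illustration only (not the HotSpot types).
    typedef unsigned int DecoratorSet;
    const DecoratorSet ARRAYCOPY_CHECKCAST = 1u << 0;

    struct MacroAssembler;          // opaque here
    struct Register { int enc; };

    // Each collector supplies an assembler-side barrier implementation;
    // the default emits nothing.
    class BarrierSetAssemblerSketch {
    public:
      virtual ~BarrierSetAssemblerSketch() {}
      virtual void arraycopy_prologue(MacroAssembler*, DecoratorSet,
                                      bool /*is_oop*/, Register /*dst*/,
                                      Register /*count*/) {}
      virtual void arraycopy_epilogue(MacroAssembler*, DecoratorSet,
                                      bool /*is_oop*/, Register /*dst*/,
                                      Register /*count*/) {}
    };

    // Example concrete barrier: a card-table style post barrier.
    class CardTableBarrierSetAssemblerSketch : public BarrierSetAssemblerSketch {
    public:
      virtual void arraycopy_epilogue(MacroAssembler*, DecoratorSet,
                                      bool is_oop, Register, Register) {
        if (is_oop) std::printf("emit card-marking loop over the copied range\n");
      }
    };

    // The stub generator only depends on the abstract interface.
    void generate_checkcast_copy_sketch(MacroAssembler* masm,
                                        BarrierSetAssemblerSketch* bs,
                                        Register to, Register count) {
      DecoratorSet decorators = ARRAYCOPY_CHECKCAST;
      bs->arraycopy_prologue(masm, decorators, /*is_oop*/ true, to, count);
      // ... emit the element-by-element copy loop here ...
      bs->arraycopy_epilogue(masm, decorators, /*is_oop*/ true, to, count);
    }

    int main() {
      CardTableBarrierSetAssemblerSketch bs;
      Register to = {1}, count = {2};
      generate_checkcast_copy_sketch(/*masm*/ nullptr, &bs, to, count);
      return 0;
    }

The point of the indirection is that the stub generator itself no longer needs conditional compilation or any knowledge of which collector is active; the collector's assembler decides what barrier code, if any, to emit.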

3381 #ifdef AARCH64                                                                                                                       
3382     __ raw_pop(R19, R20);                                                                                                            
3383     __ raw_pop(LR, ZR);                                                                                                              
3384     __ ret();                                                                                                                        
3385 #else                                                                                                                                
3386     __ pop(caller_saved_regs);                                                                                                       
3387     __ pop(PC);                                                                                                                      
3388 #endif // AARCH64                                                                                                                    
3389 
3390     // ======== begin loop ========                                                                                                  
3391     // (Loop is rotated; its entry is load_element.)                                                                                 
3392     __ align(OptoLoopAlignment);                                                                                                     
3393     __ BIND(store_element);                                                                                                          
3394     if (UseCompressedOops) {                                                                                                         
3395       __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed));  // store the oop, changes flags                            
3396       __ subs_32(count,count,1);                                                                                                     
3397     } else {                                                                                                                         
3398       __ subs_32(count,count,1);                                                                                                     
3399       __ str(R5, Address(to, BytesPerHeapOop, post_indexed));             // store the oop                                           
3400     }                                                                                                                                
3401     __ b(do_card_marks, eq); // count exhausted                                                                                      
3402 
3403     // ======== loop entry is here ========                                                                                          
3404     __ BIND(load_element);                                                                                                           
3405     __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed));  // load the oop                                             
3406     __ cbz(R5, store_element); // NULL                                                                                               
3407 
3408     __ load_klass(R6, R5);                                                                                                           
3409 
3410     generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,                                                                
3411                         // branch to this on success:                                                                                
3412                         store_element);                                                                                              
3413     // ======== end loop ========                                                                                                    
3414 
3415     // It was a real error; we must depend on the caller to finish the job.                                                          
3416     // Register count has number of *remaining* oops, saved_count number of *total* oops.                                            
3417     // Emit GC store barriers for the oops we have copied                                                                            
3418     // and report their number to the caller (0 or (-1^n))                                                                           
3419     __ BIND(fail);                                                                                                                   
3420 
3421     // Note: fail marked by the fact that count differs from saved_count                                                             
3422 
3423     __ BIND(do_card_marks);                                                                                                          
3424 
3425     Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved                                                                    
3426     Label L_not_copied;                                                                                                              
3427 
3428     __ subs_32(copied, saved_count, count); // copied count (in saved reg)                                                           
3429     __ b(L_not_copied, eq); // nothing was copied, skip post barrier                                                                 
3430     __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value                                                 
3431     __ mov(R12, copied); // count arg scratched by post barrier                                                                      
3432 
3433     gen_write_ref_array_post_barrier(to, R12, R3);                                                                                   
3434 
3435     assert_different_registers(R3,R12,LR,copied,saved_count);                                                                        
3436     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);                                                               
3437 
3438     __ BIND(L_not_copied);                                                                                                           
3439     __ cmp_32(copied, saved_count); // values preserved in saved registers                                                           
3440 
3441 #ifdef AARCH64                                                                                                                       
3442     __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)                                                                
3443     __ raw_pop(R19, R20);                                                                                                            
3444     __ raw_pop(LR, ZR);                                                                                                              
3445     __ ret();                                                                                                                        
3446 #else                                                                                                                                
3447     __ mov(R0, 0, eq); // 0 if all copied                                                                                            
3448     __ mvn(R0, copied, ne); // else NOT(copied)                                                                                      
3449     __ pop(caller_saved_regs);                                                                                                       
3450     __ pop(PC);                                                                                                                      
3451 #endif // AARCH64                                                                                                                    
3452 
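
The epilogue above (and the identical one in the new version below) encodes the result as described in the header comment: R0 is 0 when every element was copied, and otherwise the bitwise NOT of the number of elements successfully copied before the type check failed (the csinv on AArch64, the mov/mvn pair on 32-bit ARM). Below is a hedged sketch of how a caller would decode that value; the result parameter is a hypothetical stand-in for the value returned in R0, not an actual HotSpot helper.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical helper: 'result' stands in for the value the stub leaves in R0.
    void report_checkcast_copy(int32_t result, int32_t total) {
      if (result == 0) {
        std::printf("all %d elements copied\n", (int)total);
      } else {
        int32_t copied = ~result;          // partial transfer: R0 == ~copied
        std::printf("type check failed after %d of %d elements\n",
                    (int)copied, (int)total);
      }
    }

    int main() {
      report_checkcast_copy(0, 8);         // full copy
      report_checkcast_copy(~3, 8);        // stub stopped after copying 3 elements
      return 0;
    }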

3248 #ifdef AARCH64
3249     __ raw_pop(R19, R20);
3250     __ raw_pop(LR, ZR);
3251     __ ret();
3252 #else
3253     __ pop(caller_saved_regs);
3254     __ pop(PC);
3255 #endif // AARCH64
3256 
3257     // ======== begin loop ========
3258     // (Loop is rotated; its entry is load_element.)
3259     __ align(OptoLoopAlignment);
3260     __ BIND(store_element);
3261     if (UseCompressedOops) {
3262       __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed));  // store the oop, changes flags
3263       __ subs_32(count,count,1);
3264     } else {
3265       __ subs_32(count,count,1);
3266       __ str(R5, Address(to, BytesPerHeapOop, post_indexed));             // store the oop
3267     }
3268     __ b(do_epilogue, eq); // count exhausted
3269 
3270     // ======== loop entry is here ========
3271     __ BIND(load_element);
3272     __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed));  // load the oop
3273     __ cbz(R5, store_element); // NULL
3274 
3275     __ load_klass(R6, R5);
3276 
3277     generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,
3278                         // branch to this on success:
3279                         store_element);
3280     // ======== end loop ========
3281 
3282     // It was a real error; we must depend on the caller to finish the job.
3283     // Register count has number of *remaining* oops, saved_count number of *total* oops.
3284     // Emit GC store barriers for the oops we have copied
3285     // and report their number to the caller (0 or (-1^n))
3286     __ BIND(fail);
3287 
3288     // Note: fail marked by the fact that count differs from saved_count
3289 
3290     __ BIND(do_epilogue);
3291 
3292     Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved
3293     Label L_not_copied;
3294 
3295     __ subs_32(copied, saved_count, count); // copied count (in saved reg)
3296     __ b(L_not_copied, eq); // nothing was copied, skip post barrier
3297     __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3298     __ mov(R12, copied); // count arg scratched by post barrier
3299 
3300     bs->arraycopy_epilogue(this, decorators, true, to, R12, R3);
3301 
3302     assert_different_registers(R3,R12,LR,copied,saved_count);
3303     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);
3304 
3305     __ BIND(L_not_copied);
3306     __ cmp_32(copied, saved_count); // values preserved in saved registers
3307 
3308 #ifdef AARCH64
3309     __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
3310     __ raw_pop(R19, R20);
3311     __ raw_pop(LR, ZR);
3312     __ ret();
3313 #else
3314     __ mov(R0, 0, eq); // 0 if all copied
3315     __ mvn(R0, copied, ne); // else NOT(copied)
3316     __ pop(caller_saved_regs);
3317     __ pop(PC);
3318 #endif // AARCH64
3319 
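
Finally, since the loop body is identical in both versions and is written in rotated form (entry at load_element, with the store at the top), a plain C++ model of what it computes may help when reading the assembly. This is an illustrative model only: is_subtype_of is a placeholder for generate_type_check, null elements are stored without a check exactly as in the code above, and the card-mark/epilogue barrier work over the copied prefix is omitted. It is not HotSpot code.

    #include <cstdint>
    #include <cstddef>

    // Illustrative model of the copy loop above (not HotSpot code).
    typedef const void* KlassLike;
    struct OopLike { KlassLike klass; };

    // Placeholder for the generate_type_check fast/slow path; here it only
    // accepts an exact match so the example stays self-contained.
    static bool is_subtype_of(KlassLike elem_klass, KlassLike dst_elem_klass) {
      return elem_klass == dst_elem_klass;
    }

    // Returns 0 on success, or ~copied when an element fails the type check,
    // matching the R0 encoding of the stub.
    int32_t checkcast_copy_model(OopLike* const* from, OopLike** to,
                                 int32_t count, KlassLike dst_elem_klass) {
      int32_t remaining = count;                    // 'count' register
      while (remaining != 0) {
        OopLike* elem = *from++;                    // load_element
        if (elem != NULL &&
            !is_subtype_of(elem->klass, dst_elem_klass)) {
          return ~(count - remaining);              // fail: ~(elements copied)
        }
        *to++ = elem;                               // store_element
        remaining--;                                // subs_32(count, count, 1)
      }
      return 0;                                     // do_card_marks / do_epilogue
    }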