7 *
8 * This code is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 * version 2 for more details (a copy is included in the LICENSE file that
12 * accompanied this code).
13 *
14 * You should have received a copy of the GNU General Public License version
15 * 2 along with this work; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/assembler.hpp"
26 #include "assembler_arm.inline.hpp"
27 #include "gc/shared/cardTable.hpp"
28 #include "gc/shared/cardTableModRefBS.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_arm.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/align.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #endif
45
46 // Declaration and definition of StubGenerator (no .hpp file).
47 // For a more detailed description of the stub routine structure
|
7 *
8 * This code is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 * version 2 for more details (a copy is included in the LICENSE file that
12 * accompanied this code).
13 *
14 * You should have received a copy of the GNU General Public License version
15 * 2 along with this work; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/assembler.hpp"
26 #include "assembler_arm.inline.hpp"
27 #include "gc/shared/barrierSet.hpp"
28 #include "gc/shared/barrierSetCodeGen.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_arm.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/align.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #endif
45
46 // Declaration and definition of StubGenerator (no .hpp file).
47 // For a more detailed description of the stub routine structure
|
2837 }
2838
2839 __ ret();
2840 }
2841
2842 if (! to_is_aligned) {
2843 __ BIND(L_unaligned_dst);
2844 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
2845 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
2846
2847 if (status) {
2848 __ mov(R0, 0); // OK
2849 }
2850
2851 __ ret();
2852 }
2853
2854 return start;
2855 }
2856
2857 #if INCLUDE_ALL_GCS
2858 //
2859 // Generate pre-write barrier for array.
2860 //
2861 // Input:
2862 // addr - register containing starting address
2863 // count - register containing element count, 32-bit int
2864 // callee_saved_regs -
2865 // the call must preserve this number of registers: R0, R1, ..., R[callee_saved_regs-1]
2866 //
2867 // callee_saved_regs must include addr and count
2868 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) except for callee_saved_regs.
2869 void gen_write_ref_array_pre_barrier(Register addr, Register count, int callee_saved_regs) {
2870 BarrierSet* bs = Universe::heap()->barrier_set();
2871 switch (bs->kind()) {
2872 case BarrierSet::G1BarrierSet:
2873 {
2874 assert( addr->encoding() < callee_saved_regs, "addr must be saved");
2875 assert(count->encoding() < callee_saved_regs, "count must be saved");
2876
2877 BLOCK_COMMENT("PreBarrier");
2878
2879 #ifdef AARCH64
2880 // raw_push works on register pairs, so round the save count up to even.
2881 callee_saved_regs = align_up(callee_saved_regs, 2);
2882 for (int i = 0; i < callee_saved_regs; i += 2) {
2883 __ raw_push(as_Register(i), as_Register(i+1));
2884 }
2885 #else
2886 RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
2887 __ push(saved_regs | R9ifScratched);
2888 #endif // AARCH64
2889
2890 // Marshal (addr, count) into the (R0, R1) argument registers for the runtime call.
2891 if (addr != R0) {
2892 assert_different_registers(count, R0);
2893 __ mov(R0, addr);
2894 }
2895 #ifdef AARCH64
2896 __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_pre takes size_t
2897 #else
2898 if (count != R1) {
2899 __ mov(R1, count);
2900 }
2901 #endif // AARCH64
2902
2903 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
2904
2905 // Restore the registers saved above, in reverse order.
2906 #ifdef AARCH64
2907 for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
2908 __ raw_pop(as_Register(i), as_Register(i+1));
2909 }
2910 #else
2911 __ pop(saved_regs | R9ifScratched);
2912 #endif // AARCH64
2913 }
2914 break; // fixed: was an implicit fall-through into the CardTableModRef case (benign, but fragile)
2915 case BarrierSet::CardTableModRef:
2916 break; // no pre-barrier needed for card-table based collectors
2917 default:
2918 ShouldNotReachHere();
2919 }
2920 }
2917 #endif // INCLUDE_ALL_GCS
2918
2919 //
2920 // Generate post-write barrier for array.
2921 //
2922 // Input:
2923 // addr - register containing starting address (can be scratched)
2924 // count - register containing element count, 32-bit int (can be scratched)
2925 // tmp - scratch register
2926 //
2927 // Note: LR can be scratched but might be equal to addr, count or tmp
2928 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2929 void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {
2930 assert_different_registers(addr, count, tmp);
2931 BarrierSet* bs = Universe::heap()->barrier_set();
2932
2933 switch (bs->kind()) {
2934 case BarrierSet::G1BarrierSet:
2935 {
2936 BLOCK_COMMENT("G1PostBarrier");
2937 // Marshal (addr, count) into the (R0, R1) argument registers for the runtime call.
2938 if (addr != R0) {
2939 assert_different_registers(count, R0);
2940 __ mov(R0, addr);
2941 }
2942 #ifdef AARCH64
2943 __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_post takes size_t
2944 #else
2945 if (count != R1) {
2946 __ mov(R1, count);
2947 }
2948 #if R9_IS_SCRATCHED
2949 // Safer to save R9 here since callers may have been written
2950 // assuming R9 survives. This is suboptimal but is not in
2951 // general worth optimizing for the few platforms where R9
2952 // is scratched. Note that the optimization might not be too
2953 // difficult for this particular call site.
2954 __ push(R9);
2955 #endif
2956 #endif // !AARCH64
2957 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
2958 #ifndef AARCH64
2959 #if R9_IS_SCRATCHED
2960 __ pop(R9);
2961 #endif
2962 #endif // !AARCH64
2963 }
2964 break;
2965 case BarrierSet::CardTableModRef:
2966 {
2967 BLOCK_COMMENT("CardTablePostBarrier");
2968 CardTableModRefBS* ctbs = barrier_set_cast<CardTableModRefBS>(bs);
2969 CardTable* ct = ctbs->card_table();
2970 assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
2971
2972 Label L_cardtable_loop, L_done;
2973
2974 __ cbz_32(count, L_done); // zero count - nothing to do
2975
2976 // count := address of the last oop in the region; addr stays at the first oop.
2977 __ add_ptr_scaled_int32(count, addr, count, LogBytesPerHeapOop);
2978 __ sub(count, count, BytesPerHeapOop); // last addr
2979
2980 // Convert [first addr, last addr] into card indices, then into a card count.
2981 __ logical_shift_right(addr, addr, CardTable::card_shift);
2982 __ logical_shift_right(count, count, CardTable::card_shift);
2983 __ sub(count, count, addr); // nb of cards
2984
2985 // warning: Rthread has not been preserved
2986 __ mov_address(tmp, (address) ct->byte_map_base(), symbolic_Relocation::card_table_reference);
2987 __ add(addr,tmp, addr); // addr := &card_table[first card]
2988
2989 Register zero = __ zero_register(tmp);
2990
2991 // Dirty each card with a zero byte. Loop branches on 'ge' after a decrement,
2992 // so it executes (nb of cards + 1) times, covering the inclusive card range.
2993 __ BIND(L_cardtable_loop);
2994 __ strb(zero, Address(addr, 1, post_indexed));
2995 __ subs(count, count, 1);
2996 __ b(L_cardtable_loop, ge);
2997 __ BIND(L_done);
2998 }
2999 break;
3000 default:
3001 ShouldNotReachHere();
3002 }
3003 }
2999
3000 // Generates pattern of code to be placed after raw data copying in generate_oop_copy
3001 // Includes return from arraycopy stub.
3002 //
3003 // Arguments:
3004 // to: destination pointer after copying.
3005 // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
3006 // count: total number of copied elements, 32-bit int
3007 //
3008 // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
3009 void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward, bool /*unused*/ = false) {
3010 assert_different_registers(to, count, tmp);
3011
3012 if (forward) {
3013 // 'to' is upper bound of the modified region
3014 // restore initial dst:
3015 __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
3016 }
3017
3018 // 'to' is the beginning of the region
3019
3020 // Emit the GC post-write barrier over the copied region (scratches to/count/tmp).
3021 gen_write_ref_array_post_barrier(to, count, tmp);
3022
3023 if (status) {
3024 __ mov(R0, 0); // OK (stub returns 0 in R0 on success)
3025 }
3026
3027 // Return: LR was pushed by the stub prologue (presumably in generate_oop_copy
3028 // — TODO confirm); pop it back into PC (ARM32) or LR then ret (AArch64).
3029 #ifdef AARCH64
3030 __ raw_pop(LR, ZR);
3031 __ ret();
3032 #else
3033 __ pop(PC);
3034 #endif // AARCH64
3035 }
3033
3034
3035 // Generate stub for assign-compatible oop copy. If "aligned" is true, the
3036 // "from" and "to" addresses are assumed to be heapword aligned.
3037 //
3038 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
3039 // "nooverlap_target" must be specified as the address to jump if they don't.
|
2837 }
2838
2839 __ ret();
2840 }
2841
2842 if (! to_is_aligned) {
2843 __ BIND(L_unaligned_dst);
2844 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
2845 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
2846
2847 if (status) {
2848 __ mov(R0, 0); // OK
2849 }
2850
2851 __ ret();
2852 }
2853
2854 return start;
2855 }
2856
2857
2858 // Generates pattern of code to be placed after raw data copying in generate_oop_copy
2859 // Includes return from arraycopy stub.
2860 //
2861 // Arguments:
2862 // to: destination pointer after copying.
2863 // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
2864 // count: total number of copied elements, 32-bit int
2865 //
2866 // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
2867 void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward, DecoratorSet decorators) {
2868 assert_different_registers(to, count, tmp);
2869
2870 if (forward) {
2871 // 'to' is upper bound of the modified region
2872 // restore initial dst:
2873 __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
2874 }
2875
2876 // 'to' is the beginning of the region
2877
2878 // Delegate the post-write barrier to the active barrier set's code generator;
2879 // 'decorators' describes the copy (disjoint/aligned/checkcast) to the barrier.
2880 BarrierSet *bs = BarrierSet::barrier_set();
2881 BarrierSetCodeGen *code_gen = bs->code_gen();
2882 code_gen->arraycopy_epilogue(this, decorators, true, to, count, tmp);
2883
2884 if (status) {
2885 __ mov(R0, 0); // OK (stub returns 0 in R0 on success)
2886 }
2887
2888 // Return: LR was pushed by the stub prologue; pop it back into PC (ARM32)
2889 // or LR then ret (AArch64).
2890 #ifdef AARCH64
2891 __ raw_pop(LR, ZR);
2892 __ ret();
2893 #else
2894 __ pop(PC);
2895 #endif // AARCH64
2896 }
2893
2894
2895 // Generate stub for assign-compatible oop copy. If "aligned" is true, the
2896 // "from" and "to" addresses are assumed to be heapword aligned.
2897 //
2898 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
2899 // "nooverlap_target" must be specified as the address to jump if they don't.
|
3068
3069 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);
3070
3071 // Conjoint case: since execution reaches this point, the arrays overlap, so performing backward copy
3072 // Disjoint case: perform forward copy
3073 bool forward = disjoint;
3074
3075 const int bytes_per_count = BytesPerHeapOop;
3076 const int log_bytes_per_count = LogBytesPerHeapOop;
3077
3078 const Register saved_count = LR;
3079 const int callee_saved_regs = 3; // R0-R2
3080
3081 // LR is used later to save barrier args
3082 #ifdef AARCH64
3083 __ raw_push(LR, ZR);
3084 #else
3085 __ push(LR);
3086 #endif // AARCH64
3087
3088 #if INCLUDE_ALL_GCS
3089 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3090 #endif // INCLUDE_ALL_GCS
3091
3092 // save arguments for barrier generation (after the pre barrier)
3093 __ mov(saved_count, count);
3094
3095 if (!forward) {
3096 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
3097 __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
3098 }
3099
3100 // for short arrays, just do single element copy
3101 Label L_small_array;
3102 const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?
3103 __ cmp_32(count, small_copy_limit);
3104 __ b(L_small_array, le);
3105
3106 bool from_is_aligned = (bytes_per_count >= 8);
3107 if (aligned && forward && (HeapWordSize % 8 == 0)) {
3108 // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
3109 // then from is aligned by 8
|
2928
2929 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);
2930
2931 // Conjoint case: since execution reaches this point, the arrays overlap, so performing backward copy
2932 // Disjoint case: perform forward copy
2933 bool forward = disjoint;
2934
2935 const int bytes_per_count = BytesPerHeapOop;
2936 const int log_bytes_per_count = LogBytesPerHeapOop;
2937
2938 const Register saved_count = LR;
2939 const int callee_saved_regs = 3; // R0-R2
2940
2941 // LR is used later to save barrier args
2942 #ifdef AARCH64
2943 __ raw_push(LR, ZR);
2944 #else
2945 __ push(LR);
2946 #endif // AARCH64
2947
2948 BarrierSet *bs = BarrierSet::barrier_set();
2949 BarrierSetCodeGen *code_gen = bs->code_gen();
2950 DecoratorSet decorators = 0;
2951 if (disjoint) {
2952 decorators |= ARRAYCOPY_DISJOINT;
2953 }
2954 if (aligned) {
2955 decorators |= ARRAYCOPY_ALIGNED;
2956 }
2957 code_gen->arraycopy_prologue(this, decorators, true, to, count, callee_saved_regs);
2958
2959 // save arguments for barrier generation (after the pre barrier)
2960 __ mov(saved_count, count);
2961
2962 if (!forward) {
2963 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
2964 __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
2965 }
2966
2967 // for short arrays, just do single element copy
2968 Label L_small_array;
2969 const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?
2970 __ cmp_32(count, small_copy_limit);
2971 __ b(L_small_array, le);
2972
2973 bool from_is_aligned = (bytes_per_count >= 8);
2974 if (aligned && forward && (HeapWordSize % 8 == 0)) {
2975 // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
2976 // then from is aligned by 8
|
3128 to_is_aligned = true;
3129 }
3130 }
3131
3132 Label L_unaligned_dst;
3133
3134 if (!to_is_aligned) {
3135 BLOCK_COMMENT("Check dst alignment:");
3136 __ tst(to, wordSize - 1);
3137 __ b(L_unaligned_dst, ne); // 'to' is not aligned
3138 }
3139
3140 int min_copy;
3141 if (forward) {
3142 min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3143 } else {
3144 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3145 }
3146 assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
3147
3148 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3149
3150 {
3151 copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3152
3153 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3154 }
3155
3156 if (!to_is_aligned) {
3157 // !to_is_aligned <=> UseCompressedOops && AArch64
3158 __ BIND(L_unaligned_dst);
3159 #ifdef AARCH64
3160 assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
3161 #else
3162 ShouldNotReachHere();
3163 #endif // AARCH64
3164 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3165 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
3166
3167 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3168 }
3169
3170 return start;
3171 }
3172
3173 // Generate 'unsafe' array copy stub
3174 // Though just as safe as the other stubs, it takes an unscaled
3175 // size_t argument instead of an element count.
3176 //
3177 // Arguments for generated stub:
3178 // from: R0
3179 // to: R1
3180 // count: R2 byte count, treated as ssize_t, can be zero
3181 //
3182 // Examines the alignment of the operands and dispatches
3183 // to a long, int, short, or byte copy loop.
3184 //
3185 address generate_unsafe_copy(const char* name) {
3186
|
2995 to_is_aligned = true;
2996 }
2997 }
2998
2999 Label L_unaligned_dst;
3000
3001 if (!to_is_aligned) {
3002 BLOCK_COMMENT("Check dst alignment:");
3003 __ tst(to, wordSize - 1);
3004 __ b(L_unaligned_dst, ne); // 'to' is not aligned
3005 }
3006
3007 int min_copy;
3008 if (forward) {
3009 min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3010 } else {
3011 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3012 }
3013 assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
3014
3015 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3016
3017 {
3018 copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3019
3020 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3021 }
3022
3023 if (!to_is_aligned) {
3024 // !to_is_aligned <=> UseCompressedOops && AArch64
3025 __ BIND(L_unaligned_dst);
3026 #ifdef AARCH64
3027 assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
3028 #else
3029 ShouldNotReachHere();
3030 #endif // AARCH64
3031 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3032 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
3033
3034 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3035 }
3036
3037 return start;
3038 }
3039
3040 // Generate 'unsafe' array copy stub
3041 // Though just as safe as the other stubs, it takes an unscaled
3042 // size_t argument instead of an element count.
3043 //
3044 // Arguments for generated stub:
3045 // from: R0
3046 // to: R1
3047 // count: R2 byte count, treated as ssize_t, can be zero
3048 //
3049 // Examines the alignment of the operands and dispatches
3050 // to a long, int, short, or byte copy loop.
3051 //
3052 address generate_unsafe_copy(const char* name) {
3053
|
3318 // to: R1
3319 // count: R2 treated as signed 32-bit int
3320 // ckoff: R3 (super_check_offset)
3321 // ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
3322 // ret: R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
3323 //
3324 address generate_checkcast_copy(const char * name) {
3325 __ align(CodeEntryAlignment);
3326 StubCodeMark mark(this, "StubRoutines", name);
3327 address start = __ pc();
3328
3329 const Register from = R0; // source array address
3330 const Register to = R1; // destination array address
3331 const Register count = R2; // elements count
3332
3333 const Register R3_ckoff = R3; // super_check_offset
3334 const Register R4_ckval = R4; // super_klass
3335
3336 const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently
3337
3338 Label load_element, store_element, do_card_marks, fail;
3339
3340 BLOCK_COMMENT("Entry:");
3341
3342 __ zap_high_non_significant_bits(R2);
3343
3344 #ifdef AARCH64
3345 __ raw_push(LR, ZR);
3346 __ raw_push(R19, R20);
3347 #else
3348 int pushed = 0;
3349 __ push(LR);
3350 pushed+=1;
3351 #endif // AARCH64
3352
3353 #if INCLUDE_ALL_GCS
3354 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3355 #endif // INCLUDE_ALL_GCS
3356
3357 #ifndef AARCH64
3358 const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
3359 __ push(caller_saved_regs);
3360 assert(caller_saved_regs.size() == 6, "check the count");
3361 pushed+=6;
3362
3363 __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack
3364 #endif // !AARCH64
3365
3366 // Save arguments for barrier generation (after the pre barrier):
3367 // - must be a caller saved register and not LR
3368 // - ARM32: avoid R10 in case RThread is needed
3369 const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
3370 #ifdef AARCH64
3371 __ mov_w(saved_count, count);
3372 __ cbnz_w(count, load_element); // and test count
3373 #else
3374 __ movs(saved_count, count); // and test count
|
3185 // to: R1
3186 // count: R2 treated as signed 32-bit int
3187 // ckoff: R3 (super_check_offset)
3188 // ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
3189 // ret: R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
3190 //
3191 address generate_checkcast_copy(const char * name) {
3192 __ align(CodeEntryAlignment);
3193 StubCodeMark mark(this, "StubRoutines", name);
3194 address start = __ pc();
3195
3196 const Register from = R0; // source array address
3197 const Register to = R1; // destination array address
3198 const Register count = R2; // elements count
3199
3200 const Register R3_ckoff = R3; // super_check_offset
3201 const Register R4_ckval = R4; // super_klass
3202
3203 const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently
3204
3205 Label load_element, store_element, do_epilogue, fail;
3206
3207 BLOCK_COMMENT("Entry:");
3208
3209 __ zap_high_non_significant_bits(R2);
3210
3211 #ifdef AARCH64
3212 __ raw_push(LR, ZR);
3213 __ raw_push(R19, R20);
3214 #else
3215 int pushed = 0;
3216 __ push(LR);
3217 pushed+=1;
3218 #endif // AARCH64
3219
3220 BarrierSet *bs = BarrierSet::barrier_set();
3221 BarrierSetCodeGen *code_gen = bs->code_gen();
3222 DecoratorSet decorators = ARRAYCOPY_CHECKCAST;
3223 code_gen->arraycopy_prologue(this, decorators, true, to, count, callee_saved_regs);
3224
3225 #ifndef AARCH64
3226 const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
3227 __ push(caller_saved_regs);
3228 assert(caller_saved_regs.size() == 6, "check the count");
3229 pushed+=6;
3230
3231 __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack
3232 #endif // !AARCH64
3233
3234 // Save arguments for barrier generation (after the pre barrier):
3235 // - must be a caller saved register and not LR
3236 // - ARM32: avoid R10 in case RThread is needed
3237 const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
3238 #ifdef AARCH64
3239 __ mov_w(saved_count, count);
3240 __ cbnz_w(count, load_element); // and test count
3241 #else
3242 __ movs(saved_count, count); // and test count
|
3381 #ifdef AARCH64
3382 __ raw_pop(R19, R20);
3383 __ raw_pop(LR, ZR);
3384 __ ret();
3385 #else
3386 __ pop(caller_saved_regs);
3387 __ pop(PC);
3388 #endif // AARCH64
3389
3390 // ======== begin loop ========
3391 // (Loop is rotated; its entry is load_element.)
3392 __ align(OptoLoopAlignment);
3393 __ BIND(store_element);
3394 if (UseCompressedOops) {
3395 __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
3396 __ subs_32(count,count,1);
3397 } else {
3398 __ subs_32(count,count,1);
3399 __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
3400 }
3401 __ b(do_card_marks, eq); // count exhausted
3402
3403 // ======== loop entry is here ========
3404 __ BIND(load_element);
3405 __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed)); // load the oop
3406 __ cbz(R5, store_element); // NULL
3407
3408 __ load_klass(R6, R5);
3409
3410 generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,
3411 // branch to this on success:
3412 store_element);
3413 // ======== end loop ========
3414
3415 // It was a real error; we must depend on the caller to finish the job.
3416 // Register count has number of *remaining* oops, saved_count number of *total* oops.
3417 // Emit GC store barriers for the oops we have copied
3418 // and report their number to the caller (0 or (-1^n))
3419 __ BIND(fail);
3420
3421 // Note: fail marked by the fact that count differs from saved_count
3422
3423 __ BIND(do_card_marks);
3424
3425 Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved
3426 Label L_not_copied;
3427
3428 __ subs_32(copied, saved_count, count); // copied count (in saved reg)
3429 __ b(L_not_copied, eq); // nothing was copied, skip post barrier
3430 __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3431 __ mov(R12, copied); // count arg scratched by post barrier
3432
3433 gen_write_ref_array_post_barrier(to, R12, R3);
3434
3435 assert_different_registers(R3,R12,LR,copied,saved_count);
3436 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);
3437
3438 __ BIND(L_not_copied);
3439 __ cmp_32(copied, saved_count); // values preserved in saved registers
3440
3441 #ifdef AARCH64
3442 __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
3443 __ raw_pop(R19, R20);
3444 __ raw_pop(LR, ZR);
3445 __ ret();
3446 #else
3447 __ mov(R0, 0, eq); // 0 if all copied
3448 __ mvn(R0, copied, ne); // else NOT(copied)
3449 __ pop(caller_saved_regs);
3450 __ pop(PC);
3451 #endif // AARCH64
3452
|
3249 #ifdef AARCH64
3250 __ raw_pop(R19, R20);
3251 __ raw_pop(LR, ZR);
3252 __ ret();
3253 #else
3254 __ pop(caller_saved_regs);
3255 __ pop(PC);
3256 #endif // AARCH64
3257
3258 // ======== begin loop ========
3259 // (Loop is rotated; its entry is load_element.)
3260 __ align(OptoLoopAlignment);
3261 __ BIND(store_element);
3262 if (UseCompressedOops) {
3263 __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
3264 __ subs_32(count,count,1);
3265 } else {
3266 __ subs_32(count,count,1);
3267 __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
3268 }
3269 __ b(do_epilogue, eq); // count exhausted
3270
3271 // ======== loop entry is here ========
3272 __ BIND(load_element);
3273 __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed)); // load the oop
3274 __ cbz(R5, store_element); // NULL
3275
3276 __ load_klass(R6, R5);
3277
3278 generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,
3279 // branch to this on success:
3280 store_element);
3281 // ======== end loop ========
3282
3283 // It was a real error; we must depend on the caller to finish the job.
3284 // Register count has number of *remaining* oops, saved_count number of *total* oops.
3285 // Emit GC store barriers for the oops we have copied
3286 // and report their number to the caller (0 or (-1^n))
3287 __ BIND(fail);
3288
3289 // Note: fail marked by the fact that count differs from saved_count
3290
3291 __ BIND(do_epilogue);
3292
3293 Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved
3294 Label L_not_copied;
3295
3296 __ subs_32(copied, saved_count, count); // copied count (in saved reg)
3297 __ b(L_not_copied, eq); // nothing was copied, skip post barrier
3298 __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3299 __ mov(R12, copied); // count arg scratched by post barrier
3300
3301 code_gen->arraycopy_epilogue(this, decorators, true, to, R12, R3);
3302
3303 assert_different_registers(R3,R12,LR,copied,saved_count);
3304 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);
3305
3306 __ BIND(L_not_copied);
3307 __ cmp_32(copied, saved_count); // values preserved in saved registers
3308
3309 #ifdef AARCH64
3310 __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
3311 __ raw_pop(R19, R20);
3312 __ raw_pop(LR, ZR);
3313 __ ret();
3314 #else
3315 __ mov(R0, 0, eq); // 0 if all copied
3316 __ mvn(R0, copied, ne); // else NOT(copied)
3317 __ pop(caller_saved_regs);
3318 __ pop(PC);
3319 #endif // AARCH64
3320
|