7 *
8 * This code is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 * version 2 for more details (a copy is included in the LICENSE file that
12 * accompanied this code).
13 *
14 * You should have received a copy of the GNU General Public License version
15 * 2 along with this work; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/assembler.hpp"
26 #include "assembler_arm.inline.hpp"
27 #include "gc/shared/cardTable.hpp"
28 #include "gc/shared/cardTableBarrierSet.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_arm.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/align.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #endif
45
46 // Declaration and definition of StubGenerator (no .hpp file).
47 // For a more detailed description of the stub routine structure
|
7 *
8 * This code is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 * version 2 for more details (a copy is included in the LICENSE file that
12 * accompanied this code).
13 *
14 * You should have received a copy of the GNU General Public License version
15 * 2 along with this work; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
19 * or visit www.oracle.com if you need additional information or have any
20 * questions.
21 *
22 */
23
24 #include "precompiled.hpp"
25 #include "asm/assembler.hpp"
26 #include "assembler_arm.inline.hpp"
27 #include "gc/shared/barrierSet.hpp"
28 #include "gc/shared/barrierSetAssembler.hpp"
29 #include "interpreter/interpreter.hpp"
30 #include "nativeInst_arm.hpp"
31 #include "oops/instanceOop.hpp"
32 #include "oops/method.hpp"
33 #include "oops/objArrayKlass.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/frame.inline.hpp"
37 #include "runtime/handles.inline.hpp"
38 #include "runtime/sharedRuntime.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/align.hpp"
42 #ifdef COMPILER2
43 #include "opto/runtime.hpp"
44 #endif
45
46 // Declaration and definition of StubGenerator (no .hpp file).
47 // For a more detailed description of the stub routine structure
|
2837 }
2838
2839 __ ret();
2840 }
2841
2842 if (! to_is_aligned) {
2843 __ BIND(L_unaligned_dst);
2844 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
2845 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
2846
2847 if (status) {
2848 __ mov(R0, 0); // OK
2849 }
2850
2851 __ ret();
2852 }
2853
2854 return start;
2855 }
2856
2857 #if INCLUDE_ALL_GCS
2858 //
2859 // Generate pre-write barrier for array.
2860 //
2861 // Input:
2862 // addr - register containing starting address
2863 // count - register containing element count, 32-bit int
2864 // callee_saved_regs -
2865 // the call must preserve this number of registers: R0, R1, ..., R[callee_saved_regs-1]
2866 //
2867 // callee_saved_regs must include addr and count
2868 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) except for callee_saved_regs.
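  // For G1 the pre-barrier has to observe the destination range before it is
  // overwritten: the runtime call below enqueues the about-to-be-replaced oops
  // for SATB marking. Plain card-table collectors need no pre-barrier, which is
  // why their case in the switch below simply breaks.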
2869 void gen_write_ref_array_pre_barrier(Register addr, Register count, int callee_saved_regs) {
2870 BarrierSet* bs = Universe::heap()->barrier_set();
2871 switch (bs->kind()) {
2872 case BarrierSet::G1BarrierSet:
2873 {
2874 assert( addr->encoding() < callee_saved_regs, "addr must be saved");
2875 assert(count->encoding() < callee_saved_regs, "count must be saved");
2876
2877 BLOCK_COMMENT("PreBarrier");
2878
2879 #ifdef AARCH64
2880 callee_saved_regs = align_up(callee_saved_regs, 2);
2881 for (int i = 0; i < callee_saved_regs; i += 2) {
2882 __ raw_push(as_Register(i), as_Register(i+1));
2883 }
2884 #else
2885 RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
2886 __ push(saved_regs | R9ifScratched);
2887 #endif // AARCH64
2888
2889 if (addr != R0) {
2890 assert_different_registers(count, R0);
2891 __ mov(R0, addr);
2892 }
2893 #ifdef AARCH64
2894 __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_pre takes size_t
2895 #else
2896 if (count != R1) {
2897 __ mov(R1, count);
2898 }
2899 #endif // AARCH64
2900
2901 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
2902
2903 #ifdef AARCH64
2904 for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
2905 __ raw_pop(as_Register(i), as_Register(i+1));
2906 }
2907 #else
2908 __ pop(saved_regs | R9ifScratched);
2909 #endif // AARCH64
2910 }
2911 case BarrierSet::CardTableBarrierSet:
2912 break;
2913 default:
2914 ShouldNotReachHere();
2915 }
2916 }
2917 #endif // INCLUDE_ALL_GCS
2918
2919 //
2920 // Generate post-write barrier for array.
2921 //
2922 // Input:
2923 // addr - register containing starting address (can be scratched)
2924 // count - register containing element count, 32-bit int (can be scratched)
2925 // tmp - scratch register
2926 //
2927 // Note: LR can be scratched but might be equal to addr, count or tmp
2928 // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
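  // The post barrier tells the GC which part of the heap now holds freshly
  // stored oops: for G1 the [addr, addr + count*oopSize) range is handed to the
  // runtime (static_write_ref_array_post), while the plain card-table case below
  // dirties the covering card bytes inline.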
2929 void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp) {
2930 assert_different_registers(addr, count, tmp);
2931 BarrierSet* bs = Universe::heap()->barrier_set();
2932
2933 switch (bs->kind()) {
2934 case BarrierSet::G1BarrierSet:
2935 {
2936 BLOCK_COMMENT("G1PostBarrier");
2937 if (addr != R0) {
2938 assert_different_registers(count, R0);
2939 __ mov(R0, addr);
2940 }
2941 #ifdef AARCH64
2942 __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_post takes size_t
2943 #else
2944 if (count != R1) {
2945 __ mov(R1, count);
2946 }
2947 #if R9_IS_SCRATCHED
2948 // Safer to save R9 here since callers may have been written
2949 // assuming R9 survives. This is suboptimal but is not in
2950 // general worth optimizing for the few platforms where R9
2951       // is scratched. Note that the optimization might not be too
2952 // difficult for this particular call site.
2953 __ push(R9);
2954 #endif
2955 #endif // !AARCH64
2956 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
2957 #ifndef AARCH64
2958 #if R9_IS_SCRATCHED
2959 __ pop(R9);
2960 #endif
2961 #endif // !AARCH64
2962 }
2963 break;
2964 case BarrierSet::CardTableBarrierSet:
2965 {
2966 BLOCK_COMMENT("CardTablePostBarrier");
2967 CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
2968 CardTable* ct = ctbs->card_table();
2969 assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
2970
2971 Label L_cardtable_loop, L_done;
2972
2973 __ cbz_32(count, L_done); // zero count - nothing to do
2974
2975 __ add_ptr_scaled_int32(count, addr, count, LogBytesPerHeapOop);
2976 __ sub(count, count, BytesPerHeapOop); // last addr
2977
2978 __ logical_shift_right(addr, addr, CardTable::card_shift);
2979 __ logical_shift_right(count, count, CardTable::card_shift);
2980 __ sub(count, count, addr); // nb of cards
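        // Worked example, assuming the usual 512-byte cards (card_shift == 9) and
        // 4-byte heap oops: copying 3 oops starting at addr == 0x1FE gives
        // last addr == 0x206; 0x1FE >> 9 == 0 and 0x206 >> 9 == 1, so count becomes 1
        // and the loop below (which runs while count >= 0) zeroes 2 card bytes.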
2981
2982 // warning: Rthread has not been preserved
2983 __ mov_address(tmp, (address) ct->byte_map_base(), symbolic_Relocation::card_table_reference);
2984 __ add(addr,tmp, addr);
2985
2986 Register zero = __ zero_register(tmp);
2987
2988 __ BIND(L_cardtable_loop);
2989 __ strb(zero, Address(addr, 1, post_indexed));
2990 __ subs(count, count, 1);
2991 __ b(L_cardtable_loop, ge);
2992 __ BIND(L_done);
2993 }
2994 break;
2995 default:
2996 ShouldNotReachHere();
2997 }
2998 }
2999
3000 // Generates pattern of code to be placed after raw data copying in generate_oop_copy
3001 // Includes return from arraycopy stub.
3002 //
3003 // Arguments:
3004 // to: destination pointer after copying.
3005 // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
3006 // count: total number of copied elements, 32-bit int
3007 //
3008 // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
3009 void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward) {
3010 assert_different_registers(to, count, tmp);
3011
3012 if (forward) {
3013 // 'to' is upper bound of the modified region
3014 // restore initial dst:
3015 __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
3016 }
3017
3018 // 'to' is the beginning of the region
3019
3020 gen_write_ref_array_post_barrier(to, count, tmp);
3021
3022 if (status) {
3023 __ mov(R0, 0); // OK
3024 }
3025
3026 #ifdef AARCH64
3027 __ raw_pop(LR, ZR);
3028 __ ret();
3029 #else
3030 __ pop(PC);
3031 #endif // AARCH64
3032 }
3033
3034
3035 // Generate stub for assign-compatible oop copy. If "aligned" is true, the
3036 // "from" and "to" addresses are assumed to be heapword aligned.
3037 //
3038 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
3039 // "nooverlap_target" must be specified as the address to jump if they don't.
|
2837 }
2838
2839 __ ret();
2840 }
2841
2842 if (! to_is_aligned) {
2843 __ BIND(L_unaligned_dst);
2844 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
2845 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
2846
2847 if (status) {
2848 __ mov(R0, 0); // OK
2849 }
2850
2851 __ ret();
2852 }
2853
2854 return start;
2855 }
2856
2857
2858 // Generates pattern of code to be placed after raw data copying in generate_oop_copy
2859 // Includes return from arraycopy stub.
2860 //
2861 // Arguments:
2862 // to: destination pointer after copying.
2863 // if 'forward' then 'to' == upper bound, else 'to' == beginning of the modified region
2864 // count: total number of copied elements, 32-bit int
2865 //
2866 // Blows all volatile (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) and 'to', 'count', 'tmp' registers.
2867   void oop_arraycopy_stub_epilogue_helper(Register to, Register count, Register tmp, bool status, bool forward, DecoratorSet decorators) {
2868 assert_different_registers(to, count, tmp);
2869
2870 if (forward) {
2871 // 'to' is upper bound of the modified region
2872 // restore initial dst:
2873 __ sub_ptr_scaled_int32(to, to, count, LogBytesPerHeapOop);
2874 }
2875
2876 // 'to' is the beginning of the region
2877
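    // The GC-specific post barrier (G1 runtime call or card-table dirtying) is
    // emitted by the active collector's BarrierSetAssembler rather than being
    // open-coded here as gen_write_ref_array_post_barrier used to do; the boolean
    // argument is presumably the is_oop flag of the shared arraycopy_epilogue hook.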
2878 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2879 bs->arraycopy_epilogue(this, decorators, true, to, count, tmp);
2880
2881 if (status) {
2882 __ mov(R0, 0); // OK
2883 }
2884
2885 #ifdef AARCH64
2886 __ raw_pop(LR, ZR);
2887 __ ret();
2888 #else
2889 __ pop(PC);
2890 #endif // AARCH64
2891 }
2892
2893
2894 // Generate stub for assign-compatible oop copy. If "aligned" is true, the
2895 // "from" and "to" addresses are assumed to be heapword aligned.
2896 //
2897 // If "disjoint" is true, arrays are assumed to be disjoint, otherwise they may overlap and
2898 // "nooverlap_target" must be specified as the address to jump if they don't.
|
3068
3069 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);
3070
3071   // Conjoint case: since execution reaches this point, the arrays overlap, so perform a backward copy
3072 // Disjoint case: perform forward copy
3073 bool forward = disjoint;
3074
3075 const int bytes_per_count = BytesPerHeapOop;
3076 const int log_bytes_per_count = LogBytesPerHeapOop;
3077
3078 const Register saved_count = LR;
3079 const int callee_saved_regs = 3; // R0-R2
3080
3081 // LR is used later to save barrier args
3082 #ifdef AARCH64
3083 __ raw_push(LR, ZR);
3084 #else
3085 __ push(LR);
3086 #endif // AARCH64
3087
3088 #if INCLUDE_ALL_GCS
3089 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3090 #endif // INCLUDE_ALL_GCS
3091
3092 // save arguments for barrier generation (after the pre barrier)
3093 __ mov(saved_count, count);
3094
3095 if (!forward) {
3096 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
3097 __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
3098 }
3099
3100 // for short arrays, just do single element copy
3101 Label L_small_array;
3102 const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?
3103 __ cmp_32(count, small_copy_limit);
3104 __ b(L_small_array, le);
3105
3106 bool from_is_aligned = (bytes_per_count >= 8);
3107 if (aligned && forward && (HeapWordSize % 8 == 0)) {
3108 // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
3109 // then from is aligned by 8
|
2927
2928 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, tmp1, tmp2);
2929
2930   // Conjoint case: since execution reaches this point, the arrays overlap, so perform a backward copy
2931 // Disjoint case: perform forward copy
2932 bool forward = disjoint;
2933
2934 const int bytes_per_count = BytesPerHeapOop;
2935 const int log_bytes_per_count = LogBytesPerHeapOop;
2936
2937 const Register saved_count = LR;
2938 const int callee_saved_regs = 3; // R0-R2
2939
2940 // LR is used later to save barrier args
2941 #ifdef AARCH64
2942 __ raw_push(LR, ZR);
2943 #else
2944 __ push(LR);
2945 #endif // AARCH64
2946
2947 DecoratorSet decorators = 0;
2948 if (disjoint) {
2949 decorators |= ARRAYCOPY_DISJOINT;
2950 }
2951 if (aligned) {
2952 decorators |= ARRAYCOPY_ALIGNED;
2953 }
2954
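    // ARRAYCOPY_DISJOINT / ARRAYCOPY_ALIGNED describe this particular copy so the
    // barrier code can specialize; arraycopy_prologue then emits whatever pre-barrier
    // the active GC needs over the destination range (e.g. G1's SATB logging), and
    // nothing at all for a plain card table.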
2955 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2956 bs->arraycopy_prologue(this, decorators, true, to, count, callee_saved_regs);
2957
2958 // save arguments for barrier generation (after the pre barrier)
2959 __ mov(saved_count, count);
2960
2961 if (!forward) {
2962 __ add_ptr_scaled_int32(to, to, count, log_bytes_per_count);
2963 __ add_ptr_scaled_int32(from, from, count, log_bytes_per_count);
2964 }
2965
2966 // for short arrays, just do single element copy
2967 Label L_small_array;
2968 const int small_copy_limit = (8*wordSize + 7)/bytes_per_count; // XXX optim: tune the limit higher ?
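    // e.g. (8*4 + 7)/4 == 9 elements on 32-bit ARM; (8*8 + 7)/4 == 17 on AArch64
    // with compressed oops, (8*8 + 7)/8 == 8 with uncompressed oops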
2969 __ cmp_32(count, small_copy_limit);
2970 __ b(L_small_array, le);
2971
2972 bool from_is_aligned = (bytes_per_count >= 8);
2973 if (aligned && forward && (HeapWordSize % 8 == 0)) {
2974 // if 'from' is heapword aligned and HeapWordSize is divisible by 8,
2975 // then from is aligned by 8
|
3128 to_is_aligned = true;
3129 }
3130 }
3131
3132 Label L_unaligned_dst;
3133
3134 if (!to_is_aligned) {
3135 BLOCK_COMMENT("Check dst alignment:");
3136 __ tst(to, wordSize - 1);
3137 __ b(L_unaligned_dst, ne); // 'to' is not aligned
3138 }
3139
3140 int min_copy;
3141 if (forward) {
3142 min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3143 } else {
3144 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3145 }
3146 assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
3147
3148 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3149
3150 {
3151 copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3152
3153 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3154 }
3155
3156 if (!to_is_aligned) {
3157 // !to_is_aligned <=> UseCompressedOops && AArch64
3158 __ BIND(L_unaligned_dst);
3159 #ifdef AARCH64
3160 assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
3161 #else
3162 ShouldNotReachHere();
3163 #endif // AARCH64
3164 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3165 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
3166
3167 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward);
3168 }
3169
3170 return start;
3171 }
3172
3173 // Generate 'unsafe' array copy stub
3174 // Though just as safe as the other stubs, it takes an unscaled
3175 // size_t argument instead of an element count.
3176 //
3177 // Arguments for generated stub:
3178 // from: R0
3179 // to: R1
3180 // count: R2 byte count, treated as ssize_t, can be zero
3181 //
3182 // Examines the alignment of the operands and dispatches
3183 // to a long, int, short, or byte copy loop.
3184 //
3185 address generate_unsafe_copy(const char* name) {
3186
|
2994 to_is_aligned = true;
2995 }
2996 }
2997
2998 Label L_unaligned_dst;
2999
3000 if (!to_is_aligned) {
3001 BLOCK_COMMENT("Check dst alignment:");
3002 __ tst(to, wordSize - 1);
3003 __ b(L_unaligned_dst, ne); // 'to' is not aligned
3004 }
3005
3006 int min_copy;
3007 if (forward) {
3008 min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count);
3009 } else {
3010 min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
3011 }
3012 assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
3013
3014 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3015
3016 {
3017 copy_small_array(from, to, count, tmp1, noreg, bytes_per_count, forward, L_small_array);
3018
3019 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3020 }
3021
3022 if (!to_is_aligned) {
3023 // !to_is_aligned <=> UseCompressedOops && AArch64
3024 __ BIND(L_unaligned_dst);
3025 #ifdef AARCH64
3026 assert (UseCompressedOops, "unaligned oop array copy may be requested only with UseCompressedOops");
3027 #else
3028 ShouldNotReachHere();
3029 #endif // AARCH64
3030 int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
3031 assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
3032
3033 oop_arraycopy_stub_epilogue_helper(to, saved_count, /* tmp */ tmp1, status, forward, decorators);
3034 }
3035
3036 return start;
3037 }
3038
3039 // Generate 'unsafe' array copy stub
3040 // Though just as safe as the other stubs, it takes an unscaled
3041 // size_t argument instead of an element count.
3042 //
3043 // Arguments for generated stub:
3044 // from: R0
3045 // to: R1
3046 // count: R2 byte count, treated as ssize_t, can be zero
3047 //
3048 // Examines the alignment of the operands and dispatches
3049 // to a long, int, short, or byte copy loop.
3050 //
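  // For example, a 17-byte request can only take the byte loop, whereas 8-byte-aligned
  // 'from' and 'to' with a count that is a multiple of 8 can take the long loop.
  //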
3051 address generate_unsafe_copy(const char* name) {
3052
|
3318 // to: R1
3319 // count: R2 treated as signed 32-bit int
3320 // ckoff: R3 (super_check_offset)
3321 // ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
3322 // ret: R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
3323 //
3324 address generate_checkcast_copy(const char * name) {
3325 __ align(CodeEntryAlignment);
3326 StubCodeMark mark(this, "StubRoutines", name);
3327 address start = __ pc();
3328
3329 const Register from = R0; // source array address
3330 const Register to = R1; // destination array address
3331 const Register count = R2; // elements count
3332
3333 const Register R3_ckoff = R3; // super_check_offset
3334 const Register R4_ckval = R4; // super_klass
3335
3336 const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently
3337
3338 Label load_element, store_element, do_card_marks, fail;
3339
3340 BLOCK_COMMENT("Entry:");
3341
3342 __ zap_high_non_significant_bits(R2);
3343
3344 #ifdef AARCH64
3345 __ raw_push(LR, ZR);
3346 __ raw_push(R19, R20);
3347 #else
3348 int pushed = 0;
3349 __ push(LR);
3350 pushed+=1;
3351 #endif // AARCH64
3352
3353 #if INCLUDE_ALL_GCS
3354 gen_write_ref_array_pre_barrier(to, count, callee_saved_regs);
3355 #endif // INCLUDE_ALL_GCS
3356
3357 #ifndef AARCH64
3358 const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
3359 __ push(caller_saved_regs);
3360 assert(caller_saved_regs.size() == 6, "check the count");
3361 pushed+=6;
3362
3363 __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack
3364 #endif // !AARCH64
3365
3366 // Save arguments for barrier generation (after the pre barrier):
3367 // - must be a caller saved register and not LR
3368 // - ARM32: avoid R10 in case RThread is needed
3369 const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
3370 #ifdef AARCH64
3371 __ mov_w(saved_count, count);
3372 __ cbnz_w(count, load_element); // and test count
3373 #else
3374 __ movs(saved_count, count); // and test count
|
3184 // to: R1
3185 // count: R2 treated as signed 32-bit int
3186 // ckoff: R3 (super_check_offset)
3187 // ckval: R4 (AArch64) / SP[0] (32-bit ARM) (super_klass)
3188 // ret: R0 zero for success; (-1^K) where K is partial transfer count (32-bit)
3189 //
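  // For example, if a type check fails after K == 5 elements have been stored, the
  // stub returns -1 ^ 5 == ~5 == -6 in R0 and the caller recovers K as ~R0;
  // R0 == 0 means every element was copied.
  //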
3190 address generate_checkcast_copy(const char * name) {
3191 __ align(CodeEntryAlignment);
3192 StubCodeMark mark(this, "StubRoutines", name);
3193 address start = __ pc();
3194
3195 const Register from = R0; // source array address
3196 const Register to = R1; // destination array address
3197 const Register count = R2; // elements count
3198
3199 const Register R3_ckoff = R3; // super_check_offset
3200 const Register R4_ckval = R4; // super_klass
3201
3202 const int callee_saved_regs = AARCH64_ONLY(5) NOT_AARCH64(4); // LR saved differently
3203
3204 Label load_element, store_element, do_epilogue, fail;
3205
3206 BLOCK_COMMENT("Entry:");
3207
3208 __ zap_high_non_significant_bits(R2);
3209
3210 #ifdef AARCH64
3211 __ raw_push(LR, ZR);
3212 __ raw_push(R19, R20);
3213 #else
3214 int pushed = 0;
3215 __ push(LR);
3216 pushed+=1;
3217 #endif // AARCH64
3218
3219 DecoratorSet decorators = ARRAYCOPY_CHECKCAST;
3220
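    // ARRAYCOPY_CHECKCAST marks an element-by-element copy with a per-element type
    // check, so the epilogue barrier may only cover the elements actually stored;
    // that is why the copied count is recomputed at do_epilogue before calling
    // arraycopy_epilogue.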
3221 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
3222 bs->arraycopy_prologue(this, decorators, true, to, count, callee_saved_regs);
3223
3224 #ifndef AARCH64
3225 const RegisterSet caller_saved_regs = RegisterSet(R4,R6) | RegisterSet(R8,R9) | altFP_7_11;
3226 __ push(caller_saved_regs);
3227 assert(caller_saved_regs.size() == 6, "check the count");
3228 pushed+=6;
3229
3230 __ ldr(R4_ckval,Address(SP, wordSize*pushed)); // read the argument that was on the stack
3231 #endif // !AARCH64
3232
3233 // Save arguments for barrier generation (after the pre barrier):
3234 // - must be a caller saved register and not LR
3235 // - ARM32: avoid R10 in case RThread is needed
3236 const Register saved_count = AARCH64_ONLY(R19) NOT_AARCH64(altFP_7_11);
3237 #ifdef AARCH64
3238 __ mov_w(saved_count, count);
3239 __ cbnz_w(count, load_element); // and test count
3240 #else
3241 __ movs(saved_count, count); // and test count
|
3381 #ifdef AARCH64
3382 __ raw_pop(R19, R20);
3383 __ raw_pop(LR, ZR);
3384 __ ret();
3385 #else
3386 __ pop(caller_saved_regs);
3387 __ pop(PC);
3388 #endif // AARCH64
3389
3390 // ======== begin loop ========
3391 // (Loop is rotated; its entry is load_element.)
3392 __ align(OptoLoopAlignment);
3393 __ BIND(store_element);
3394 if (UseCompressedOops) {
3395 __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
3396 __ subs_32(count,count,1);
3397 } else {
3398 __ subs_32(count,count,1);
3399 __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
3400 }
3401 __ b(do_card_marks, eq); // count exhausted
3402
3403 // ======== loop entry is here ========
3404 __ BIND(load_element);
3405 __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed)); // load the oop
3406 __ cbz(R5, store_element); // NULL
3407
3408 __ load_klass(R6, R5);
3409
3410 generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,
3411 // branch to this on success:
3412 store_element);
3413 // ======== end loop ========
3414
3415 // It was a real error; we must depend on the caller to finish the job.
3416 // Register count has number of *remaining* oops, saved_count number of *total* oops.
3417 // Emit GC store barriers for the oops we have copied
3418 // and report their number to the caller (0 or (-1^n))
3419 __ BIND(fail);
3420
3421 // Note: fail marked by the fact that count differs from saved_count
3422
3423 __ BIND(do_card_marks);
3424
3425 Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved
3426 Label L_not_copied;
3427
3428 __ subs_32(copied, saved_count, count); // copied count (in saved reg)
3429 __ b(L_not_copied, eq); // nothing was copied, skip post barrier
3430 __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3431 __ mov(R12, copied); // count arg scratched by post barrier
3432
3433 gen_write_ref_array_post_barrier(to, R12, R3);
3434
3435 assert_different_registers(R3,R12,LR,copied,saved_count);
3436 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);
3437
3438 __ BIND(L_not_copied);
3439 __ cmp_32(copied, saved_count); // values preserved in saved registers
3440
3441 #ifdef AARCH64
3442 __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
3443 __ raw_pop(R19, R20);
3444 __ raw_pop(LR, ZR);
3445 __ ret();
3446 #else
3447 __ mov(R0, 0, eq); // 0 if all copied
3448 __ mvn(R0, copied, ne); // else NOT(copied)
3449 __ pop(caller_saved_regs);
3450 __ pop(PC);
3451 #endif // AARCH64
3452
|
3248 #ifdef AARCH64
3249 __ raw_pop(R19, R20);
3250 __ raw_pop(LR, ZR);
3251 __ ret();
3252 #else
3253 __ pop(caller_saved_regs);
3254 __ pop(PC);
3255 #endif // AARCH64
3256
3257 // ======== begin loop ========
3258 // (Loop is rotated; its entry is load_element.)
3259 __ align(OptoLoopAlignment);
3260 __ BIND(store_element);
3261 if (UseCompressedOops) {
3262 __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags
3263 __ subs_32(count,count,1);
3264 } else {
3265 __ subs_32(count,count,1);
3266 __ str(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop
3267 }
3268 __ b(do_epilogue, eq); // count exhausted
3269
3270 // ======== loop entry is here ========
3271 __ BIND(load_element);
3272 __ load_heap_oop(R5, Address(from, BytesPerHeapOop, post_indexed)); // load the oop
3273 __ cbz(R5, store_element); // NULL
3274
3275 __ load_klass(R6, R5);
3276
3277 generate_type_check(R6, R3_ckoff, R4_ckval, /*tmps*/ R12, R8, R9,
3278 // branch to this on success:
3279 store_element);
3280 // ======== end loop ========
3281
3282 // It was a real error; we must depend on the caller to finish the job.
3283 // Register count has number of *remaining* oops, saved_count number of *total* oops.
3284 // Emit GC store barriers for the oops we have copied
3285 // and report their number to the caller (0 or (-1^n))
3286 __ BIND(fail);
3287
3288 // Note: fail marked by the fact that count differs from saved_count
3289
3290 __ BIND(do_epilogue);
3291
3292 Register copied = AARCH64_ONLY(R20) NOT_AARCH64(R4); // saved
3293 Label L_not_copied;
3294
3295 __ subs_32(copied, saved_count, count); // copied count (in saved reg)
3296 __ b(L_not_copied, eq); // nothing was copied, skip post barrier
3297 __ sub(to, to, AsmOperand(copied, lsl, LogBytesPerHeapOop)); // initial to value
3298 __ mov(R12, copied); // count arg scratched by post barrier
3299
3300 bs->arraycopy_epilogue(this, decorators, true, to, R12, R3);
3301
3302 assert_different_registers(R3,R12,LR,copied,saved_count);
3303 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R3, R12);
3304
3305 __ BIND(L_not_copied);
3306 __ cmp_32(copied, saved_count); // values preserved in saved registers
3307
3308 #ifdef AARCH64
3309 __ csinv(R0, ZR, copied, eq); // 0 if all copied else NOT(copied)
3310 __ raw_pop(R19, R20);
3311 __ raw_pop(LR, ZR);
3312 __ ret();
3313 #else
3314 __ mov(R0, 0, eq); // 0 if all copied
3315 __ mvn(R0, copied, ne); // else NOT(copied)
3316 __ pop(caller_saved_regs);
3317 __ pop(PC);
3318 #endif // AARCH64
3319
|