
src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

rev 60615 : 8231441: Initial SVE backend support
Reviewed-by: adinn, pli
Contributed-by: joshua.zhu@arm.com, yang.zhang@arm.com, ningsheng.jian@arm.com


 871   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
 872   void encode_heap_oop_not_null(Register r);
 873   void decode_heap_oop_not_null(Register r);
 874   void encode_heap_oop_not_null(Register dst, Register src);
 875   void decode_heap_oop_not_null(Register dst, Register src);
 876 
 877   void set_narrow_oop(Register dst, jobject obj);
 878 
 879   void encode_klass_not_null(Register r);
 880   void decode_klass_not_null(Register r);
 881   void encode_klass_not_null(Register dst, Register src);
 882   void decode_klass_not_null(Register dst, Register src);
 883 
 884   void set_narrow_klass(Register dst, Klass* k);
 885 
 886   // If the heap base register is used, reinitialize it with the correct value
 887   void reinit_heapbase();
 888 
 889   DEBUG_ONLY(void verify_heapbase(const char* msg);)
 890 
 891   void push_CPU_state(bool save_vectors = false);
 892   void pop_CPU_state(bool restore_vectors = false) ;


 893 
 894   // Round up to a multiple of modulus (a power of two)
 895   void round_to(Register reg, int modulus);
 896 
 897   // allocation
 898   void eden_allocate(
 899     Register obj,                      // result: pointer to object after successful allocation
 900     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
 901     int      con_size_in_bytes,        // object size in bytes if   known at compile time
 902     Register t1,                       // temp register
 903     Label&   slow_case                 // continuation point if fast allocation fails
 904   );
 905   void tlab_allocate(
 906     Register obj,                      // result: pointer to object after successful allocation
 907     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
 908     int      con_size_in_bytes,        // object size in bytes if   known at compile time
 909     Register t1,                       // temp register
 910     Register t2,                       // temp register
 911     Label&   slow_case                 // continuation point if fast allocation fails
 912   );


 952                                      Register temp_reg,
 953                                      Register temp2_reg,
 954                                      Label* L_success,
 955                                      Label* L_failure,
 956                                      bool set_cond_codes = false);
 957 
 958   // Simplified, combined version, good for typical uses.
 959   // Falls through on failure.
 960   void check_klass_subtype(Register sub_klass,
 961                            Register super_klass,
 962                            Register temp_reg,
 963                            Label& L_success);
 964 
 965   void clinit_barrier(Register klass,
 966                       Register thread,
 967                       Label* L_fast_path = NULL,
 968                       Label* L_slow_path = NULL);
 969 
 970   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 971 





 972 
 973   // Debugging
 974 
 975   // only if +VerifyOops
 976   void verify_oop(Register reg, const char* s = "broken oop");
 977   void verify_oop_addr(Address addr, const char * s = "broken oop addr");
 978 
 979 // TODO: verify method and klass metadata (compare against vptr?)
 980   void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
 981   void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
 982 
 983 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 984 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 985 
 986   // only if +VerifyFPU
 987   void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
 988 
 989   // prints msg, dumps registers and stops execution
 990   void stop(const char* msg);
 991 


1301         Register len, Register tmp0, Register tmp1, Register tmp2,
1302         Register tmp3);
1303 public:
1304   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
1305                        Register zlen, Register tmp1, Register tmp2, Register tmp3,
1306                        Register tmp4, Register tmp5, Register tmp6, Register tmp7);
1307   void mul_add(Register out, Register in, Register offs, Register len, Register k);
1308   // ISB may be needed because of a safepoint
1309   void maybe_isb() { isb(); }
1310 
1311 private:
1312   // Return the effective address r + (r1 << ext) + offset.
1313   // Uses rscratch2.
1314   Address offsetted_address(Register r, Register r1, Address::extend ext,
1315                             int offset, int size);
1316 
1317 private:
1318   // Returns an address on the stack which is reachable with a ldr/str of the given size.
1319   // Uses rscratch2 if the address is not directly reachable.
1320   Address spill_address(int size, int offset, Register tmp=rscratch2);

1321 
1322   bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
1323 
1324   // Check whether two loads/stores can be merged into ldp/stp.
1325   bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;
1326 
1327   // Merge current load/store with previous load/store into ldp/stp.
1328   void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1329 
1330   // Try to merge two loads/stores into ldp/stp. Returns true on success, false otherwise.
1331   bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1332 
1333 public:
1334   void spill(Register Rx, bool is64, int offset) {
1335     if (is64) {
1336       str(Rx, spill_address(8, offset));
1337     } else {
1338       strw(Rx, spill_address(4, offset));
1339     }
1340   }
1341   void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
1342     str(Vx, T, spill_address(1 << (int)T, offset));
1343   }



1344   void unspill(Register Rx, bool is64, int offset) {
1345     if (is64) {
1346       ldr(Rx, spill_address(8, offset));
1347     } else {
1348       ldrw(Rx, spill_address(4, offset));
1349     }
1350   }
1351   void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
1352     ldr(Vx, T, spill_address(1 << (int)T, offset));
1353   }



1354   void spill_copy128(int src_offset, int dst_offset,
1355                      Register tmp1=rscratch1, Register tmp2=rscratch2) {
1356     if (src_offset < 512 && (src_offset & 7) == 0 &&
1357         dst_offset < 512 && (dst_offset & 7) == 0) {
1358       ldp(tmp1, tmp2, Address(sp, src_offset));
1359       stp(tmp1, tmp2, Address(sp, dst_offset));
1360     } else {
1361       unspill(tmp1, true, src_offset);
1362       spill(tmp1, true, dst_offset);
1363       unspill(tmp1, true, src_offset+8);
1364       spill(tmp1, true, dst_offset+8);
1365     }
1366   }
1367 








1368   void cache_wb(Address line);
1369   void cache_wbsync(bool is_pre);
1370 };
1371 
1372 #ifdef ASSERT
1373 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1374 #endif
1375 
1376 /**
1377  * class SkipIfEqual:
1378  *
1379  * Instantiating this class emits assembly code that jumps over any code
1380  * emitted between the creation of the instance and its automatic destruction
1381  * at the end of the enclosing scope block, depending on the value of the
1382  * flag passed to the constructor, which is checked at run time.
1383  */
1384 class SkipIfEqual {
1385  private:
1386   MacroAssembler* _masm;
1387   Label _label;


 871   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
 872   void encode_heap_oop_not_null(Register r);
 873   void decode_heap_oop_not_null(Register r);
 874   void encode_heap_oop_not_null(Register dst, Register src);
 875   void decode_heap_oop_not_null(Register dst, Register src);
 876 
 877   void set_narrow_oop(Register dst, jobject obj);
 878 
 879   void encode_klass_not_null(Register r);
 880   void decode_klass_not_null(Register r);
 881   void encode_klass_not_null(Register dst, Register src);
 882   void decode_klass_not_null(Register dst, Register src);
 883 
 884   void set_narrow_klass(Register dst, Klass* k);
 885 
 886   // If the heap base register is used, reinitialize it with the correct value
 887   void reinit_heapbase();
 888 
 889   DEBUG_ONLY(void verify_heapbase(const char* msg);)
 890 
 891   void push_CPU_state(bool save_vectors = false, bool use_sve = false,
 892                       int sve_vector_size_in_bytes = 0);
 893   void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
 894                       int sve_vector_size_in_bytes = 0);
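The SVE change widens push_CPU_state/pop_CPU_state so callers can save and restore full-width Z registers rather than only the low 128 bits of the vector registers. A minimal call-site sketch, not taken from this patch; it assumes the usual `__`-to-MacroAssembler shorthand and the UseSVE flag, and the hard-coded vector size is purely illustrative (real callers query the VM/Matcher for it):

    // Save the whole register file around code that may clobber vector
    // registers; with use_sve set, each vector is saved at its full SVE width.
    bool use_sve = (UseSVE > 0);
    int  sve_vector_size_in_bytes = 16;   // illustrative only
    __ push_CPU_state(true /* save_vectors */, use_sve, sve_vector_size_in_bytes);
    // ... code that may clobber SIMD/SVE registers ...
    __ pop_CPU_state(true /* restore_vectors */, use_sve, sve_vector_size_in_bytes);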
 895 
 896   // Round up to a multiple of modulus (a power of two)
 897   void round_to(Register reg, int modulus);
 898 
 899   // allocation
 900   void eden_allocate(
 901     Register obj,                      // result: pointer to object after successful allocation
 902     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
 903     int      con_size_in_bytes,        // object size in bytes if   known at compile time
 904     Register t1,                       // temp register
 905     Label&   slow_case                 // continuation point if fast allocation fails
 906   );
 907   void tlab_allocate(
 908     Register obj,                      // result: pointer to object after successful allocation
 909     Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
 910     int      con_size_in_bytes,        // object size in bytes if   known at compile time
 911     Register t1,                       // temp register
 912     Register t2,                       // temp register
 913     Label&   slow_case                 // continuation point if fast allocation fails
 914   );
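A brief usage sketch for the allocation helpers above (illustrative only, not code from this file): when the object size is a compile-time constant, callers pass noreg for var_size_in_bytes and the constant in con_size_in_bytes; the register choices below are arbitrary and the `__` shorthand is assumed.

    Label slow_case, done;
    __ tlab_allocate(r0,          // obj: result of the allocation
                     noreg,       // var_size_in_bytes: unused, size is constant
                     16,          // con_size_in_bytes: 16-byte object
                     r2, r3,      // temp registers
                     slow_case);  // taken when the TLAB fast path fails
    __ b(done);
    __ bind(slow_case);
    // ... fall back to the runtime allocator ...
    __ bind(done);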


 954                                      Register temp_reg,
 955                                      Register temp2_reg,
 956                                      Label* L_success,
 957                                      Label* L_failure,
 958                                      bool set_cond_codes = false);
 959 
 960   // Simplified, combined version, good for typical uses.
 961   // Falls through on failure.
 962   void check_klass_subtype(Register sub_klass,
 963                            Register super_klass,
 964                            Register temp_reg,
 965                            Label& L_success);
 966 
 967   void clinit_barrier(Register klass,
 968                       Register thread,
 969                       Label* L_fast_path = NULL,
 970                       Label* L_slow_path = NULL);
 971 
 972   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 973 
 974   void verify_sve_vector_length();
 975   void reinitialize_ptrue() {
 976     sve_ptrue(ptrue, B);
 977   }
 978   void verify_ptrue();
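With SVE enabled, generated code relies on a reserved predicate register (named ptrue) that is expected to hold an all-true predicate; calls into the runtime may clobber predicate registers, so the predicate has to be re-established afterwards. A minimal sketch of how these helpers fit together (the placement is illustrative):

    // After returning from a call that may have clobbered predicate registers,
    // restore the reserved all-true predicate and optionally sanity-check it.
    __ reinitialize_ptrue();   // emits sve_ptrue(ptrue, B)
    __ verify_ptrue();         // stops the VM if ptrue is no longer all-true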
 979 
 980   // Debugging
 981 
 982   // only if +VerifyOops
 983   void verify_oop(Register reg, const char* s = "broken oop");
 984   void verify_oop_addr(Address addr, const char * s = "broken oop addr");
 985 
 986 // TODO: verify method and klass metadata (compare against vptr?)
 987   void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
 988   void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
 989 
 990 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 991 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 992 
 993   // only if +VerifyFPU
 994   void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
 995 
 996   // prints msg, dumps registers and stops execution
 997   void stop(const char* msg);
 998 


1308         Register len, Register tmp0, Register tmp1, Register tmp2,
1309         Register tmp3);
1310 public:
1311   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
1312                        Register zlen, Register tmp1, Register tmp2, Register tmp3,
1313                        Register tmp4, Register tmp5, Register tmp6, Register tmp7);
1314   void mul_add(Register out, Register in, Register offs, Register len, Register k);
1315   // ISB may be needed because of a safepoint
1316   void maybe_isb() { isb(); }
1317 
1318 private:
1319   // Return the effective address r + (r1 << ext) + offset.
1320   // Uses rscratch2.
1321   Address offsetted_address(Register r, Register r1, Address::extend ext,
1322                             int offset, int size);
1323 
1324 private:
1325   // Returns an address on the stack which is reachable with a ldr/str of the given size.
1326   // Uses rscratch2 if the address is not directly reachable.
1327   Address spill_address(int size, int offset, Register tmp=rscratch2);
1328   Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
1329 
1330   bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
1331 
1332   // Check whether two loads/stores can be merged into ldp/stp.
1333   bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;
1334 
1335   // Merge current load/store with previous load/store into ldp/stp.
1336   void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1337 
1338   // Try to merge two loads/stores into ldp/stp. Returns true on success, false otherwise.
1339   bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);
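To illustrate what the merging helpers above are for (a conceptual example, not code from this file): two adjacent, same-sized accesses such as

    str(r0, Address(sp, 16));
    str(r1, Address(sp, 24));

can be folded into a single

    stp(r0, r1, Address(sp, 16));

when the offsets are contiguous and pass the alignment and reachability checks, which is what ldst_can_merge decides and merge_ldst performs.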
1340 
1341 public:
1342   void spill(Register Rx, bool is64, int offset) {
1343     if (is64) {
1344       str(Rx, spill_address(8, offset));
1345     } else {
1346       strw(Rx, spill_address(4, offset));
1347     }
1348   }
1349   void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
1350     str(Vx, T, spill_address(1 << (int)T, offset));
1351   }
1352   void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
1353     sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
1354   }
1355   void unspill(Register Rx, bool is64, int offset) {
1356     if (is64) {
1357       ldr(Rx, spill_address(8, offset));
1358     } else {
1359       ldrw(Rx, spill_address(4, offset));
1360     }
1361   }
1362   void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
1363     ldr(Vx, T, spill_address(1 << (int)T, offset));
1364   }
1365   void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
1366     sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
1367   }
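For reference, the SIMD_RegVariant argument of the plain spill/unspill above encodes the access size as 1 << T bytes (B, H, S, D, Q give 1, 2, 4, 8, 16). The SVE variants instead take the vector size explicitly; a minimal sketch, with an illustrative register (v16, which is also the SVE z16 register, since the helpers take a FloatRegister) and an illustrative 32-byte size:

    const int vec_bytes = 32;   // e.g. a 256-bit SVE implementation; real code queries the VM
    __ spill_sve_vector(v16, 0 /* offset */, vec_bytes);
    // ... v16 can now be reused ...
    __ unspill_sve_vector(v16, 0 /* offset */, vec_bytes);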
1368   void spill_copy128(int src_offset, int dst_offset,
1369                      Register tmp1=rscratch1, Register tmp2=rscratch2) {
1370     if (src_offset < 512 && (src_offset & 7) == 0 &&
1371         dst_offset < 512 && (dst_offset & 7) == 0) {
1372       ldp(tmp1, tmp2, Address(sp, src_offset));
1373       stp(tmp1, tmp2, Address(sp, dst_offset));
1374     } else {
1375       unspill(tmp1, true, src_offset);
1376       spill(tmp1, true, dst_offset);
1377       unspill(tmp1, true, src_offset+8);
1378       spill(tmp1, true, dst_offset+8);
1379     }
1380   }
1381   void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
1382                                             int sve_vec_reg_size_in_bytes) {
1383     assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
1384     for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
1385       spill_copy128(src_offset, dst_offset);
1386       src_offset += 16;
1387       dst_offset += 16;
1388     }
1389   }
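Usage note: the SVE stack-to-stack copy above simply decomposes into 128-bit chunks, so copying a 512-bit vector slot emits four spill_copy128 calls (four ldp/stp pairs when the offsets are small and 8-byte aligned). A hypothetical call with illustrative offsets:

    // Copy one spilled 64-byte (512-bit) SVE vector slot to another stack slot.
    __ spill_copy_sve_vector_stack_to_stack(0 /* src_offset */, 64 /* dst_offset */, 64);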
1390   void cache_wb(Address line);
1391   void cache_wbsync(bool is_pre);
1392 };
1393 
1394 #ifdef ASSERT
1395 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1396 #endif
1397 
1398 /**
1399  * class SkipIfEqual:
1400  *
1401  * Instantiating this class emits assembly code that jumps over any code
1402  * emitted between the creation of the instance and its automatic destruction
1403  * at the end of the enclosing scope block, depending on the value of the
1404  * flag passed to the constructor, which is checked at run time.
1405  */
1406 class SkipIfEqual {
1407  private:
1408   MacroAssembler* _masm;
1409   Label _label;