
src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

8248238: Adding Windows support to OpenJDK on AArch64

Summary: LP64 vs LLP64 changes to add Windows support

Contributed-by: Monica Beckwith <monica.beckwith@microsoft.com>, Ludovic Henry <luhenry@microsoft.com>
Reviewed-by:

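The substance of the patch is the data-model difference between the LP64 platforms OpenJDK already targets (Linux and macOS, where long and pointers are both 64 bits) and Windows' LLP64 model, where long and unsigned long are only 32 bits. Declarations that used long or unsigned long for 64-bit values and address arithmetic are therefore switched to the fixed-width <stdint.h> types int64_t/uint64_t, and the non-standard u_int64_t/u_int32_t typedefs (not provided by the Windows toolchain) are replaced by the standard uint64_t/uint32_t. The standalone sketch below is not part of the webrev; it only illustrates the width difference and the pointer-truncation hazard that motivates the change.

  // lp64_vs_llp64.cpp -- illustrative sketch only, not part of the patch.
  // On LP64 (Linux/macOS) sizeof(long) == 8; on LLP64 (Windows)
  // sizeof(long) == 4, so casting a 64-bit address through 'long' or
  // 'unsigned long' would silently drop the upper 32 bits.
  #include <cstdint>
  #include <cstdio>

  int main() {
    std::printf("sizeof(long)          = %zu\n", sizeof(long));          // 8 on LP64, 4 on LLP64
    std::printf("sizeof(unsigned long) = %zu\n", sizeof(unsigned long)); // 8 on LP64, 4 on LLP64
    std::printf("sizeof(long long)     = %zu\n", sizeof(long long));     // 8 on both
    std::printf("sizeof(int64_t)       = %zu\n", sizeof(int64_t));       // 8 on both
    std::printf("sizeof(void*)         = %zu\n", sizeof(void*));         // 8 on any 64-bit target

    int x = 42;
    void* p = &x;
    // Portable: uintptr_t/uint64_t keep the full 64-bit address under both models.
    uint64_t addr = (uint64_t)(uintptr_t)p;
    // Non-portable: on LLP64 the commented-out cast would keep only the low 32 bits.
    // unsigned long truncated = (unsigned long)p;
    std::printf("address = %#llx\n", (unsigned long long)addr);
    return 0;
  }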
Old version of the changed sections (before the patch):

 439     umaddl(Rd, Rn, Rm, zr);
 440   }
 441 
 442 #define WRAP(INSN)                                                            \
 443   void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {             \
 444     if ((VM_Version::features() & VM_Version::CPU_A53MAC) && Ra != zr)        \
 445       nop();                                                                  \
 446     Assembler::INSN(Rd, Rn, Rm, Ra);                                          \
 447   }
 448 
 449   WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
 450   WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
 451 #undef WRAP
 452 
 453 
 454   // macro assembly operations needed for aarch64
 455 
 456   // first two private routines for loading 32 bit or 64 bit constants
 457 private:
 458 
 459   void mov_immediate64(Register dst, u_int64_t imm64);
 460   void mov_immediate32(Register dst, u_int32_t imm32);
 461 
 462   int push(unsigned int bitset, Register stack);
 463   int pop(unsigned int bitset, Register stack);
 464 
 465   int push_fp(unsigned int bitset, Register stack);
 466   int pop_fp(unsigned int bitset, Register stack);
 467 
 468   void mov(Register dst, Address a);
 469 
 470 public:
 471   void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
 472   void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
 473 
 474   void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
 475   void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
 476 
 477   // Push and pop everything that might be clobbered by a native
 478   // runtime call except rscratch1 and rscratch2.  (They are always
 479   // scratch, so we don't have to protect them.)  Only save the lower
 480   // 64 bits of each vector register.
 481   void push_call_clobbered_registers();
 482   void pop_call_clobbered_registers();
 483 
 484   // now mov instructions for loading absolute addresses and 32 or
 485   // 64 bit integers
 486 
 487   inline void mov(Register dst, address addr)
 488   {
 489     mov_immediate64(dst, (u_int64_t)addr);
 490   }
 491 
 492   inline void mov(Register dst, u_int64_t imm64)
 493   {
 494     mov_immediate64(dst, imm64);
 495   }
 496 
 497   inline void movw(Register dst, u_int32_t imm32)
 498   {
 499     mov_immediate32(dst, imm32);
 500   }
 501 
 502   inline void mov(Register dst, long l)
 503   {
 504     mov(dst, (u_int64_t)l);
 505   }
 506 
 507   inline void mov(Register dst, int i)
 508   {
 509     mov(dst, (long)i);
 510   }
 511 
 512   void mov(Register dst, RegisterOrConstant src) {
 513     if (src.is_register())
 514       mov(dst, src.as_register());
 515     else
 516       mov(dst, src.as_constant());
 517   }
 518 
 519   void movptr(Register r, uintptr_t imm64);
 520 
 521   void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);
 522 
 523   void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
 524     orr(Vd, T, Vn, Vn);
 525   }
 526 
 527 public:
 528 
 529   // Generalized Test Bit And Branch, including a "far" variety which
 530   // spans more than 32KiB.
 531   void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) {
 532     assert(cond == EQ || cond == NE, "must be");
 533 
 534     if (far)
 535       cond = ~cond;
 536 
 537     void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
 538     if (cond == Assembler::EQ)
 539       branch = &Assembler::tbz;
 540     else
 541       branch = &Assembler::tbnz;


1153              enum shift_kind kind, unsigned shift = 0) {                \
1154     Assembler::INSN(Rd, Rn, Rm, kind, shift);                           \
1155   }                                                                     \
1156                                                                         \
1157   void INSN(Register Rd, Register Rn, Register Rm) {                    \
1158     Assembler::INSN(Rd, Rn, Rm);                                        \
1159   }                                                                     \
1160                                                                         \
1161   void INSN(Register Rd, Register Rn, Register Rm,                      \
1162            ext::operation option, int amount = 0) {                     \
1163     Assembler::INSN(Rd, Rn, Rm, option, amount);                        \
1164   }
1165 
1166   WRAP(adds) WRAP(addsw) WRAP(subs) WRAP(subsw)
1167 
1168   void add(Register Rd, Register Rn, RegisterOrConstant increment);
1169   void addw(Register Rd, Register Rn, RegisterOrConstant increment);
1170   void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
1171   void subw(Register Rd, Register Rn, RegisterOrConstant decrement);
1172 
1173   void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
1174 
1175   void tableswitch(Register index, jint lowbound, jint highbound,
1176                    Label &jumptable, Label &jumptable_end, int stride = 1) {
1177     adr(rscratch1, jumptable);
1178     subsw(rscratch2, index, lowbound);
1179     subsw(zr, rscratch2, highbound - lowbound);
1180     br(Assembler::HS, jumptable_end);
1181     add(rscratch1, rscratch1, rscratch2,
1182         ext::sxtw, exact_log2(stride * Assembler::instruction_size));
1183     br(rscratch1);
1184   }
1185 
1186   // Form an address from base + offset in Rd.  Rd may or may not
1187   // actually be used: you must use the Address that is returned.  It
1188   // is up to you to ensure that the shift provided matches the size
1189   // of your data.
1190   Address form_address(Register Rd, Register base, long byte_offset, int shift);
1191 
1192   // Return true iff an address is within the 48-bit AArch64 address
1193   // space.
1194   bool is_valid_AArch64_address(address a) {
1195     return ((uint64_t)a >> 48) == 0;
1196   }
1197 
1198   // Load the base of the cardtable byte map into reg.
1199   void load_byte_map_base(Register reg);
1200 
1201   // Prolog generator routines to support switch between x86 code and
1202   // generated ARM code
1203 
1204   // routine to generate an x86 prolog for a stub function which
1205   // bootstraps into the generated ARM code which directly follows the
1206   // stub
1207   //
1208 
1209   public:
1210 
1211   void ldr_constant(Register dest, const Address &const_addr) {
1212     if (NearCpool) {
1213       ldr(dest, const_addr);
1214     } else {
1215       unsigned long offset;
1216       adrp(dest, InternalAddress(const_addr.target()), offset);
1217       ldr(dest, Address(dest, offset));
1218     }
1219   }
1220 
1221   address read_polling_page(Register r, relocInfo::relocType rtype);
1222   void get_polling_page(Register dest, relocInfo::relocType rtype);
1223   address fetch_and_read_polling_page(Register r, relocInfo::relocType rtype);
1224 
1225   // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1226   void update_byte_crc32(Register crc, Register val, Register table);
1227   void update_word_crc32(Register crc, Register v, Register tmp,
1228         Register table0, Register table1, Register table2, Register table3,
1229         bool upper = false);
1230 
1231   void has_negatives(Register ary1, Register len, Register result);
1232 
1233   void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
1234                      Register tmp1, Register tmp2, Register tmp3, int elem_size);
1235 
1236   void string_equals(Register a1, Register a2, Register result, Register cnt1,
1237                      int elem_size);
1238 
1239   void fill_words(Register base, Register cnt, Register value);
1240   void zero_words(Register base, u_int64_t cnt);
1241   void zero_words(Register ptr, Register cnt);
1242   void zero_dcache_blocks(Register base, Register cnt);
1243 
1244   static const int zero_words_block_size;
1245 
1246   void byte_array_inflate(Register src, Register dst, Register len,
1247                           FloatRegister vtmp1, FloatRegister vtmp2,
1248                           FloatRegister vtmp3, Register tmp4);
1249 
1250   void char_array_compress(Register src, Register dst, Register len,
1251                            FloatRegister tmp1Reg, FloatRegister tmp2Reg,
1252                            FloatRegister tmp3Reg, FloatRegister tmp4Reg,
1253                            Register result);
1254 
1255   void encode_iso_array(Register src, Register dst,
1256                         Register len, Register result,
1257                         FloatRegister Vtmp1, FloatRegister Vtmp2,
1258                         FloatRegister Vtmp3, FloatRegister Vtmp4);
1259   void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
1260                 FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,


1293         Register tmp3);
1294 public:
1295   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
1296                        Register zlen, Register tmp1, Register tmp2, Register tmp3,
1297                        Register tmp4, Register tmp5, Register tmp6, Register tmp7);
1298   void mul_add(Register out, Register in, Register offs, Register len, Register k);
1299   // ISB may be needed because of a safepoint
1300   void maybe_isb() { isb(); }
1301 
1302 private:
1303   // Return the effective address r + (r1 << ext) + offset.
1304   // Uses rscratch2.
1305   Address offsetted_address(Register r, Register r1, Address::extend ext,
1306                             int offset, int size);
1307 
1308 private:
1309   // Returns an address on the stack which is reachable with a ldr/str of the given size.
1310   // Uses rscratch2 if the address is not directly reachable
1311   Address spill_address(int size, int offset, Register tmp=rscratch2);
1312 
1313   bool merge_alignment_check(Register base, size_t size, long cur_offset, long prev_offset) const;
1314 
1315   // Check whether two loads/stores can be merged into ldp/stp.
1316   bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;
1317 
1318   // Merge current load/store with previous load/store into ldp/stp.
1319   void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1320 
1321   // Try to merge two loads/stores into ldp/stp. Returns true on success, false otherwise.
1322   bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1323 
1324 public:
1325   void spill(Register Rx, bool is64, int offset) {
1326     if (is64) {
1327       str(Rx, spill_address(8, offset));
1328     } else {
1329       strw(Rx, spill_address(4, offset));
1330     }
1331   }
1332   void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
1333     str(Vx, T, spill_address(1 << (int)T, offset));


New version of the changed sections (after the patch), with the LP64-specific types replaced by fixed-width types:

 439     umaddl(Rd, Rn, Rm, zr);
 440   }
 441 
 442 #define WRAP(INSN)                                                            \
 443   void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {             \
 444     if ((VM_Version::features() & VM_Version::CPU_A53MAC) && Ra != zr)        \
 445       nop();                                                                  \
 446     Assembler::INSN(Rd, Rn, Rm, Ra);                                          \
 447   }
 448 
 449   WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
 450   WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
 451 #undef WRAP
 452 
 453 
 454   // macro assembly operations needed for aarch64
 455 
 456   // first two private routines for loading 32 bit or 64 bit constants
 457 private:
 458 
 459   void mov_immediate64(Register dst, uint64_t imm64);
 460   void mov_immediate32(Register dst, uint32_t imm32);
 461 
 462   int push(unsigned int bitset, Register stack);
 463   int pop(unsigned int bitset, Register stack);
 464 
 465   int push_fp(unsigned int bitset, Register stack);
 466   int pop_fp(unsigned int bitset, Register stack);
 467 
 468   void mov(Register dst, Address a);
 469 
 470 public:
 471   void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
 472   void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
 473 
 474   void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
 475   void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
 476 
 477   // Push and pop everything that might be clobbered by a native
 478   // runtime call except rscratch1 and rscratch2.  (They are always
 479   // scratch, so we don't have to protect them.)  Only save the lower
 480   // 64 bits of each vector register.
 481   void push_call_clobbered_registers();
 482   void pop_call_clobbered_registers();
 483 
 484   // now mov instructions for loading absolute addresses and 32 or
 485   // 64 bit integers
 486 
 487   inline void mov(Register dst, address addr)
 488   {
 489     mov_immediate64(dst, (uint64_t)addr);
 490   }
 491 
 492   inline void mov(Register dst, uint64_t imm64)
 493   {
 494     mov_immediate64(dst, imm64);
 495   }
 496 
 497   inline void movw(Register dst, uint32_t imm32)
 498   {
 499     mov_immediate32(dst, imm32);
 500   }
 501 
 502   inline void mov(Register dst, int64_t l)
 503   {
 504     mov(dst, (uint64_t)l);
 505   }
 506 
 507   inline void mov(Register dst, int i)
 508   {
 509     mov(dst, (int64_t)i);
 510   }
 511 
 512   void mov(Register dst, RegisterOrConstant src) {
 513     if (src.is_register())
 514       mov(dst, src.as_register());
 515     else
 516       mov(dst, src.as_constant());
 517   }
 518 
 519   void movptr(Register r, uintptr_t imm64);
 520 
 521   void mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32);
 522 
 523   void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
 524     orr(Vd, T, Vn, Vn);
 525   }
 526 
 527 public:
 528 
 529   // Generalized Test Bit And Branch, including a "far" variety which
 530   // spans more than 32KiB.
 531   void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) {
 532     assert(cond == EQ || cond == NE, "must be");
 533 
 534     if (far)
 535       cond = ~cond;
 536 
 537     void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
 538     if (cond == Assembler::EQ)
 539       branch = &Assembler::tbz;
 540     else
 541       branch = &Assembler::tbnz;


1153              enum shift_kind kind, unsigned shift = 0) {                \
1154     Assembler::INSN(Rd, Rn, Rm, kind, shift);                           \
1155   }                                                                     \
1156                                                                         \
1157   void INSN(Register Rd, Register Rn, Register Rm) {                    \
1158     Assembler::INSN(Rd, Rn, Rm);                                        \
1159   }                                                                     \
1160                                                                         \
1161   void INSN(Register Rd, Register Rn, Register Rm,                      \
1162            ext::operation option, int amount = 0) {                     \
1163     Assembler::INSN(Rd, Rn, Rm, option, amount);                        \
1164   }
1165 
1166   WRAP(adds) WRAP(addsw) WRAP(subs) WRAP(subsw)
1167 
1168   void add(Register Rd, Register Rn, RegisterOrConstant increment);
1169   void addw(Register Rd, Register Rn, RegisterOrConstant increment);
1170   void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
1171   void subw(Register Rd, Register Rn, RegisterOrConstant decrement);
1172 
1173   void adrp(Register reg1, const Address &dest, uint64_t &byte_offset);
1174 
1175   void tableswitch(Register index, jint lowbound, jint highbound,
1176                    Label &jumptable, Label &jumptable_end, int stride = 1) {
1177     adr(rscratch1, jumptable);
1178     subsw(rscratch2, index, lowbound);
1179     subsw(zr, rscratch2, highbound - lowbound);
1180     br(Assembler::HS, jumptable_end);
1181     add(rscratch1, rscratch1, rscratch2,
1182         ext::sxtw, exact_log2(stride * Assembler::instruction_size));
1183     br(rscratch1);
1184   }
1185 
1186   // Form an address from base + offset in Rd.  Rd may or may not
1187   // actually be used: you must use the Address that is returned.  It
1188   // is up to you to ensure that the shift provided matches the size
1189   // of your data.
1190   Address form_address(Register Rd, Register base, int64_t byte_offset, int shift);
1191 
1192   // Return true iff an address is within the 48-bit AArch64 address
1193   // space.
1194   bool is_valid_AArch64_address(address a) {
1195     return ((uint64_t)a >> 48) == 0;
1196   }
1197 
1198   // Load the base of the cardtable byte map into reg.
1199   void load_byte_map_base(Register reg);
1200 
1201   // Prolog generator routines to support switch between x86 code and
1202   // generated ARM code
1203 
1204   // routine to generate an x86 prolog for a stub function which
1205   // bootstraps into the generated ARM code which directly follows the
1206   // stub
1207   //
1208 
1209   public:
1210 
1211   void ldr_constant(Register dest, const Address &const_addr) {
1212     if (NearCpool) {
1213       ldr(dest, const_addr);
1214     } else {
1215       uint64_t offset;
1216       adrp(dest, InternalAddress(const_addr.target()), offset);
1217       ldr(dest, Address(dest, offset));
1218     }
1219   }
1220 
1221   address read_polling_page(Register r, relocInfo::relocType rtype);
1222   void get_polling_page(Register dest, relocInfo::relocType rtype);
1223   address fetch_and_read_polling_page(Register r, relocInfo::relocType rtype);
1224 
1225   // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1226   void update_byte_crc32(Register crc, Register val, Register table);
1227   void update_word_crc32(Register crc, Register v, Register tmp,
1228         Register table0, Register table1, Register table2, Register table3,
1229         bool upper = false);
1230 
1231   void has_negatives(Register ary1, Register len, Register result);
1232 
1233   void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
1234                      Register tmp1, Register tmp2, Register tmp3, int elem_size);
1235 
1236   void string_equals(Register a1, Register a2, Register result, Register cnt1,
1237                      int elem_size);
1238 
1239   void fill_words(Register base, Register cnt, Register value);
1240   void zero_words(Register base, uint64_t cnt);
1241   void zero_words(Register ptr, Register cnt);
1242   void zero_dcache_blocks(Register base, Register cnt);
1243 
1244   static const int zero_words_block_size;
1245 
1246   void byte_array_inflate(Register src, Register dst, Register len,
1247                           FloatRegister vtmp1, FloatRegister vtmp2,
1248                           FloatRegister vtmp3, Register tmp4);
1249 
1250   void char_array_compress(Register src, Register dst, Register len,
1251                            FloatRegister tmp1Reg, FloatRegister tmp2Reg,
1252                            FloatRegister tmp3Reg, FloatRegister tmp4Reg,
1253                            Register result);
1254 
1255   void encode_iso_array(Register src, Register dst,
1256                         Register len, Register result,
1257                         FloatRegister Vtmp1, FloatRegister Vtmp2,
1258                         FloatRegister Vtmp3, FloatRegister Vtmp4);
1259   void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
1260                 FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,


1293         Register tmp3);
1294 public:
1295   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
1296                        Register zlen, Register tmp1, Register tmp2, Register tmp3,
1297                        Register tmp4, Register tmp5, Register tmp6, Register tmp7);
1298   void mul_add(Register out, Register in, Register offs, Register len, Register k);
1299   // ISB may be needed because of a safepoint
1300   void maybe_isb() { isb(); }
1301 
1302 private:
1303   // Return the effective address r + (r1 << ext) + offset.
1304   // Uses rscratch2.
1305   Address offsetted_address(Register r, Register r1, Address::extend ext,
1306                             int offset, int size);
1307 
1308 private:
1309   // Returns an address on the stack which is reachable with a ldr/str of the given size.
1310   // Uses rscratch2 if the address is not directly reachable
1311   Address spill_address(int size, int offset, Register tmp=rscratch2);
1312 
1313   bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
1314 
1315   // Check whether two loads/stores can be merged into ldp/stp.
1316   bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;
1317 
1318   // Merge current load/store with previous load/store into ldp/stp.
1319   void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1320 
1321   // Try to merge two loads/stores into ldp/stp. Returns true on success, false otherwise.
1322   bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);
1323 
1324 public:
1325   void spill(Register Rx, bool is64, int offset) {
1326     if (is64) {
1327       str(Rx, spill_address(8, offset));
1328     } else {
1329       strw(Rx, spill_address(4, offset));
1330     }
1331   }
1332   void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
1333     str(Vx, T, spill_address(1 << (int)T, offset));

