< prev index next >

src/hotspot/cpu/x86/x86_64.ad

Print this page
rev 47415 : Add Thread Local handshakes and thread local polling


 300 reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %}); // variant is selected by PreserveFramePointer
 301 
 302 // Singleton class containing only the RAX pointer register (RAX, RAX_H)
 303 reg_class ptr_rax_reg(RAX, RAX_H);
 304 
 305 // Singleton class containing only the RBX pointer register (RBX, RBX_H)
 306 reg_class ptr_rbx_reg(RBX, RBX_H);
 307 
 308 // Singleton class containing only the RSI pointer register (RSI, RSI_H)
 309 reg_class ptr_rsi_reg(RSI, RSI_H);
 310 
 311 // Singleton class containing only the RDI pointer register (RDI, RDI_H)
 312 reg_class ptr_rdi_reg(RDI, RDI_H);
 313 
 314 // Singleton class containing only the stack pointer (RSP, RSP_H)
 315 reg_class ptr_rsp_reg(RSP, RSP_H);
 316 
 317 // Singleton class for the TLS (thread-local storage) pointer, held in R15
 318 reg_class ptr_r15_reg(R15, R15_H);
 319 












 320 // Class for all long registers (excluding RSP; R12 and R15 are not listed either)
 321 reg_class long_reg_with_rbp(RAX, RAX_H,
 322                             RDX, RDX_H,
 323                             RBP, RBP_H,
 324                             RDI, RDI_H,
 325                             RSI, RSI_H,
 326                             RCX, RCX_H,
 327                             RBX, RBX_H,
 328                             R8,  R8_H,
 329                             R9,  R9_H,
 330                             R10, R10_H,
 331                             R11, R11_H,
 332                             R13, R13_H,
 333                             R14, R14_H);
 334 
 335 // Class for all long registers (excluding RSP and RBP)
 336 reg_class long_reg_no_rbp(RAX, RAX_H,
 337                           RDX, RDX_H,
 338                           RDI, RDI_H,
 339                           RSI, RSI_H,


 549   return offset;
 550 }
 551 
 552 int MachCallDynamicJavaNode::ret_addr_offset() {
 553   // 15 bytes from start of call to where return address points
 554   const int call_bytes = 15;
 555   const int avx_bytes = clear_avx_size();
 556   return call_bytes + avx_bytes;
 557 }
 558 
 559 int MachCallRuntimeNode::ret_addr_offset() {
 560   // movq r10,#addr; callq (r10)
 561   const int call_bytes = 13;
 562   return call_bytes + clear_avx_size();
 563 }
 564 
 565 // Tells whether the safepoint node must be given the polling page as an
 566 // input; that is the case when the page is more than disp32 away.
 567 bool SafePointNode::needs_polling_address_input() {
 568   const bool page_is_far = Assembler::is_polling_page_far();
 569   return page_is_far;
 570 }
 571 
 572 //
 573 // Compute padding required for nodes which need alignment
 574 //
 575 
 576 // Padding needed so that the bytes following the call opcode start on an
 577 // alignment_required() boundary (4-byte aligned), so the call can be patched.
 578 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 579 {
 580   // Step over the vzeroupper (clear_avx_size()) and the call opcode byte.
 581   const int operand_offset = current_offset + clear_avx_size() + 1;
 582   return align_up(operand_offset, alignment_required()) - operand_offset;
 583 }
 584 
 585 // The address of the call instruction needs to be 4-byte aligned to
 586 // ensure that it does not span a cache line so that it can be patched.
 587 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 588 {
 589   current_offset += clear_avx_size(); // skip vzeroupper


 921   Compile* C = ra_->C;
 922   if (VM_Version::supports_vzeroupper()) {
 923     st->print("vzeroupper");
 924     st->cr(); st->print("\t");
 925   }
 926 
 927   int framesize = C->frame_size_in_bytes();
 928   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 929   // Remove word for return adr already pushed
 930   // and RBP
 931   framesize -= 2*wordSize;
 932 
 933   if (framesize) {
 934     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 935     st->print("\t");
 936   }
 937 
 938   st->print_cr("popq   rbp");
 939   if (do_polling() && C->is_method_compilation()) {
 940     st->print("\t");
 941     if (Assembler::is_polling_page_far()) {




 942       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 943                    "testl  rax, [rscratch1]\t"
 944                    "# Safepoint: poll for GC");
 945     } else {
 946       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 947                    "# Safepoint: poll for GC");
 948     }
 949   }
 950 }
 951 #endif
 952 
 953 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 954 {
 955   Compile* C = ra_->C;
 956   MacroAssembler _masm(&cbuf);
 957 
 958   // Clear upper bits of YMM registers when current compiled code uses
 959   // wide vectors to avoid AVX <-> SSE transition penalty during call.
 960   __ vzeroupper();
 961 


 972     if (framesize < 0x80) {
 973       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 974       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 975       emit_d8(cbuf, framesize);
 976     } else {
 977       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 978       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 979       emit_d32(cbuf, framesize);
 980     }
 981   }
 982 
 983   // popq rbp
 984   emit_opcode(cbuf, 0x58 | RBP_enc);
 985 
 986   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 987     __ reserved_stack_check();
 988   }
 989 
 990   if (do_polling() && C->is_method_compilation()) {
 991     MacroAssembler _masm(&cbuf);





 992     AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
 993     if (Assembler::is_polling_page_far()) {
 994       __ lea(rscratch1, polling_page);
 995       __ relocate(relocInfo::poll_return_type);
 996       __ testl(rax, Address(rscratch1, 0));
 997     } else {
 998       __ testl(rax, polling_page);
 999     }
1000   }

1001 }
1002 
1003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const {
1004   // Too many variables to work the length out by hand; let MachNode
1005   // compute it the hard way.
1006   return MachNode::size(ra_);
1007 }
1008 
1009 int MachEpilogNode::reloc() const {
1010   const int reloc_estimate = 2; // a large enough number
1011   return reloc_estimate;
1012 }
1013 
1014 const Pipeline* MachEpilogNode::pipeline() const {
1015   const Pipeline* node_pipeline = MachNode::pipeline_class();
1016   return node_pipeline;
1017 }
1018 
1019 int MachEpilogNode::safepoint_offset() const
1020 {


3494 operand rdi_RegP() // pointer operand restricted to RDI
3495 %{
3496   constraint(ALLOC_IN_RC(ptr_rdi_reg)); // allocate only from the RDI singleton class
3497   match(RegP);
3498   match(rRegP);
3499 
3500   format %{ %}
3501   interface(REG_INTER);
3502 %}
3503 
3504 operand r15_RegP() // pointer operand restricted to R15
3505 %{
3506   constraint(ALLOC_IN_RC(ptr_r15_reg)); // allocate only from the R15 singleton class
3507   match(RegP);
3508   match(rRegP);
3509 
3510   format %{ %}
3511   interface(REG_INTER);
3512 %}
3513 










3514 operand rRegL() // long operand allocated from the long_reg class
3515 %{
3516   constraint(ALLOC_IN_RC(long_reg));
3517   match(RegL);
3518   match(rax_RegL); // the rax_RegL and rdx_RegL operands are accepted as well
3519   match(rdx_RegL);
3520 
3521   format %{ %}
3522   interface(REG_INTER);
3523 %}
3524 
3525 // Special Registers
3526 operand no_rax_rdx_RegL()
3527 %{
3528   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3529   match(RegL);
3530   match(rRegL);
3531 
3532   format %{ %}
3533   interface(REG_INTER);


12043   %}
12044   ins_pipe(pipe_slow);
12045 %}
12046 
12047 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
12048   match(Set cr (FastUnlock object box)); // the result is produced in the flags operand cr
12049   effect(TEMP tmp, USE_KILL box); // tmp is a scratch register; box is read and then clobbered
12050   ins_cost(300);
12051   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
12052   ins_encode %{
12053     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); // last argument: this compilation's use_rtm() setting
12054   %}
12055   ins_pipe(pipe_slow);
12056 %}
12057 
12058 
12059 // ============================================================================
12060 // Safepoint Instructions
12061 instruct safePoint_poll(rFlagsReg cr) // safepoint poll that addresses the polling page directly
12062 %{
12063   predicate(!Assembler::is_polling_page_far()); // only selected when the polling page is not far
12064   match(SafePoint);
12065   effect(KILL cr); // the flags are clobbered by the testl
12066 
12067   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12068             "# Safepoint: poll for GC" %}
12069   ins_cost(125);
12070   ins_encode %{
12071     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); // polling page address carrying a poll_type relocation
12072     __ testl(rax, addr);
12073   %}
12074   ins_pipe(ialu_reg_mem);
12075 %}
12076 
12077 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll) // safepoint poll through an address held in a register
12078 %{
12079   predicate(Assembler::is_polling_page_far()); // only selected when the polling page is far
















12080   match(SafePoint poll); // the SafePoint node supplies the polling address as input poll
12081   effect(KILL cr, USE poll); // the flags are clobbered; poll is only read
12082 
12083   format %{ "testl  rax, [$poll]\t"
12084             "# Safepoint: poll for GC" %}
12085   ins_cost(125);

12086   ins_encode %{
12087     __ relocate(relocInfo::poll_type); // poll_type relocation for the testl emitted next

12088     __ testl(rax, Address($poll$$Register, 0)); // read through $poll with zero displacement


12089   %}
12090   ins_pipe(ialu_reg_mem);
12091 %}
12092 
12093 // ============================================================================
12094 // Procedure Call/Return Instructions
12095 // Call Java Static Instruction (matches the CallStaticJava node)
12096 // Note: If this code changes, the corresponding ret_addr_offset() and
12097 //       compute_padding() functions will have to be adjusted.
12098 instruct CallStaticJavaDirect(method meth) %{
12099   match(CallStaticJava);
12100   effect(USE meth);
12101 
12102   ins_cost(300);
12103   format %{ "call,static " %}
12104   opcode(0xE8); /* E8 cd */
12105   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog); // clear_avx comes first, then the call, then call_epilog
12106   ins_pipe(pipe_slow);
12107   ins_alignment(4); // NOTE(review): presumably the value alignment_required() yields in compute_padding() - confirm
12108 %}




 300 reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %}); // variant is selected by PreserveFramePointer
 301 
 302 // Singleton class containing only the RAX pointer register (RAX, RAX_H)
 303 reg_class ptr_rax_reg(RAX, RAX_H);
 304 
 305 // Singleton class containing only the RBX pointer register (RBX, RBX_H)
 306 reg_class ptr_rbx_reg(RBX, RBX_H);
 307 
 308 // Singleton class containing only the RSI pointer register (RSI, RSI_H)
 309 reg_class ptr_rsi_reg(RSI, RSI_H);
 310 
 311 // Singleton class containing only the RDI pointer register (RDI, RDI_H)
 312 reg_class ptr_rdi_reg(RDI, RDI_H);
 313 
 314 // Singleton class containing only the stack pointer (RSP, RSP_H)
 315 reg_class ptr_rsp_reg(RSP, RSP_H);
 316 
 317 // Singleton class for the TLS (thread-local storage) pointer, held in R15
 318 reg_class ptr_r15_reg(R15, R15_H);
 319 
 320 // The registers (all of them among R8-R15) which can be used as the
 321 // address register of a thread local safepoint poll
 322 // * R12 is excluded: it is reserved for heap base
 323 // * R13 is excluded: it cannot be encoded for addressing without an offset byte
 324 // * R15 is excluded: it is reserved for the JavaThread
 325 reg_class ptr_rex_reg(R8,  R8_H,
 326                       R9,  R9_H,
 327                       R10, R10_H,
 328                       R11, R11_H,
 329                       R14, R14_H);
 330 
 331 
 332 // Class for all long registers (excluding RSP; R12 and R15 are not listed either)
 333 reg_class long_reg_with_rbp(RAX, RAX_H,
 334                             RDX, RDX_H,
 335                             RBP, RBP_H,
 336                             RDI, RDI_H,
 337                             RSI, RSI_H,
 338                             RCX, RCX_H,
 339                             RBX, RBX_H,
 340                             R8,  R8_H,
 341                             R9,  R9_H,
 342                             R10, R10_H,
 343                             R11, R11_H,
 344                             R13, R13_H,
 345                             R14, R14_H);
 346 
 347 // Class for all long registers (excluding RSP and RBP)
 348 reg_class long_reg_no_rbp(RAX, RAX_H,
 349                           RDX, RDX_H,
 350                           RDI, RDI_H,
 351                           RSI, RSI_H,


 561   return offset;
 562 }
 563 
 564 int MachCallDynamicJavaNode::ret_addr_offset() {
 565   // 15 bytes from start of call to where return address points
 566   const int call_bytes = 15;
 567   const int avx_bytes = clear_avx_size();
 568   return call_bytes + avx_bytes;
 569 }
 570 
 571 int MachCallRuntimeNode::ret_addr_offset() {
 572   // movq r10,#addr; callq (r10)
 573   const int call_bytes = 13;
 574   return call_bytes + clear_avx_size();
 575 }
 576 
 577 // Indicate if the safepoint node needs the polling address as an input: it does
 578 // for thread-local polling, or if the polling page is more than disp32 away.
 579 bool SafePointNode::needs_polling_address_input() {
 580   if (SafepointMechanism::uses_thread_local_poll()) return true;
 581   return Assembler::is_polling_page_far();
 582 }
 583 
 584 //
 585 // Compute padding required for nodes which need alignment
 586 //
 587 
 588 // Padding needed so that the bytes following the call opcode start on an
 589 // alignment_required() boundary (4-byte aligned), so the call can be patched.
 590 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 591 {
 592   // Step over the vzeroupper (clear_avx_size()) and the call opcode byte.
 593   const int operand_offset = current_offset + clear_avx_size() + 1;
 594   return align_up(operand_offset, alignment_required()) - operand_offset;
 595 }
 596 
 597 // The address of the call instruction needs to be 4-byte aligned to
 598 // ensure that it does not span a cache line so that it can be patched.
 599 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 600 {
 601   current_offset += clear_avx_size(); // skip vzeroupper


 933   Compile* C = ra_->C;
 934   if (VM_Version::supports_vzeroupper()) {
 935     st->print("vzeroupper");
 936     st->cr(); st->print("\t");
 937   }
 938 
 939   int framesize = C->frame_size_in_bytes();
 940   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 941   // Remove word for return adr already pushed
 942   // and RBP
 943   framesize -= 2*wordSize;
 944 
 945   if (framesize) {
 946     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 947     st->print("\t");
 948   }
 949 
 950   st->print_cr("popq   rbp");
 951   if (do_polling() && C->is_method_compilation()) {
 952     st->print("\t");
 953     if (SafepointMechanism::uses_thread_local_poll()) {
 954       st->print_cr("movq   rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
 955                    "testl  rax, [rscratch1]\t"
 956                    "# Safepoint: poll for GC");
 957     } else if (Assembler::is_polling_page_far()) {
 958       st->print_cr("movq   rscratch1, #polling_page_address\n\t"
 959                    "testl  rax, [rscratch1]\t"
 960                    "# Safepoint: poll for GC");
 961     } else {
 962       st->print_cr("testl  rax, [rip + #offset_to_poll_page]\t"
 963                    "# Safepoint: poll for GC");
 964     }
 965   }
 966 }
 967 #endif
 968 
 969 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 970 {
 971   Compile* C = ra_->C;
 972   MacroAssembler _masm(&cbuf);
 973 
 974   // Clear upper bits of YMM registers when current compiled code uses
 975   // wide vectors to avoid AVX <-> SSE transition penalty during call.
 976   __ vzeroupper();
 977 


 988     if (framesize < 0x80) {
 989       emit_opcode(cbuf, 0x83); // addq rsp, #framesize
 990       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 991       emit_d8(cbuf, framesize);
 992     } else {
 993       emit_opcode(cbuf, 0x81); // addq rsp, #framesize
 994       emit_rm(cbuf, 0x3, 0x00, RSP_enc);
 995       emit_d32(cbuf, framesize);
 996     }
 997   }
 998 
 999   // popq rbp
1000   emit_opcode(cbuf, 0x58 | RBP_enc);
1001 
1002   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1003     __ reserved_stack_check();
1004   }
1005 
1006   if (do_polling() && C->is_method_compilation()) {
1007     MacroAssembler _masm(&cbuf);
1008     if (SafepointMechanism::uses_thread_local_poll()) {
1009       __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
1010       __ relocate(relocInfo::poll_return_type);
1011       __ testl(rax, Address(rscratch1, 0));
1012     } else {
1013       AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
1014       if (Assembler::is_polling_page_far()) {
1015         __ lea(rscratch1, polling_page);
1016         __ relocate(relocInfo::poll_return_type);
1017         __ testl(rax, Address(rscratch1, 0));
1018       } else {
1019         __ testl(rax, polling_page);
1020       }
1021     }
1022   }
1023 }
1024 
1025 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const {
1026   // Too many variables to work the length out by hand; let MachNode
1027   // compute it the hard way.
1028   return MachNode::size(ra_);
1029 }
1030 
1031 int MachEpilogNode::reloc() const {
1032   const int reloc_estimate = 2; // a large enough number
1033   return reloc_estimate;
1034 }
1035 
1036 const Pipeline* MachEpilogNode::pipeline() const {
1037   const Pipeline* node_pipeline = MachNode::pipeline_class();
1038   return node_pipeline;
1039 }
1040 
1041 int MachEpilogNode::safepoint_offset() const
1042 {


3516 operand rdi_RegP() // pointer operand restricted to RDI
3517 %{
3518   constraint(ALLOC_IN_RC(ptr_rdi_reg)); // allocate only from the RDI singleton class
3519   match(RegP);
3520   match(rRegP);
3521 
3522   format %{ %}
3523   interface(REG_INTER);
3524 %}
3525 
3526 operand r15_RegP() // pointer operand restricted to R15
3527 %{
3528   constraint(ALLOC_IN_RC(ptr_r15_reg)); // allocate only from the R15 singleton class
3529   match(RegP);
3530   match(rRegP);
3531 
3532   format %{ %}
3533   interface(REG_INTER);
3534 %}
3535 
3536 operand rex_RegP() // pointer operand for the thread local safepoint poll address
3537 %{
3538   constraint(ALLOC_IN_RC(ptr_rex_reg)); // allocate only from R8, R9, R10, R11 or R14
3539   match(RegP);
3540   match(rRegP);
3541 
3542   format %{ %}
3543   interface(REG_INTER);
3544 %}
3545 
3546 operand rRegL() // long operand allocated from the long_reg class
3547 %{
3548   constraint(ALLOC_IN_RC(long_reg));
3549   match(RegL);
3550   match(rax_RegL); // the rax_RegL and rdx_RegL operands are accepted as well
3551   match(rdx_RegL);
3552 
3553   format %{ %}
3554   interface(REG_INTER);
3555 %}
3556 
3557 // Special Registers
3558 operand no_rax_rdx_RegL()
3559 %{
3560   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
3561   match(RegL);
3562   match(rRegL);
3563 
3564   format %{ %}
3565   interface(REG_INTER);


12075   %}
12076   ins_pipe(pipe_slow);
12077 %}
12078 
12079 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
12080   match(Set cr (FastUnlock object box)); // the result is produced in the flags operand cr
12081   effect(TEMP tmp, USE_KILL box); // tmp is a scratch register; box is read and then clobbered
12082   ins_cost(300);
12083   format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
12084   ins_encode %{
12085     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); // last argument: this compilation's use_rtm() setting
12086   %}
12087   ins_pipe(pipe_slow);
12088 %}
12089 
12090 
12091 // ============================================================================
12092 // Safepoint Instructions
12093 instruct safePoint_poll(rFlagsReg cr) // safepoint poll that addresses the global polling page directly
12094 %{
12095   predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll()); // global page poll, page not far
12096   match(SafePoint);
12097   effect(KILL cr); // the flags are clobbered by the testl
12098 
12099   format %{ "testl  rax, [rip + #offset_to_poll_page]\t"
12100             "# Safepoint: poll for GC" %}
12101   ins_cost(125);
12102   ins_encode %{
12103     AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); // polling page address carrying a poll_type relocation
12104     __ testl(rax, addr);
12105   %}
12106   ins_pipe(ialu_reg_mem);
12107 %}
12108 
12109 instruct safePoint_poll_far(rFlagsReg cr, rRegP poll) // global page poll through an address held in a register
12110 %{
12111   predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll()); // global page poll, page far
12112   match(SafePoint poll); // the SafePoint node supplies the polling address as input poll
12113   effect(KILL cr, USE poll); // the flags are clobbered; poll is only read
12114 
12115   format %{ "testl  rax, [$poll]\t"
12116             "# Safepoint: poll for GC" %}
12117   ins_cost(125);
12118   ins_encode %{
12119     __ relocate(relocInfo::poll_type); // poll_type relocation for the testl emitted next
12120     __ testl(rax, Address($poll$$Register, 0)); // read through $poll with zero displacement
12121   %}
12122   ins_pipe(ialu_reg_mem);
12123 %}
12124 
12125 instruct safePoint_poll_tls(rFlagsReg cr, rex_RegP poll) // thread local safepoint poll through $poll
12126 %{
12127   predicate(SafepointMechanism::uses_thread_local_poll());
12128   match(SafePoint poll); // the SafePoint node supplies the polling address as input poll
12129   effect(KILL cr, USE poll); // the flags are clobbered; poll is only read
12130 
12131   format %{ "testl  rax, [$poll]\t"
12132             "# Safepoint: poll for GC" %}
12133   ins_cost(125);
12134   size(3); /* setting an explicit size will cause debug builds to assert if size is incorrect */
12135   ins_encode %{
12136     __ relocate(relocInfo::poll_type);
12137     address pre_pc = __ pc(); // start of the poll instruction emitted next
12138     __ testl(rax, Address($poll$$Register, 0));
12139     // The poll must start with a REX prefix (0x41) followed by the test opcode (0x85).
12140     guarantee(pre_pc[0] == 0x41 && pre_pc[1] == 0x85, "must emit #rex test-ax [reg]");
12141   %}
12142   ins_pipe(ialu_reg_mem);
12143 %}
12144 
12145 // ============================================================================
12146 // Procedure Call/Return Instructions
12147 // Call Java Static Instruction (matches the CallStaticJava node)
12148 // Note: If this code changes, the corresponding ret_addr_offset() and
12149 //       compute_padding() functions will have to be adjusted.
12150 instruct CallStaticJavaDirect(method meth) %{
12151   match(CallStaticJava);
12152   effect(USE meth);
12153 
12154   ins_cost(300);
12155   format %{ "call,static " %}
12156   opcode(0xE8); /* E8 cd */
12157   ins_encode(clear_avx, Java_Static_Call(meth), call_epilog); // clear_avx comes first, then the call, then call_epilog
12158   ins_pipe(pipe_slow);
12159   ins_alignment(4); // NOTE(review): presumably the value alignment_required() yields in compute_padding() - confirm
12160 %}


< prev index next >