774 st->print("# stack alignment check");
775 #endif
776 }
777 if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
778 st->print("\n\t");
779 st->print("cmpl [r15_thread + #disarmed_offset], #disarmed_value\t");
780 st->print("\n\t");
781 st->print("je fast_entry\t");
782 st->print("\n\t");
783 st->print("call #nmethod_entry_barrier_stub\t");
784 st->print("\n\tfast_entry:");
785 }
786 st->cr();
787 }
788 #endif
789
// Emits the method prolog into cbuf and performs the prolog-time bookkeeping
// on the Compile object: the frame-complete offset and, when present, the
// constant table base offset.
790 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
791 Compile* C = ra_->C;
792 MacroAssembler _masm(&cbuf);
793
794 int framesize = C->frame_size_in_bytes();
795 int bangsize = C->bang_size_in_bytes();
796
// A bang size of 0 is passed when no stack bang is needed; the last argument
// is true when this compilation has a stub function.
797 __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
798
// The frame-complete offset is the instruction size emitted so far.
799 C->set_frame_complete(cbuf.insts_size());
800
801 if (C->has_mach_constant_base_node()) {
802 // NOTE: We set the table base offset here because users might be
803 // emitted before MachConstantBaseNode.
804 Compile::ConstantTable& constant_table = C->constant_table();
805 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
806 }
807 }
808
// Returns the prolog size by delegating to the generic MachNode::size(ra_)
// instead of deriving it from the frame parameters.
809 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
810 {
811 return MachNode::size(ra_); // too many variables; just compute it
812 // the hard way
813 }
814
815 int MachPrologNode::reloc() const
816 {
817 return 0; // a large enough number
851 "# Safepoint: poll for GC");
852 } else {
853 st->print_cr("testl rax, [rip + #offset_to_poll_page]\t"
854 "# Safepoint: poll for GC");
855 }
856 }
857 }
858 #endif
859
860 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
861 {
862 Compile* C = ra_->C;
863 MacroAssembler _masm(&cbuf);
864
865 if (generate_vzeroupper(C)) {
866 // Clear upper bits of YMM registers when current compiled code uses
867 // wide vectors to avoid AVX <-> SSE transition penalty during call.
868 __ vzeroupper();
869 }
870
871 int framesize = C->frame_size_in_bytes();
872 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
873 // Remove word for return adr already pushed
874 // and RBP
875 framesize -= 2*wordSize;
876
877 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
878
879 if (framesize) {
880 emit_opcode(cbuf, Assembler::REX_W);
881 if (framesize < 0x80) {
882 emit_opcode(cbuf, 0x83); // addq rsp, #framesize
883 emit_rm(cbuf, 0x3, 0x00, RSP_enc);
884 emit_d8(cbuf, framesize);
885 } else {
886 emit_opcode(cbuf, 0x81); // addq rsp, #framesize
887 emit_rm(cbuf, 0x3, 0x00, RSP_enc);
888 emit_d32(cbuf, framesize);
889 }
890 }
891
892 // popq rbp
893 emit_opcode(cbuf, 0x58 | RBP_enc);
894
895 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
896 __ reserved_stack_check();
897 }
898
899 if (do_polling() && C->is_method_compilation()) {
900 MacroAssembler _masm(&cbuf);
901 if (SafepointMechanism::uses_thread_local_poll()) {
902 __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
903 __ relocate(relocInfo::poll_return_type);
904 __ testl(rax, Address(rscratch1, 0));
905 } else {
906 AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
907 if (Assembler::is_polling_page_far()) {
908 __ lea(rscratch1, polling_page);
909 __ relocate(relocInfo::poll_return_type);
910 __ testl(rax, Address(rscratch1, 0));
911 } else {
912 __ testl(rax, polling_page);
913 }
1446 emit_rm(cbuf, 0x2, reg & 7, 0x04);
1447 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1448 emit_d32(cbuf, offset);
1449 } else {
1450 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1451 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1452 emit_rm(cbuf, 0x1, reg & 7, 0x04);
1453 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1454 emit_d8(cbuf, offset);
1455 }
1456 }
1457
// Returns the encoded size in bytes of this node's LEA reg,[SP+offset]:
// 5 when the stack offset is below 0x80, otherwise 8. The 5-byte case matches
// the REX + opcode + two emit_rm bytes + 8-bit displacement emitted by
// BoxLockNode::emit; the 8-byte case presumably corresponds to the 32-bit
// displacement form.
1458 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1459 {
// Stack offset of the register assigned to this node's first input mask element.
1460 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1461 return (offset < 0x80) ? 5 : 8; // REX
1462 }
1463
1464 //=============================================================================
1465 #ifndef PRODUCT
// Prints a textual description of the unverified entry point's inline cache
// check to st. Compiled only when PRODUCT is not defined. The text differs
// depending on UseCompressedClassPointers; both variants end with the jump to
// the IC miss stub and the alignment nop line.
1466 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1467 {
1468 if (UseCompressedClassPointers) {
// Compressed class pointers: load the narrow klass, decode it, then compare.
1469 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1470 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1471 st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check");
1472 } else {
// Uncompressed: compare rax directly against the klass field in memory.
1473 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1474 "# Inline cache check");
1475 }
1476 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1477 st->print_cr("\tnop\t# nops to align entry point");
1478 }
1479 #endif
1480
1481 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1482 {
1483 MacroAssembler masm(&cbuf);
1484 uint insts_size = cbuf.insts_size();
1485 if (UseCompressedClassPointers) {
6584 format %{ "MEMBAR-storestore (empty encoding)" %}
6585 ins_encode( );
6586 ins_pipe(empty);
6587 %}
6588
6589 //----------Move Instructions--------------------------------------------------
6590
// Matches CastX2P (long register -> pointer register). A register move is
// emitted only when dst and src are different registers; otherwise the
// instruction encodes to nothing.
6591 instruct castX2P(rRegP dst, rRegL src)
6592 %{
6593 match(Set dst (CastX2P src));
6594
6595 format %{ "movq $dst, $src\t# long->ptr" %}
6596 ins_encode %{
6597 if ($dst$$reg != $src$$reg) {
6598 __ movptr($dst$$Register, $src$$Register);
6599 }
6600 %}
6601 ins_pipe(ialu_reg_reg); // XXX
6602 %}
6603
// Matches CastP2X (pointer register -> long register). A register move is
// emitted only when dst and src are different registers; otherwise the
// instruction encodes to nothing.
6604 instruct castP2X(rRegL dst, rRegP src)
6605 %{
6606 match(Set dst (CastP2X src));
6607
6608 format %{ "movq $dst, $src\t# ptr -> long" %}
6609 ins_encode %{
6610 if ($dst$$reg != $src$$reg) {
6611 __ movptr($dst$$Register, $src$$Register);
6612 }
6613 %}
6614 ins_pipe(ialu_reg_reg); // XXX
6615 %}
6616
6617 // Convert oop into int for vectors alignment masking
6618 instruct convP2I(rRegI dst, rRegP src)
6619 %{
6620 match(Set dst (ConvL2I (CastP2X src)));
6621
6622 format %{ "movl $dst, $src\t# ptr -> int" %}
6623 ins_encode %{
10819 ins_encode %{
10820 __ movdl($dst$$XMMRegister, $src$$Register);
10821 %}
10822 ins_pipe( pipe_slow );
10823 %}
10824
// Matches MoveL2D with both operands in registers: src is a long general
// register, dst a regD operand encoded as an XMM register.
// NOTE(review): the format text prints "movd" while the encoding calls
// movdq — confirm whether the mnemonic in the format is intentional.
10825 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10826 match(Set dst (MoveL2D src));
10827 effect(DEF dst, USE src);
10828 ins_cost(100);
10829 format %{ "movd $dst,$src\t# MoveL2D" %}
10830 ins_encode %{
10831 __ movdq($dst$$XMMRegister, $src$$Register);
10832 %}
10833 ins_pipe( pipe_slow );
10834 %}
10835
10836
10837 // =======================================================================
10838 // fast clearing of an array
// ClearArray for nodes whose ClearArrayNode is not is_large().
// Operands: cnt is pinned to rcx, base to rdi, zero to rax; tmp is an XMM
// temporary. cnt and base are consumed (USE_KILL); rax and the flags are killed.
// The format template only describes the expected code shape: a short
// backwards store loop for counts up to InitArrayShortSize, then a strategy
// chosen by UseFastStosb / UseXMMForObjInit. The instructions themselves are
// produced by clear_mem, called here with a final argument of false.
10839 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10840 Universe dummy, rFlagsReg cr)
10841 %{
10842 predicate(!((ClearArrayNode*)n)->is_large());
10843 match(Set dummy (ClearArray cnt base));
10844 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10845
10846 format %{ $$template
10847 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10848 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10849 $$emit$$"jg LARGE\n\t"
10850 $$emit$$"dec rcx\n\t"
10851 $$emit$$"js DONE\t# Zero length\n\t"
10852 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10853 $$emit$$"dec rcx\n\t"
10854 $$emit$$"jge LOOP\n\t"
10855 $$emit$$"jmp DONE\n\t"
10856 $$emit$$"# LARGE:\n\t"
10857 if (UseFastStosb) {
10858 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10859 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10860 } else if (UseXMMForObjInit) {
10861 $$emit$$"mov rdi,rax\n\t"
10862 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10863 $$emit$$"jmpq L_zero_64_bytes\n\t"
10864 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10865 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10866 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10867 $$emit$$"add 0x40,rax\n\t"
10868 $$emit$$"# L_zero_64_bytes:\n\t"
10869 $$emit$$"sub 0x8,rcx\n\t"
10870 $$emit$$"jge L_loop\n\t"
10871 $$emit$$"add 0x4,rcx\n\t"
10872 $$emit$$"jl L_tail\n\t"
10873 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10874 $$emit$$"add 0x20,rax\n\t"
10875 $$emit$$"sub 0x4,rcx\n\t"
10876 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10877 $$emit$$"add 0x4,rcx\n\t"
10878 $$emit$$"jle L_end\n\t"
10879 $$emit$$"dec rcx\n\t"
10880 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10881 $$emit$$"vmovq xmm0,(rax)\n\t"
10882 $$emit$$"add 0x8,rax\n\t"
10883 $$emit$$"dec rcx\n\t"
10884 $$emit$$"jge L_sloop\n\t"
10885 $$emit$$"# L_end:\n\t"
10886 } else {
10887 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10888 }
10889 $$emit$$"# DONE"
10890 %}
10891 ins_encode %{
10892 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10893 $tmp$$XMMRegister, false);
10894 %}
10895 ins_pipe(pipe_slow);
10896 %}
10897
// ClearArray for nodes whose ClearArrayNode is is_large().
// Same operands and effects as rep_stos: cnt in rcx, base in rdi, zero in rax,
// XMM temporary tmp; cnt and base are consumed, rax and the flags are killed.
// The format template has no short-array loop; it shows only the strategy
// chosen by UseFastStosb / UseXMMForObjInit. The instructions themselves are
// produced by clear_mem, called here with a final argument of true.
10898 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
10899 Universe dummy, rFlagsReg cr)
10900 %{
10901 predicate(((ClearArrayNode*)n)->is_large());
10902 match(Set dummy (ClearArray cnt base));
10903 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
10904
10905 format %{ $$template
10906 if (UseFastStosb) {
10907 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10908 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10909 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10910 } else if (UseXMMForObjInit) {
10911 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
10912 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
10913 $$emit$$"jmpq L_zero_64_bytes\n\t"
10914 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10915 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10916 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
10917 $$emit$$"add 0x40,rax\n\t"
10918 $$emit$$"# L_zero_64_bytes:\n\t"
10919 $$emit$$"sub 0x8,rcx\n\t"
10920 $$emit$$"jge L_loop\n\t"
10921 $$emit$$"add 0x4,rcx\n\t"
10922 $$emit$$"jl L_tail\n\t"
10923 $$emit$$"vmovdqu ymm0,(rax)\n\t"
10924 $$emit$$"add 0x20,rax\n\t"
10925 $$emit$$"sub 0x4,rcx\n\t"
10926 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10927 $$emit$$"add 0x4,rcx\n\t"
10928 $$emit$$"jle L_end\n\t"
10929 $$emit$$"dec rcx\n\t"
10930 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10931 $$emit$$"vmovq xmm0,(rax)\n\t"
10932 $$emit$$"add 0x8,rax\n\t"
10933 $$emit$$"dec rcx\n\t"
10934 $$emit$$"jge L_sloop\n\t"
10935 $$emit$$"# L_end:\n\t"
10936 } else {
10937 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
10938 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
10939 }
10940 %}
10941 ins_encode %{
10942 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
10943 $tmp$$XMMRegister, true);
10944 %}
10945 ins_pipe(pipe_slow);
10946 %}
10947
// Matches StrComp when the node's encoding is StrIntrinsicNode::LL (the format
// text describes the operands as byte[]).
// Operands: str1 in rdi, cnt1 in rcx, str2 in rsi, cnt2 in rdx, result in rax;
// tmp1 is a legVecS temporary. All four inputs are consumed (USE_KILL) and the
// flags are killed. Code generation is delegated to string_compare.
10948 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
10949 rax_RegI result, legVecS tmp1, rFlagsReg cr)
10950 %{
10951 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
10952 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
10953 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
10954
10955 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
10956 ins_encode %{
10957 __ string_compare($str1$$Register, $str2$$Register,
10958 $cnt1$$Register, $cnt2$$Register, $result$$Register,
10959 $tmp1$$XMMRegister, StrIntrinsicNode::LL);
10960 %}
10961 ins_pipe( pipe_slow );
10962 %}
10963
12496
12497 ins_cost(300);
12498 format %{ "call,runtime " %}
12499 ins_encode(clear_avx, Java_To_Runtime(meth));
12500 ins_pipe(pipe_slow);
12501 %}
12502
12503 // Call runtime without safepoint
// Matches CallLeaf. The encoding is the clear_avx enc_class followed by
// Java_To_Runtime(meth).
12504 instruct CallLeafDirect(method meth)
12505 %{
12506 match(CallLeaf);
12507 effect(USE meth);
12508
12509 ins_cost(300);
12510 format %{ "call_leaf,runtime " %}
12511 ins_encode(clear_avx, Java_To_Runtime(meth));
12512 ins_pipe(pipe_slow);
12513 %}
12514
12515 // Call runtime without safepoint
// Matches CallLeafNoFP. The encoding is identical to CallLeafDirect:
// clear_avx followed by Java_To_Runtime(meth).
12516 instruct CallLeafNoFPDirect(method meth)
12517 %{
12518 match(CallLeafNoFP);
12519 effect(USE meth);
12520
12521 ins_cost(300);
12522 format %{ "call_leaf_nofp,runtime " %}
12523 ins_encode(clear_avx, Java_To_Runtime(meth));
12524 ins_pipe(pipe_slow);
12525 %}
12526
12527 // Return Instruction
12528 // Remove the return address & jump to it.
12529 // Notice: We always emit a nop after a ret to make sure there is room
12530 // for safepoint patching
12531 instruct Ret()
12532 %{
12533 match(Return);
12534
12535 format %{ "ret" %}
12536 opcode(0xC3);
12537 ins_encode(OpcP);
|
774 st->print("# stack alignment check");
775 #endif
776 }
777 if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
778 st->print("\n\t");
779 st->print("cmpl [r15_thread + #disarmed_offset], #disarmed_value\t");
780 st->print("\n\t");
781 st->print("je fast_entry\t");
782 st->print("\n\t");
783 st->print("call #nmethod_entry_barrier_stub\t");
784 st->print("\n\tfast_entry:");
785 }
786 st->cr();
787 }
788 #endif
789
// Emits the method prolog into cbuf, binds the _verified_entry label right
// after it, and performs the prolog-time bookkeeping on the Compile object:
// the frame-complete offset and, when present, the constant table base offset.
790 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
791 Compile* C = ra_->C;
792 MacroAssembler _masm(&cbuf);
793
794 __ verified_entry(C);
// Bind the label at the position following the code emitted by verified_entry.
795 __ bind(*_verified_entry);
796
// The frame-complete offset is the instruction size emitted so far.
797 C->set_frame_complete(cbuf.insts_size());
798
799 if (C->has_mach_constant_base_node()) {
800 // NOTE: We set the table base offset here because users might be
801 // emitted before MachConstantBaseNode.
802 Compile::ConstantTable& constant_table = C->constant_table();
803 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
804 }
805 }
806
// Returns the prolog size by delegating to the generic MachNode::size(ra_)
// instead of deriving it from the frame parameters.
807 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
808 {
809 return MachNode::size(ra_); // too many variables; just compute it
810 // the hard way
811 }
812
813 int MachPrologNode::reloc() const
814 {
815 return 0; // a large enough number
849 "# Safepoint: poll for GC");
850 } else {
851 st->print_cr("testl rax, [rip + #offset_to_poll_page]\t"
852 "# Safepoint: poll for GC");
853 }
854 }
855 }
856 #endif
857
858 void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
859 {
860 Compile* C = ra_->C;
861 MacroAssembler _masm(&cbuf);
862
863 if (generate_vzeroupper(C)) {
864 // Clear upper bits of YMM registers when current compiled code uses
865 // wide vectors to avoid AVX <-> SSE transition penalty during call.
866 __ vzeroupper();
867 }
868
869 __ restore_stack(C);
870
871
872 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
873 __ reserved_stack_check();
874 }
875
876 if (do_polling() && C->is_method_compilation()) {
877 MacroAssembler _masm(&cbuf);
878 if (SafepointMechanism::uses_thread_local_poll()) {
879 __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
880 __ relocate(relocInfo::poll_return_type);
881 __ testl(rax, Address(rscratch1, 0));
882 } else {
883 AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);
884 if (Assembler::is_polling_page_far()) {
885 __ lea(rscratch1, polling_page);
886 __ relocate(relocInfo::poll_return_type);
887 __ testl(rax, Address(rscratch1, 0));
888 } else {
889 __ testl(rax, polling_page);
890 }
1423 emit_rm(cbuf, 0x2, reg & 7, 0x04);
1424 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1425 emit_d32(cbuf, offset);
1426 } else {
1427 emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
1428 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1429 emit_rm(cbuf, 0x1, reg & 7, 0x04);
1430 emit_rm(cbuf, 0x0, 0x04, RSP_enc);
1431 emit_d8(cbuf, offset);
1432 }
1433 }
1434
// Returns the encoded size in bytes of this node's LEA reg,[SP+offset]:
// 5 when the stack offset is below 0x80, otherwise 8. The 5-byte case matches
// the REX + opcode + two emit_rm bytes + 8-bit displacement emitted by
// BoxLockNode::emit; the 8-byte case presumably corresponds to the 32-bit
// displacement form.
1435 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
1436 {
// Stack offset of the register assigned to this node's first input mask element.
1437 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1438 return (offset < 0x80) ? 5 : 8; // REX
1439 }
1440
1441 //=============================================================================
1442 #ifndef PRODUCT
// Prints only the node name "MachVEPNode" followed by a newline to st; no
// instruction listing is produced. Compiled only when PRODUCT is not defined.
1443 void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1444 {
1445 st->print_cr("MachVEPNode");
1446 }
1447 #endif
1448
// Emits the entry code for this MachVEPNode into cbuf.
// - When _verified is false: compares rax against the klass of the object in
//   j_rarg0 (loaded via load_klass when UseCompressedClassPointers is set,
//   otherwise compared directly from memory) and jumps to the IC miss stub on
//   mismatch.
// - When _verified is true: unpacks value type arguments and jumps to the
//   _verified_entry label.
1449 void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1450 {
1451 MacroAssembler masm(&cbuf);
1452 if (!_verified) {
1454 if (UseCompressedClassPointers) {
1455 masm.load_klass(rscratch1, j_rarg0);
1456 masm.cmpptr(rax, rscratch1);
1457 } else {
1458 masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
1459 }
1460 masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1461 } else {
1462 // Unpack value type args passed as oop and then jump to
1463 // the verified entry point (skipping the unverified entry).
1464 masm.unpack_value_args(ra_->C, _receiver_only);
1465 masm.jmp(*_verified_entry);
1466 }
1467 }
1468
// Returns the node size by delegating to the generic MachNode::size(ra_).
1469 uint MachVEPNode::size(PhaseRegAlloc* ra_) const
1470 {
1471 return MachNode::size(ra_); // too many variables; just compute it the hard way
1472 }
1473
1474 //=============================================================================
1475 #ifndef PRODUCT
// Prints a textual description of the unverified entry point's inline cache
// check to st. Compiled only when PRODUCT is not defined. The text differs
// depending on UseCompressedClassPointers; both variants end with the jump to
// the IC miss stub and the alignment nop line.
1476 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1477 {
1478 if (UseCompressedClassPointers) {
// Compressed class pointers: load the narrow klass, decode it, then compare.
1479 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
1480 st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
1481 st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check");
1482 } else {
// Uncompressed: compare rax directly against the klass field in memory.
1483 st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
1484 "# Inline cache check");
1485 }
1486 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
1487 st->print_cr("\tnop\t# nops to align entry point");
1488 }
1489 #endif
1490
1491 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
1492 {
1493 MacroAssembler masm(&cbuf);
1494 uint insts_size = cbuf.insts_size();
1495 if (UseCompressedClassPointers) {
6594 format %{ "MEMBAR-storestore (empty encoding)" %}
6595 ins_encode( );
6596 ins_pipe(empty);
6597 %}
6598
6599 //----------Move Instructions--------------------------------------------------
6600
// Matches CastX2P (long register -> pointer register). A register move is
// emitted only when dst and src are different registers; otherwise the
// instruction encodes to nothing.
6601 instruct castX2P(rRegP dst, rRegL src)
6602 %{
6603 match(Set dst (CastX2P src));
6604
6605 format %{ "movq $dst, $src\t# long->ptr" %}
6606 ins_encode %{
6607 if ($dst$$reg != $src$$reg) {
6608 __ movptr($dst$$Register, $src$$Register);
6609 }
6610 %}
6611 ins_pipe(ialu_reg_reg); // XXX
6612 %}
6613
// Matches CastP2X when the source operand is of class rRegN; the match rule is
// the same as castP2X and only the operand class differs. A register move is
// emitted only when dst and src are different registers.
// NOTE(review): the format text still reads "ptr -> long", identical to
// castP2X — confirm whether it should mention the narrow source.
6614 instruct castN2X(rRegL dst, rRegN src)
6615 %{
6616 match(Set dst (CastP2X src));
6617
6618 format %{ "movq $dst, $src\t# ptr -> long" %}
6619 ins_encode %{
6620 if ($dst$$reg != $src$$reg) {
6621 __ movptr($dst$$Register, $src$$Register);
6622 }
6623 %}
6624 ins_pipe(ialu_reg_reg); // XXX
6625 %}
6626
// Matches CastP2X (pointer register -> long register). A register move is
// emitted only when dst and src are different registers; otherwise the
// instruction encodes to nothing.
6627 instruct castP2X(rRegL dst, rRegP src)
6628 %{
6629 match(Set dst (CastP2X src));
6630
6631 format %{ "movq $dst, $src\t# ptr -> long" %}
6632 ins_encode %{
6633 if ($dst$$reg != $src$$reg) {
6634 __ movptr($dst$$Register, $src$$Register);
6635 }
6636 %}
6637 ins_pipe(ialu_reg_reg); // XXX
6638 %}
6639
6640 // Convert oop into int for vectors alignment masking
6641 instruct convP2I(rRegI dst, rRegP src)
6642 %{
6643 match(Set dst (ConvL2I (CastP2X src)));
6644
6645 format %{ "movl $dst, $src\t# ptr -> int" %}
6646 ins_encode %{
10842 ins_encode %{
10843 __ movdl($dst$$XMMRegister, $src$$Register);
10844 %}
10845 ins_pipe( pipe_slow );
10846 %}
10847
// Matches MoveL2D with both operands in registers: src is a long general
// register, dst a regD operand encoded as an XMM register.
// NOTE(review): the format text prints "movd" while the encoding calls
// movdq — confirm whether the mnemonic in the format is intentional.
10848 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
10849 match(Set dst (MoveL2D src));
10850 effect(DEF dst, USE src);
10851 ins_cost(100);
10852 format %{ "movd $dst,$src\t# MoveL2D" %}
10853 ins_encode %{
10854 __ movdq($dst$$XMMRegister, $src$$Register);
10855 %}
10856 ins_pipe( pipe_slow );
10857 %}
10858
10859
10860 // =======================================================================
10861 // fast clearing of an array
// ClearArray with an explicit fill value, for nodes that are neither
// is_large() nor word_copy_only().
// Operands: cnt is pinned to rcx, base to rdi, val (the value to store) to rax;
// tmp is an XMM temporary. cnt and base are consumed (USE_KILL) and the flags
// are killed; val is only read.
// The format template only describes the expected code shape: a short
// backwards store loop for counts up to InitArrayShortSize, then a strategy
// chosen by UseFastStosb / UseXMMForObjInit. The instructions themselves are
// produced by clear_mem, called here with trailing arguments (false, false).
// NOTE(review): the XMM part of the template still shows (rax) as the store
// address although rax is bound to val here — confirm against clear_mem.
10862 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10863 Universe dummy, rFlagsReg cr)
10864 %{
10865 predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
10866 match(Set dummy (ClearArray (Binary cnt base) val));
10867 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10868
10869 format %{ $$template
10870 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10871 $$emit$$"jg LARGE\n\t"
10872 $$emit$$"dec rcx\n\t"
10873 $$emit$$"js DONE\t# Zero length\n\t"
10874 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10875 $$emit$$"dec rcx\n\t"
10876 $$emit$$"jge LOOP\n\t"
10877 $$emit$$"jmp DONE\n\t"
10878 $$emit$$"# LARGE:\n\t"
10879 if (UseFastStosb) {
10880 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10881 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
10882 } else if (UseXMMForObjInit) {
10883 $$emit$$"movdq $tmp, $val\n\t"
10884 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10885 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10886 $$emit$$"jmpq L_zero_64_bytes\n\t"
10887 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10888 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10889 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10890 $$emit$$"add 0x40,rax\n\t"
10891 $$emit$$"# L_zero_64_bytes:\n\t"
10892 $$emit$$"sub 0x8,rcx\n\t"
10893 $$emit$$"jge L_loop\n\t"
10894 $$emit$$"add 0x4,rcx\n\t"
10895 $$emit$$"jl L_tail\n\t"
10896 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10897 $$emit$$"add 0x20,rax\n\t"
10898 $$emit$$"sub 0x4,rcx\n\t"
10899 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10900 $$emit$$"add 0x4,rcx\n\t"
10901 $$emit$$"jle L_end\n\t"
10902 $$emit$$"dec rcx\n\t"
10903 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10904 $$emit$$"vmovq $tmp,(rax)\n\t"
10905 $$emit$$"add 0x8,rax\n\t"
10906 $$emit$$"dec rcx\n\t"
10907 $$emit$$"jge L_sloop\n\t"
10908 $$emit$$"# L_end:\n\t"
10909 } else {
10910 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10911 }
10912 $$emit$$"# DONE"
10913 %}
10914 ins_encode %{
10915 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10916 $tmp$$XMMRegister, false, false);
10917 %}
10918 ins_pipe(pipe_slow);
10919 %}
10920
// ClearArray with an explicit fill value, for nodes that are not is_large()
// but are word_copy_only().
// Operands: cnt is pinned to rcx, base to rdi, val (the value to store) to rax;
// tmp is an XMM temporary. cnt and base are consumed (USE_KILL) and the flags
// are killed; val is only read.
// Unlike rep_stos, the format template has no UseFastStosb (byte store)
// variant: after the short loop it shows either the XMM strategy or rep stosq.
// The instructions themselves are produced by clear_mem, called here with
// trailing arguments (false, true).
// NOTE(review): the XMM part of the template still shows (rax) as the store
// address although rax is bound to val here — confirm against clear_mem.
10921 instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10922 Universe dummy, rFlagsReg cr)
10923 %{
10924 predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
10925 match(Set dummy (ClearArray (Binary cnt base) val));
10926 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10927
10928 format %{ $$template
10929 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
10930 $$emit$$"jg LARGE\n\t"
10931 $$emit$$"dec rcx\n\t"
10932 $$emit$$"js DONE\t# Zero length\n\t"
10933 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
10934 $$emit$$"dec rcx\n\t"
10935 $$emit$$"jge LOOP\n\t"
10936 $$emit$$"jmp DONE\n\t"
10937 $$emit$$"# LARGE:\n\t"
10938 if (UseXMMForObjInit) {
10939 $$emit$$"movdq $tmp, $val\n\t"
10940 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10941 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10942 $$emit$$"jmpq L_zero_64_bytes\n\t"
10943 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10944 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10945 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10946 $$emit$$"add 0x40,rax\n\t"
10947 $$emit$$"# L_zero_64_bytes:\n\t"
10948 $$emit$$"sub 0x8,rcx\n\t"
10949 $$emit$$"jge L_loop\n\t"
10950 $$emit$$"add 0x4,rcx\n\t"
10951 $$emit$$"jl L_tail\n\t"
10952 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10953 $$emit$$"add 0x20,rax\n\t"
10954 $$emit$$"sub 0x4,rcx\n\t"
10955 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
10956 $$emit$$"add 0x4,rcx\n\t"
10957 $$emit$$"jle L_end\n\t"
10958 $$emit$$"dec rcx\n\t"
10959 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
10960 $$emit$$"vmovq $tmp,(rax)\n\t"
10961 $$emit$$"add 0x8,rax\n\t"
10962 $$emit$$"dec rcx\n\t"
10963 $$emit$$"jge L_sloop\n\t"
10964 $$emit$$"# L_end:\n\t"
10965 } else {
10966 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
10967 }
10968 $$emit$$"# DONE"
10969 %}
10970 ins_encode %{
10971 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
10972 $tmp$$XMMRegister, false, true);
10973 %}
10974 ins_pipe(pipe_slow);
10975 %}
10976
// ClearArray with an explicit fill value, for nodes that are is_large() and
// not word_copy_only().
// Operands: cnt is pinned to rcx, base to rdi, val (the value to store) to rax;
// tmp is an XMM temporary. cnt and base are consumed (USE_KILL) and the flags
// are killed; val is only read.
// The format template has no short-array loop; it shows only the strategy
// chosen by UseFastStosb / UseXMMForObjInit. The instructions themselves are
// produced by clear_mem, called here with trailing arguments (true, false).
// NOTE(review): the XMM part of the template still shows (rax) as the store
// address although rax is bound to val here — confirm against clear_mem.
10977 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
10978 Universe dummy, rFlagsReg cr)
10979 %{
10980 predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
10981 match(Set dummy (ClearArray (Binary cnt base) val));
10982 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
10983
10984 format %{ $$template
10985 if (UseFastStosb) {
10986 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
10987 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
10988 } else if (UseXMMForObjInit) {
10989 $$emit$$"movdq $tmp, $val\n\t"
10990 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
10991 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
10992 $$emit$$"jmpq L_zero_64_bytes\n\t"
10993 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
10994 $$emit$$"vmovdqu $tmp,(rax)\n\t"
10995 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
10996 $$emit$$"add 0x40,rax\n\t"
10997 $$emit$$"# L_zero_64_bytes:\n\t"
10998 $$emit$$"sub 0x8,rcx\n\t"
10999 $$emit$$"jge L_loop\n\t"
11000 $$emit$$"add 0x4,rcx\n\t"
11001 $$emit$$"jl L_tail\n\t"
11002 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11003 $$emit$$"add 0x20,rax\n\t"
11004 $$emit$$"sub 0x4,rcx\n\t"
11005 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11006 $$emit$$"add 0x4,rcx\n\t"
11007 $$emit$$"jle L_end\n\t"
11008 $$emit$$"dec rcx\n\t"
11009 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11010 $$emit$$"vmovq $tmp,(rax)\n\t"
11011 $$emit$$"add 0x8,rax\n\t"
11012 $$emit$$"dec rcx\n\t"
11013 $$emit$$"jge L_sloop\n\t"
11014 $$emit$$"# L_end:\n\t"
11015 } else {
11016 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11017 }
11018 %}
11019 ins_encode %{
11020 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11021 $tmp$$XMMRegister, true, false);
11022 %}
11023 ins_pipe(pipe_slow);
11024 %}
11025
// ClearArray with an explicit fill value, for nodes that are both is_large()
// and word_copy_only().
// Operands: cnt is pinned to rcx, base to rdi, val (the value to store) to rax;
// tmp is an XMM temporary. cnt and base are consumed (USE_KILL) and the flags
// are killed; val is only read.
// The format template has neither a short-array loop nor a UseFastStosb
// variant: it shows either the XMM strategy or rep stosq. The instructions
// themselves are produced by clear_mem, called here with trailing arguments
// (true, true).
// NOTE(review): the XMM part of the template still shows (rax) as the store
// address although rax is bound to val here — confirm against clear_mem.
11026 instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
11027 Universe dummy, rFlagsReg cr)
11028 %{
11029 predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
11030 match(Set dummy (ClearArray (Binary cnt base) val));
11031 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
11032
11033 format %{ $$template
11034 if (UseXMMForObjInit) {
11035 $$emit$$"movdq $tmp, $val\n\t"
11036 $$emit$$"punpcklqdq $tmp, $tmp\n\t"
11037 $$emit$$"vinserti128_high $tmp, $tmp\n\t"
11038 $$emit$$"jmpq L_zero_64_bytes\n\t"
11039 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11040 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11041 $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
11042 $$emit$$"add 0x40,rax\n\t"
11043 $$emit$$"# L_zero_64_bytes:\n\t"
11044 $$emit$$"sub 0x8,rcx\n\t"
11045 $$emit$$"jge L_loop\n\t"
11046 $$emit$$"add 0x4,rcx\n\t"
11047 $$emit$$"jl L_tail\n\t"
11048 $$emit$$"vmovdqu $tmp,(rax)\n\t"
11049 $$emit$$"add 0x20,rax\n\t"
11050 $$emit$$"sub 0x4,rcx\n\t"
11051 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11052 $$emit$$"add 0x4,rcx\n\t"
11053 $$emit$$"jle L_end\n\t"
11054 $$emit$$"dec rcx\n\t"
11055 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11056 $$emit$$"vmovq $tmp,(rax)\n\t"
11057 $$emit$$"add 0x8,rax\n\t"
11058 $$emit$$"dec rcx\n\t"
11059 $$emit$$"jge L_sloop\n\t"
11060 $$emit$$"# L_end:\n\t"
11061 } else {
11062 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
11063 }
11064 %}
11065 ins_encode %{
11066 __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
11067 $tmp$$XMMRegister, true, true);
11068 %}
11069 ins_pipe(pipe_slow);
11070 %}
11071
// Matches StrComp when the node's encoding is StrIntrinsicNode::LL (the format
// text describes the operands as byte[]).
// Operands: str1 in rdi, cnt1 in rcx, str2 in rsi, cnt2 in rdx, result in rax;
// tmp1 is a legVecS temporary. All four inputs are consumed (USE_KILL) and the
// flags are killed. Code generation is delegated to string_compare.
11072 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
11073 rax_RegI result, legVecS tmp1, rFlagsReg cr)
11074 %{
11075 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11076 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11077 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11078
11079 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
11080 ins_encode %{
11081 __ string_compare($str1$$Register, $str2$$Register,
11082 $cnt1$$Register, $cnt2$$Register, $result$$Register,
11083 $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11084 %}
11085 ins_pipe( pipe_slow );
11086 %}
11087
12620
12621 ins_cost(300);
12622 format %{ "call,runtime " %}
12623 ins_encode(clear_avx, Java_To_Runtime(meth));
12624 ins_pipe(pipe_slow);
12625 %}
12626
12627 // Call runtime without safepoint
// Matches CallLeaf. The encoding is the clear_avx enc_class followed by
// Java_To_Runtime(meth).
12628 instruct CallLeafDirect(method meth)
12629 %{
12630 match(CallLeaf);
12631 effect(USE meth);
12632
12633 ins_cost(300);
12634 format %{ "call_leaf,runtime " %}
12635 ins_encode(clear_avx, Java_To_Runtime(meth));
12636 ins_pipe(pipe_slow);
12637 %}
12638
12639 // Call runtime without safepoint
12640 // entry point is null, target holds the address to call
// Selected for CallLeafNoFP nodes whose entry_point() is NULL; the call is
// made indirectly through the register operand target.
// NOTE(review): unlike CallLeafNoFPDirect, this encoding does not include
// clear_avx before the call — confirm that this is intentional.
12641 instruct CallLeafNoFPInDirect(rRegP target)
12642 %{
12643 predicate(n->as_Call()->entry_point() == NULL);
12644 match(CallLeafNoFP target);
12645
12646 ins_cost(300);
12647 format %{ "call_leaf_nofp,runtime indirect " %}
12648 ins_encode %{
12649 __ call($target$$Register);
12650 %}
12651
12652 ins_pipe(pipe_slow);
12653 %}
12654
// Call runtime without safepoint, direct form: selected for CallLeafNoFP nodes
// whose entry_point() is not NULL. The encoding is clear_avx followed by
// Java_To_Runtime(meth).
12655 instruct CallLeafNoFPDirect(method meth)
12656 %{
12657 predicate(n->as_Call()->entry_point() != NULL);
12658 match(CallLeafNoFP);
12659 effect(USE meth);
12660
12661 ins_cost(300);
12662 format %{ "call_leaf_nofp,runtime " %}
12663 ins_encode(clear_avx, Java_To_Runtime(meth));
12664 ins_pipe(pipe_slow);
12665 %}
12666
12667 // Return Instruction
12668 // Remove the return address & jump to it.
12669 // Notice: We always emit a nop after a ret to make sure there is room
12670 // for safepoint patching
12671 instruct Ret()
12672 %{
12673 match(Return);
12674
12675 format %{ "ret" %}
12676 opcode(0xC3);
12677 ins_encode(OpcP);
|