1031 rc_float, 1032 rc_stack 1033 }; 1034 1035 static enum RC rc_class(OptoReg::Name reg) 1036 { 1037 if( !OptoReg::is_valid(reg) ) return rc_bad; 1038 1039 if (OptoReg::is_stack(reg)) return rc_stack; 1040 1041 VMReg r = OptoReg::as_VMReg(reg); 1042 1043 if (r->is_Register()) return rc_int; 1044 1045 assert(r->is_XMMRegister(), "must be"); 1046 return rc_float; 1047 } 1048 1049 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 1050 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1051 int src_hi, int dst_hi, uint ireg, outputStream* st); 1052 1053 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1054 int stack_offset, int reg, uint ireg, outputStream* st); 1055 1056 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 1057 int dst_offset, uint ireg, outputStream* st) { 1058 if (cbuf) { 1059 MacroAssembler _masm(cbuf); 1060 switch (ireg) { 1061 case Op_VecS: 1062 __ movq(Address(rsp, -8), rax); 1063 __ movl(rax, Address(rsp, src_offset)); 1064 __ movl(Address(rsp, dst_offset), rax); 1065 __ movq(rax, Address(rsp, -8)); 1066 break; 1067 case Op_VecD: 1068 __ pushq(Address(rsp, src_offset)); 1069 __ popq (Address(rsp, dst_offset)); 1070 break; 1071 case Op_VecX: 1072 __ pushq(Address(rsp, src_offset)); 1073 __ popq (Address(rsp, dst_offset)); 1074 __ pushq(Address(rsp, src_offset+8)); 1075 __ popq (Address(rsp, dst_offset+8)); 1076 break; 1077 case Op_VecY: 1078 __ vmovdqu(Address(rsp, -32), xmm0); 1079 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1080 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1081 __ vmovdqu(xmm0, Address(rsp, -32)); 1082 break; 1083 case Op_VecZ: 1084 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1085 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1086 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1087 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1088 break; 1089 default: 1090 ShouldNotReachHere(); 1091 } 1092 #ifndef PRODUCT 1093 } else { 
1094 switch (ireg) { 1095 case Op_VecS: 1096 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" 1097 "movl rax, [rsp + #%d]\n\t" 1098 "movl [rsp + #%d], rax\n\t" 1099 "movq rax, [rsp - #8]", 1100 src_offset, dst_offset); 1101 break; 1102 case Op_VecD: 1103 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1104 "popq [rsp + #%d]", 1105 src_offset, dst_offset); 1106 break; 1107 case Op_VecX: 1108 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t" 1109 "popq [rsp + #%d]\n\t" 1110 "pushq [rsp + #%d]\n\t" 1111 "popq [rsp + #%d]", 1112 src_offset, dst_offset, src_offset+8, dst_offset+8); 1113 break; 1114 case Op_VecY: 1115 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1116 "vmovdqu xmm0, [rsp + #%d]\n\t" 1117 "vmovdqu [rsp + #%d], xmm0\n\t" 1118 "vmovdqu xmm0, [rsp - #32]", 1119 src_offset, dst_offset); 1120 break; 1121 case Op_VecZ: 1122 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1123 "vmovdqu xmm0, [rsp + #%d]\n\t" 1124 "vmovdqu [rsp + #%d], xmm0\n\t" 1125 "vmovdqu xmm0, [rsp - #64]", 1126 src_offset, dst_offset); 1127 break; 1128 default: 1129 ShouldNotReachHere(); 1130 } 1131 #endif 1132 } 1133 } 1134 1135 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, 1136 PhaseRegAlloc* ra_, 1137 bool do_size, 1138 outputStream* st) const { 1139 assert(cbuf != NULL || st != NULL, "sanity"); 1140 // Get registers to move 1141 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1142 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1143 OptoReg::Name dst_second = ra_->get_reg_second(this); 1144 OptoReg::Name dst_first = ra_->get_reg_first(this); 1145 1146 enum RC src_second_rc = rc_class(src_second); 1147 enum RC src_first_rc = rc_class(src_first); 1148 enum RC dst_second_rc = rc_class(dst_second); 1149 enum RC dst_first_rc = rc_class(dst_first); 1150 1151 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), 1152 "must move at least 1 register" ); 1153 1154 if (src_first == 
dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector-typed copies: dispatch on the (source, destination) register
  // classes.  Integer registers are asserted never to take part.  Every helper
  // is called with do_size == false where it takes that flag, the helpers'
  // return values are not used, and this path always returns 0.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (third argument is the helper's is_load flag: false here)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (is_load flag: true)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
  // of ideal registers called "sig" and a "length" count. Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE. Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  // Java calling convention: forwards sig_bt/regs/length to
  // SharedRuntime::java_calling_convention.
  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  // C calling convention: forwards to SharedRuntime::c_calling_convention with
  // a NULL regs2; the call's result is deliberately discarded.
  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of compiled Java return values. Same as C for now.
  return_value
  %{
    // The assert restricts ideal_reg to the range Op_RegI..Op_RegL.
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo[] and hi[] are indexed by ideal_reg and supply the two halves of the
    // returned OptoRegPair.  OptoReg::Bad in hi[] presumably marks a value
    // with no second half - confirm against OptoRegPair.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num, // Op_RegN
      RAX_num, // Op_RegI
      RAX_num, // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num, // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num, // Op_RegD
      RAX_H_num // Op_RegL
    };
    // Excluded flags and vector registers.
    // Fails if the table size stops matching _last_machine_leaf - 6.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0); // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100); // Required cost attribute
ins_attrib ins_size(8); // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1); // Required alignment attribute (must
                             // be a power of 2) specifies the
                             // alignment that some part of the
                             // instruction (not necessarily the
                             // start) requires.
If > 1, a 2867 // compute_padding() function must be 2868 // provided for the instruction | 1031 rc_float, 1032 rc_stack 1033 }; 1034 1035 static enum RC rc_class(OptoReg::Name reg) 1036 { 1037 if( !OptoReg::is_valid(reg) ) return rc_bad; 1038 1039 if (OptoReg::is_stack(reg)) return rc_stack; 1040 1041 VMReg r = OptoReg::as_VMReg(reg); 1042 1043 if (r->is_Register()) return rc_int; 1044 1045 assert(r->is_XMMRegister(), "must be"); 1046 return rc_float; 1047 } 1048 1049 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 1050 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1051 int src_hi, int dst_hi, Opcodes ireg, outputStream* st); 1052 1053 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1054 int stack_offset, int reg, Opcodes ireg, outputStream* st); 1055 1056 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 1057 int dst_offset, Opcodes ireg, outputStream* st) { 1058 if (cbuf) { 1059 MacroAssembler _masm(cbuf); 1060 switch (ireg) { 1061 case Opcodes::Op_VecS: 1062 __ movq(Address(rsp, -8), rax); 1063 __ movl(rax, Address(rsp, src_offset)); 1064 __ movl(Address(rsp, dst_offset), rax); 1065 __ movq(rax, Address(rsp, -8)); 1066 break; 1067 case Opcodes::Op_VecD: 1068 __ pushq(Address(rsp, src_offset)); 1069 __ popq (Address(rsp, dst_offset)); 1070 break; 1071 case Opcodes::Op_VecX: 1072 __ pushq(Address(rsp, src_offset)); 1073 __ popq (Address(rsp, dst_offset)); 1074 __ pushq(Address(rsp, src_offset+8)); 1075 __ popq (Address(rsp, dst_offset+8)); 1076 break; 1077 case Opcodes::Op_VecY: 1078 __ vmovdqu(Address(rsp, -32), xmm0); 1079 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1080 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1081 __ vmovdqu(xmm0, Address(rsp, -32)); 1082 break; 1083 case Opcodes::Op_VecZ: 1084 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1085 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1086 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 
1087 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1088 break; 1089 default: 1090 ShouldNotReachHere(); 1091 } 1092 #ifndef PRODUCT 1093 } else { 1094 switch (ireg) { 1095 case Opcodes::Op_VecS: 1096 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" 1097 "movl rax, [rsp + #%d]\n\t" 1098 "movl [rsp + #%d], rax\n\t" 1099 "movq rax, [rsp - #8]", 1100 src_offset, dst_offset); 1101 break; 1102 case Opcodes::Op_VecD: 1103 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1104 "popq [rsp + #%d]", 1105 src_offset, dst_offset); 1106 break; 1107 case Opcodes::Op_VecX: 1108 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t" 1109 "popq [rsp + #%d]\n\t" 1110 "pushq [rsp + #%d]\n\t" 1111 "popq [rsp + #%d]", 1112 src_offset, dst_offset, src_offset+8, dst_offset+8); 1113 break; 1114 case Opcodes::Op_VecY: 1115 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1116 "vmovdqu xmm0, [rsp + #%d]\n\t" 1117 "vmovdqu [rsp + #%d], xmm0\n\t" 1118 "vmovdqu xmm0, [rsp - #32]", 1119 src_offset, dst_offset); 1120 break; 1121 case Opcodes::Op_VecZ: 1122 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1123 "vmovdqu xmm0, [rsp + #%d]\n\t" 1124 "vmovdqu [rsp + #%d], xmm0\n\t" 1125 "vmovdqu xmm0, [rsp - #64]", 1126 src_offset, dst_offset); 1127 break; 1128 default: 1129 ShouldNotReachHere(); 1130 } 1131 #endif 1132 } 1133 } 1134 1135 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, 1136 PhaseRegAlloc* ra_, 1137 bool do_size, 1138 outputStream* st) const { 1139 assert(cbuf != NULL || st != NULL, "sanity"); 1140 // Get registers to move 1141 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1142 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1143 OptoReg::Name dst_second = ra_->get_reg_second(this); 1144 OptoReg::Name dst_first = ra_->get_reg_first(this); 1145 1146 enum RC src_second_rc = rc_class(src_second); 1147 enum RC src_first_rc = rc_class(src_first); 1148 enum RC dst_second_rc = rc_class(dst_second); 1149 enum RC 
dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector-typed copies: dispatch on the (source, destination) register
  // classes.  Integer registers are asserted never to take part.  Every helper
  // is called with do_size == false where it takes that flag, the helpers'
  // return values are not used, and this path always returns 0.
  if (bottom_type()->isa_vect() != NULL) {
    Opcodes ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Opcodes::Op_VecS || ireg == Opcodes::Op_VecD || ireg == Opcodes::Op_VecX || ireg == Opcodes::Op_VecY || ireg == Opcodes::Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      // xmm -> xmm
      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      // xmm -> mem (third argument is the helper's is_load flag: false here)
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      // mem -> xmm (is_load flag: true)
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
  // of ideal registers called "sig" and a "length" count. Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE. Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  // Java calling convention: forwards sig_bt/regs/length to
  // SharedRuntime::java_calling_convention.
  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  // C calling convention: forwards to SharedRuntime::c_calling_convention with
  // a NULL regs2; the call's result is deliberately discarded.
  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of compiled Java return values. Same as C for now.
  return_value
  %{
    // The assert restricts ideal_reg to the range Op_RegI..Op_RegL.
    assert(ideal_reg >= Opcodes::Op_RegI && ideal_reg <= Opcodes::Op_RegL,
           "only return normal values");

    // lo[] and hi[] are indexed by the integer value of ideal_reg and supply
    // the two halves of the returned OptoRegPair.  OptoReg::Bad in hi[]
    // presumably marks a value with no second half - confirm against
    // OptoRegPair.
    static const int lo[static_cast<uint>(Opcodes::Op_RegL) + 1] = {
      0,
      0,
      RAX_num, // Op_RegN
      RAX_num, // Op_RegI
      RAX_num, // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num // Op_RegL
    };
    static const int hi[static_cast<uint>(Opcodes::Op_RegL) + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num, // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num, // Op_RegD
      RAX_H_num // Op_RegL
    };
    // Excluded flags and vector registers.
    // Fails if the table size stops matching _last_machine_leaf - 6.
    assert(ARRAY_SIZE(hi) == static_cast<uint>(Opcodes::_last_machine_leaf) - 6, "missing type");
    return OptoRegPair(hi[static_cast<uint>(ideal_reg)], lo[static_cast<uint>(ideal_reg)]);
  %}
%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0); // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100); // Required cost attribute
ins_attrib ins_size(8); // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1); // Required alignment attribute (must
                             // be a power of 2) specifies the
                             // alignment that some part of the
                             // instruction (not necessarily the
                             // start) requires. If > 1, a
                             // compute_padding() function must be
                             // provided for the instruction |