// NOTE(review): the text below appears to be a side-by-side diff of two
// revisions of an x86 MacroAssembler source file, flattened onto four physical
// lines; it is not compilable C++ as it stands. The bare integers (954..1002,
// 5930..5997) look like the diff's own line numbers, and the "|" characters
// appear to separate the left column from the right column -- TODO confirm
// against the real source file before relying on any of this.
//
// Left column, first part (the physical line that follows):
//   addsd / addss / andpd (XMMRegister dst, AddressLiteral src):
//     if reachable(src), the instruction is issued on as_Address(src);
//     otherwise lea(rscratch1, src) is issued first and the instruction is
//     issued on Address(rscratch1, 0), so rscratch1 is written on that path.
//     addsd and andpd call the Assembler::-qualified form; addss calls
//     unqualified addss -- presumably an (XMMRegister, Address) overload
//     declared elsewhere; verify against the class header.
//   andpd additionally asserts that either UseAVX > 0 or the low four bits of
//     src.target() are zero (16-byte alignment).
//   align(int modulus): forwards to align(modulus, offset()).
//   align(int modulus, int target): when target is not a multiple of modulus,
//     calls nop(modulus - (target % modulus)); otherwise does nothing.
//     modulus == 0 is not checked here.
//   "popa(); }" is the tail of a definition that starts outside this excerpt.
//   pi_4: file-local constant 0.7853981633974483, used by trigfunc.
//   trigfunc(char trig, int num_fpu_regs_in_use): only its opening comment is
//     on this physical line; the body continues on the next one.
954 } 955 956 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { 957 if (reachable(src)) { 958 Assembler::addsd(dst, as_Address(src)); 959 } else { 960 lea(rscratch1, src); 961 Assembler::addsd(dst, Address(rscratch1, 0)); 962 } 963 } 964 965 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 966 if (reachable(src)) { 967 addss(dst, as_Address(src)); 968 } else { 969 lea(rscratch1, src); 970 addss(dst, Address(rscratch1, 0)); 971 } 972 } 973 974 void MacroAssembler::align(int modulus) { 975 align(modulus, offset()); 976 } 977 978 void MacroAssembler::align(int modulus, int target) { 979 if (target % modulus != 0) { 980 nop(modulus - (target % modulus)); 981 } 982 } 983 984 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 985 // Used in sign-masking with aligned address. 986 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 987 if (reachable(src)) { 988 Assembler::andpd(dst, as_Address(src)); 989 } else { 990 lea(rscratch1, src); 991 Assembler::andpd(dst, Address(rscratch1, 0)); 992 } 993 } 5930 popa(); 5931 } 5932 5933 static const double pi_4 = 0.7853981633974483; 5934 5935 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 5936 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 5937 // was attempted in this code; unfortunately it appears that the 5938 // switch to 80-bit precision and back causes this to be 5939 // unprofitable compared with simply performing a runtime call if 5940 // the argument is out of the (-pi/4, pi/4) range. 
// Left column, trigfunc body (continues from the previous physical line):
//   - When VM_Version::supports_cmov() is false, rbx is pushed and passed to
//     fcmp as its temporary; otherwise tmp stays noreg. The matching restore
//     of rbx is not visible in this excerpt.
//   - If the address of pi_4 is reachable: loads pi/4, duplicates X, takes
//     fabs, compares via fcmp(tmp) and jumps to slow_case on
//     Assembler::above. Otherwise it issues fsin / fcos / ftan according to
//     trig ('s' / 'c' / 't'; anything else hits assert(false, ...)) and jumps
//     to done. In this column all three functions have a fast path.
//   - slow_case is always bound; there trig selects SharedRuntime::dsin /
//     dcos / dtan, passed to fp_runtime_fallback with the literal 1 and
//     num_fpu_regs_in_use. The excerpt is cut off at "default:".
// After the "|" the right column starts over at diff line 954: addsd and addss
// are repeated unchanged, addpd(XMMRegister, AddressLiteral) is new and
// follows the same reachable / lea(rscratch1) pattern using Assembler::addpd,
// and align(int modulus) begins (its body is on the next physical line).
5941 5942 Register tmp = noreg; 5943 if (!VM_Version::supports_cmov()) { 5944 // fcmp needs a temporary so preserve rbx, 5945 tmp = rbx; 5946 push(tmp); 5947 } 5948 5949 Label slow_case, done; 5950 5951 ExternalAddress pi4_adr = (address)&pi_4; 5952 if (reachable(pi4_adr)) { 5953 // x ?<= pi/4 5954 fld_d(pi4_adr); 5955 fld_s(1); // Stack: X PI/4 X 5956 fabs(); // Stack: |X| PI/4 X 5957 fcmp(tmp); 5958 jcc(Assembler::above, slow_case); 5959 5960 // fastest case: -pi/4 <= x <= pi/4 5961 switch(trig) { 5962 case 's': 5963 fsin(); 5964 break; 5965 case 'c': 5966 fcos(); 5967 break; 5968 case 't': 5969 ftan(); 5970 break; 5971 default: 5972 assert(false, "bad intrinsic"); 5973 break; 5974 } 5975 jmp(done); 5976 } 5977 5978 // slow case: runtime call 5979 bind(slow_case); 5980 5981 switch(trig) { 5982 case 's': 5983 { 5984 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 5985 } 5986 break; 5987 case 'c': 5988 { 5989 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 5990 } 5991 break; 5992 case 't': 5993 { 5994 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 5995 } 5996 break; 5997 default: | 954 } 955 956 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { 957 if (reachable(src)) { 958 Assembler::addsd(dst, as_Address(src)); 959 } else { 960 lea(rscratch1, src); 961 Assembler::addsd(dst, Address(rscratch1, 0)); 962 } 963 } 964 965 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 966 if (reachable(src)) { 967 addss(dst, as_Address(src)); 968 } else { 969 lea(rscratch1, src); 970 addss(dst, Address(rscratch1, 0)); 971 } 972 } 973 974 void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) { 975 if (reachable(src)) { 976 Assembler::addpd(dst, as_Address(src)); 977 } else { 978 lea(rscratch1, src); 979 Assembler::addpd(dst, Address(rscratch1, 0)); 980 } 981 } 982 983 void MacroAssembler::align(int modulus) 
// Right column, continued: the body of align(int modulus), then
// align(int modulus, int target), andpd, the "popa(); }" tail, pi_4 and the
// opening comment of trigfunc. Apart from the shifted diff line numbers these
// read the same as their left-column counterparts: align pads with
// nop(modulus - (target % modulus)) only when target % modulus != 0, and
// andpd asserts 16-byte alignment of src.target() unless UseAVX > 0 before
// choosing between as_Address(src) and lea(rscratch1, src).
{ 984 align(modulus, offset()); 985 } 986 987 void MacroAssembler::align(int modulus, int target) { 988 if (target % modulus != 0) { 989 nop(modulus - (target % modulus)); 990 } 991 } 992 993 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 994 // Used in sign-masking with aligned address. 995 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 996 if (reachable(src)) { 997 Assembler::andpd(dst, as_Address(src)); 998 } else { 999 lea(rscratch1, src); 1000 Assembler::andpd(dst, Address(rscratch1, 0)); 1001 } 1002 } 5939 popa(); 5940 } 5941 5942 static const double pi_4 = 0.7853981633974483; 5943 5944 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 5945 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 5946 // was attempted in this code; unfortunately it appears that the 5947 // switch to 80-bit precision and back causes this to be 5948 // unprofitable compared with simply performing a runtime call if 5949 // the argument is out of the (-pi/4, pi/4) range. 
// Right column, trigfunc body. Difference from the left column: the inline
// fast path is wrapped in "if (trig == 't')", so only tangent gets the
// |X| vs pi/4 comparison followed by ftan() and jmp(done); for 's' and 'c'
// no inline fsin / fcos is issued and control falls straight through to the
// bound slow_case label, where fp_runtime_fallback is called with
// SharedRuntime::dsin / dcos / dtan as before. The rbx push when
// VM_Version::supports_cmov() is false is unchanged. The excerpt again ends
// at "default:", so the remainder of the function is not visible here.
5950 5951 Register tmp = noreg; 5952 if (!VM_Version::supports_cmov()) { 5953 // fcmp needs a temporary so preserve rbx, 5954 tmp = rbx; 5955 push(tmp); 5956 } 5957 5958 Label slow_case, done; 5959 if (trig == 't') { 5960 ExternalAddress pi4_adr = (address)&pi_4; 5961 if (reachable(pi4_adr)) { 5962 // x ?<= pi/4 5963 fld_d(pi4_adr); 5964 fld_s(1); // Stack: X PI/4 X 5965 fabs(); // Stack: |X| PI/4 X 5966 fcmp(tmp); 5967 jcc(Assembler::above, slow_case); 5968 5969 // fastest case: -pi/4 <= x <= pi/4 5970 ftan(); 5971 5972 jmp(done); 5973 } 5974 } 5975 // slow case: runtime call 5976 bind(slow_case); 5977 5978 switch(trig) { 5979 case 's': 5980 { 5981 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 5982 } 5983 break; 5984 case 'c': 5985 { 5986 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 5987 } 5988 break; 5989 case 't': 5990 { 5991 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 5992 } 5993 break; 5994 default: |