hotspot_sincos Sdiff src/cpu/x86/vm

src/cpu/x86/vm/macroAssembler_x86.cpp

 954 }
 955 
 956 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {  // Emit addsd (scalar double add) of dst with the memory operand named by the literal src.
 957   if (reachable(src)) {  // reachable(src) true: hand the literal to the instruction directly via as_Address
 958     Assembler::addsd(dst, as_Address(src));
 959   } else {
 960     lea(rscratch1, src);  // otherwise put the literal's address in rscratch1 (its previous value is overwritten)
 961     Assembler::addsd(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
 962   }
 963 }
 964 
 965 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {  // Emit addss (scalar float add) of dst with the memory operand named by the literal src.
 966   if (reachable(src)) {  // reachable(src) true: pass the literal through as_Address
 967     addss(dst, as_Address(src));  // NOTE(review): unqualified call, unlike addsd above which names Assembler:: explicitly; presumably resolves to an addss(XMMRegister, Address) overload -- confirm
 968   } else {
 969     lea(rscratch1, src);  // otherwise put the literal's address in rscratch1 (its previous value is overwritten)
 970     addss(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
 971   }
 972 }
 973 









 974 void MacroAssembler::align(int modulus) {  // Align relative to the value returned by offset(); forwards to the two-argument overload.
 975   align(modulus, offset());
 976 }
 977 
 978 void MacroAssembler::align(int modulus, int target) {  // Pad with nop() when target is not a multiple of modulus; modulus must be non-zero because it is the right operand of %.
 979   if (target % modulus != 0) {  // nothing is emitted when target is already a multiple of modulus
 980     nop(modulus - (target % modulus));  // argument is the distance from target up to the next multiple of modulus
 981   }
 982 }
 983 
 984 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {  // Emit andpd (packed double bitwise AND) of dst with the memory operand named by the literal src.
 985   // Used in sign-masking with aligned address.
 986   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");  // when UseAVX is 0 the low four bits of src.target() must be zero
 987   if (reachable(src)) {  // reachable(src) true: hand the literal to the instruction directly via as_Address
 988     Assembler::andpd(dst, as_Address(src));
 989   } else {
 990     lea(rscratch1, src);  // otherwise put the literal's address in rscratch1 (its previous value is overwritten)
 991     Assembler::andpd(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
 992   }
 993 }

5930   popa();
5931 }
5932 
5933 static const double     pi_4 =  0.7853981633974483;  // pi/4 as a double; trigfunc below takes its address to compare |x| against it
5934 
5935 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
5936   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
5937   // was attempted in this code; unfortunately it appears that the
5938   // switch to 80-bit precision and back causes this to be
5939   // unprofitable compared with simply performing a runtime call if
5940   // the argument is out of the (-pi/4, pi/4) range.
5941 
5942   Register tmp = noreg;
5943   if (!VM_Version::supports_cmov()) {
5944     // fcmp needs a temporary so preserve rbx,
5945     tmp = rbx;
5946     push(tmp);
5947   }
5948 
5949   Label slow_case, done;
5950 
5951   ExternalAddress pi4_adr = (address)&pi_4;
5952   if (reachable(pi4_adr)) {
5953     // x ?<= pi/4
5954     fld_d(pi4_adr);
5955     fld_s(1);                // Stack:  X  PI/4  X
5956     fabs();                  // Stack: |X| PI/4  X
5957     fcmp(tmp);
5958     jcc(Assembler::above, slow_case);
5959 
5960     // fastest case: -pi/4 <= x <= pi/4
5961     switch(trig) {
5962     case 's':
5963       fsin();
5964       break;
5965     case 'c':
5966       fcos();
5967       break;
5968     case 't':
5969       ftan();
5970       break;
5971     default:
5972       assert(false, "bad intrinsic");
5973       break;
5974     }
5975     jmp(done);
5976   }
5977 
5978   // slow case: runtime call
5979   bind(slow_case);
5980 
5981   switch(trig) {
5982   case 's':
5983     {
5984       fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
5985     }
5986     break;
5987   case 'c':
5988     {
5989       fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
5990     }
5991     break;
5992   case 't':
5993     {
5994       fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
5995     }
5996     break;
5997   default:

 954 }
 955 
 956 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {  // Emit addsd (scalar double add) of dst with the memory operand named by the literal src.
 957   if (reachable(src)) {  // reachable(src) true: hand the literal to the instruction directly via as_Address
 958     Assembler::addsd(dst, as_Address(src));
 959   } else {
 960     lea(rscratch1, src);  // otherwise put the literal's address in rscratch1 (its previous value is overwritten)
 961     Assembler::addsd(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
 962   }
 963 }
 964 
 965 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {  // Emit addss (scalar float add) of dst with the memory operand named by the literal src.
 966   if (reachable(src)) {  // reachable(src) true: pass the literal through as_Address
 967     addss(dst, as_Address(src));  // NOTE(review): unqualified call, unlike addsd/addpd which name Assembler:: explicitly; presumably resolves to an addss(XMMRegister, Address) overload -- confirm
 968   } else {
 969     lea(rscratch1, src);  // otherwise put the literal's address in rscratch1 (its previous value is overwritten)
 970     addss(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
 971   }
 972 }
 973 
 974 void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) {  // Emit addpd (packed double add) of dst with the memory operand named by the literal src.
 975   if (reachable(src)) {  // NOTE(review): no 16-byte alignment assert here, unlike andpd below -- confirm callers pass aligned literals when UseAVX is 0
 976     Assembler::addpd(dst, as_Address(src));
 977   } else {
 978     lea(rscratch1, src);  // literal not reachable: put its address in rscratch1 (its previous value is overwritten)
 979     Assembler::addpd(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
 980   }
 981 }
 982 
 983 void MacroAssembler::align(int modulus) {  // Align relative to the value returned by offset(); forwards to the two-argument overload.
 984   align(modulus, offset());
 985 }
 986 
 987 void MacroAssembler::align(int modulus, int target) {  // Pad with nop() when target is not a multiple of modulus; modulus must be non-zero because it is the right operand of %.
 988   if (target % modulus != 0) {  // nothing is emitted when target is already a multiple of modulus
 989     nop(modulus - (target % modulus));  // argument is the distance from target up to the next multiple of modulus
 990   }
 991 }
 992 
 993 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {  // Emit andpd (packed double bitwise AND) of dst with the memory operand named by the literal src.
 994   // Used in sign-masking with aligned address.
 995   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");  // when UseAVX is 0 the low four bits of src.target() must be zero
 996   if (reachable(src)) {  // reachable(src) true: hand the literal to the instruction directly via as_Address
 997     Assembler::andpd(dst, as_Address(src));
 998   } else {
 999     lea(rscratch1, src);  // otherwise put the literal's address in rscratch1 (its previous value is overwritten)
1000     Assembler::andpd(dst, Address(rscratch1, 0));  // and use rscratch1 as the base with displacement 0
1001   }
1002 }

5939   popa();
5940 }
5941 
5942 static const double     pi_4 =  0.7853981633974483;  // pi/4 as a double; trigfunc below takes its address for the |x| vs pi/4 check on the 't' path
5943 
5944 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
5945   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
5946   // was attempted in this code; unfortunately it appears that the
5947   // switch to 80-bit precision and back causes this to be
5948   // unprofitable compared with simply performing a runtime call if
5949   // the argument is out of the (-pi/4, pi/4) range.
5950 
5951   Register tmp = noreg;
5952   if (!VM_Version::supports_cmov()) {
5953     // fcmp needs a temporary so preserve rbx,
5954     tmp = rbx;
5955     push(tmp);
5956   }
5957 
5958   Label slow_case, done;
5959   if (trig == 't') {
5960     ExternalAddress pi4_adr = (address)&pi_4;
5961     if (reachable(pi4_adr)) {
5962       // x ?<= pi/4
5963       fld_d(pi4_adr);
5964       fld_s(1);                // Stack:  X  PI/4  X
5965       fabs();                  // Stack: |X| PI/4  X
5966       fcmp(tmp);
5967       jcc(Assembler::above, slow_case);
5968 
5969       // fastest case: -pi/4 <= x <= pi/4








5970       ftan();
5971 




5972       jmp(done);
5973     }
5974   }
5975   // slow case: runtime call
5976   bind(slow_case);
5977 
5978   switch(trig) {
5979   case 's':
5980     {
5981       fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
5982     }
5983     break;
5984   case 'c':
5985     {
5986       fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
5987     }
5988     break;
5989   case 't':
5990     {
5991       fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
5992     }
5993     break;
5994   default:

< prev index next >