src/cpu/x86/vm/assembler_x86.cpp

Print this page
rev 3362 : 7174532: jdk/test/java/lang/Math/WorstCaseTests.java failing on x86
Summary: increase precision on x86 for the steps of the computation of exp and pow.
Reviewed-by:


6910   shll(rax,20);
6911   // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
6912   addl(rdx,1);
6913   // Check that 1 < int(X)+1023+1 < 2048
6914   // in 3 steps:
6915   // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
6916   // 2- (int(X)+1023+1) != 0
6917   // 3- (int(X)+1023+1) != 1
6918   // Do 2- first because addl just updated the flags.
6919   cmov32(Assembler::equal,rax,rcx);
6920   cmpl(rdx,1);
6921   cmov32(Assembler::equal,rax,rcx);
6922   testl(rdx,rcx);
6923   cmov32(Assembler::notEqual,rax,rcx);
6924   movl(Address(rsp,4),rax);
6925   movl(Address(rsp,0),0);
6926   fmul_d(Address(rsp,0));   // Stack: 2^X ...
6927   addptr(rsp,sizeof(jdouble));
6928 }
6929 















6930 void MacroAssembler::fast_pow() {
6931   // computes X^Y = 2^(Y * log2(X))
6932   // if fast computation is not possible, result is NaN. Requires
6933   // fallback from user of this macro.


6934   fyl2x();                 // Stack: (Y*log2(X)) ...
6935   pow_exp_core_encoding(); // Stack: 2^(Y*log2(X)) = X^Y ...

6936 }
6937 
6938 void MacroAssembler::fast_exp() {
6939   // computes exp(X) = 2^(X * log2(e))
6940   // if fast computation is not possible, result is NaN. Requires
6941   // fallback from user of this macro.


6942   fldl2e();                // Stack: log2(e) X ...
6943   fmulp(1);                // Stack: (X*log2(e)) ...
6944   pow_exp_core_encoding(); // Stack: 2^(X*log2(e)) = exp(X) ...

6945 }
6946 
6947 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
6948   // kills rax, rcx, rdx
6949   // pow and exp needs 2 extra registers on the fpu stack.
6950   Label slow_case, done;
6951   Register tmp = noreg;
6952   if (!VM_Version::supports_cmov()) {
6953     // fcmp needs a temporary so preserve rdx,
6954     tmp = rdx;
6955   }
6956   Register tmp2 = rax;
6957   Register tmp3 = rcx;
6958 
6959   if (is_exp) {
6960     // Stack: X
6961     fld_s(0);                   // duplicate argument for runtime call. Stack: X X
6962     fast_exp();                 // Stack: exp(X) X
6963     fcmp(tmp, 0, false, false); // Stack: exp(X) X
6964     // exp(X) not equal to itself: exp(X) is NaN go to slow case.




6910   shll(rax,20);
6911   // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
6912   addl(rdx,1);
6913   // Check that 1 < int(X)+1023+1 < 2048
6914   // in 3 steps:
6915   // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
6916   // 2- (int(X)+1023+1) != 0
6917   // 3- (int(X)+1023+1) != 1
6918   // Do 2- first because addl just updated the flags.
6919   cmov32(Assembler::equal,rax,rcx);
6920   cmpl(rdx,1);
6921   cmov32(Assembler::equal,rax,rcx);
6922   testl(rdx,rcx);
6923   cmov32(Assembler::notEqual,rax,rcx);
6924   movl(Address(rsp,4),rax);
6925   movl(Address(rsp,0),0);
6926   fmul_d(Address(rsp,0));   // Stack: 2^X ...
6927   addptr(rsp,sizeof(jdouble));
6928 }
6929 
6930 void MacroAssembler::increase_precision() {
       // Raises the x87 FPU precision for the code emitted after this call.
       // The current FPU control word is saved in a word-sized stack slot that
       // is deliberately left on the stack: restore_precision() reloads it from
       // Address(rsp, 0) and releases the slot, so rsp must point at this slot
       // again when restore_precision() is emitted. Overwrites rax.
6931   subptr(rsp, BytesPerWord);   // reserve the slot that keeps the original control word
6932   fnstcw(Address(rsp, 0));     // store the current FPU control word into that slot
6933   movl(rax, Address(rsp, 0));
6934   orl(rax, 0x300);             // set bits 8-9: the x87 precision-control field (11b = extended precision)
6935   push(rax);                   // fldcw loads from memory, so spill the modified word
6936   fldcw(Address(rsp, 0));      // make the modified control word active
6937   pop(rax);                    // drop the temporary; the saved original is on top of the stack again
6938 }
6939 
6940 void MacroAssembler::restore_precision() {
       // Counterpart of increase_precision(): expects the word at the top of the
       // stack to be the FPU control word that increase_precision() saved there.
6941   fldcw(Address(rsp, 0));      // reload the saved control word
6942   addptr(rsp, BytesPerWord);   // release the slot reserved by increase_precision()
6943 }
6944 
6945 void MacroAssembler::fast_pow() {
6946   // computes X^Y = 2^(Y * log2(X))
6947   // if fast computation is not possible, result is NaN. Requires
6948   // fallback from user of this macro.
6949   // increase precision for intermediate steps of the computation
6950   increase_precision();    // saves the FPU control word on the stack; overwrites rax
6951   fyl2x();                 // Stack: (Y*log2(X)) ...
6952   pow_exp_core_encoding(); // Stack: 2^(Y*log2(X)) = X^Y ...
6953   restore_precision();     // reloads the saved control word and pops its stack slot
6954 }
6955 
6956 void MacroAssembler::fast_exp() {
6957   // computes exp(X) = 2^(X * log2(e))
6958   // if fast computation is not possible, result is NaN. Requires
6959   // fallback from user of this macro.
6960   // increase precision for intermediate steps of the computation
6961   increase_precision();    // saves the FPU control word on the stack; overwrites rax
6962   fldl2e();                // Stack: log2(e) X ...
6963   fmulp(1);                // Stack: (X*log2(e)) ...
6964   pow_exp_core_encoding(); // Stack: 2^(X*log2(e)) = exp(X) ...
6965   restore_precision();     // reloads the saved control word and pops its stack slot
6966 }
6967 
6968 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
6969   // kills rax, rcx, rdx
6970   // pow and exp needs 2 extra registers on the fpu stack.
6971   Label slow_case, done;
6972   Register tmp = noreg;
6973   if (!VM_Version::supports_cmov()) {
6974     // fcmp needs a temporary so preserve rdx,
6975     tmp = rdx;
6976   }
6977   Register tmp2 = rax;
6978   Register tmp3 = rcx;
6979 
6980   if (is_exp) {
6981     // Stack: X
6982     fld_s(0);                   // duplicate argument for runtime call. Stack: X X
6983     fast_exp();                 // Stack: exp(X) X
6984     fcmp(tmp, 0, false, false); // Stack: exp(X) X
6985     // exp(X) not equal to itself: exp(X) is NaN go to slow case.