< prev index next >

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page




3010   }
3011   bind(L);
3012 }
3013 
// AddressLiteral convenience overload of fld_d: converts the literal with
// as_Address() and forwards the result to the fld_d overload taking that type.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3014 void MacroAssembler::fld_d(AddressLiteral src) {
3015   fld_d(as_Address(src));
3016 }
3017 
// AddressLiteral convenience overload of fld_s: converts the literal with
// as_Address() and forwards the result to the fld_s overload taking that type.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3018 void MacroAssembler::fld_s(AddressLiteral src) {
3019   fld_s(as_Address(src));
3020 }
3021 
// AddressLiteral convenience overload of fld_x: converts the literal with
// as_Address() and delegates explicitly to the base-class Assembler::fld_x.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3022 void MacroAssembler::fld_x(AddressLiteral src) {
3023   Assembler::fld_x(as_Address(src));
3024 }
3025 
// AddressLiteral convenience overload of fldcw: converts the literal with
// as_Address() and delegates explicitly to the base-class Assembler::fldcw.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3026 void MacroAssembler::fldcw(AddressLiteral src) {
3027   Assembler::fldcw(as_Address(src));
3028 }
3029 









3030 void MacroAssembler::pow_exp_core_encoding() {
3031   // kills rax, rcx, rdx
3032   subptr(rsp,sizeof(jdouble));
3033   // computes 2^X. Stack: X ...
3034   // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
3035   // keep it on the thread's stack to compute 2^int(X) later
3036   // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
3037   // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
3038   fld_s(0);                 // Stack: X X ...
3039   frndint();                // Stack: int(X) X ...
3040   fsuba(1);                 // Stack: int(X) X-int(X) ...
3041   fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
3042   f2xm1();                  // Stack: 2^(X-int(X))-1 ...
3043   fld1();                   // Stack: 1 2^(X-int(X))-1 ...
3044   faddp(1);                 // Stack: 2^(X-int(X))
3045   // computes 2^(int(X)): add exponent bias (1023) to int(X), then
3046   // shift int(X)+1023 to exponent position.
3047   // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
3048   // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
3049   // values so detect them and set result to NaN.


3082 }
3083 
// Loads the FPU control word from the word at the top of the thread's stack
// ([rsp + 0]) and then releases that slot by adding BytesPerWord to rsp.
// NOTE(review): presumably the counterpart of increase_precision(), which is
// expected to have pushed the previous control word; that function is not
// visible here, so confirm the pairing.
3084 void MacroAssembler::restore_precision() {
3085   fldcw(Address(rsp, 0));     // reload control word from [rsp]
3086   addptr(rsp, BytesPerWord);  // pop the one-word slot
3087 }
3088 
// Emits the fast path for pow on the FPU stack; operands and result live on
// that stack. pow_exp_core_encoding() is documented as killing rax, rcx, rdx.
3089 void MacroAssembler::fast_pow() {
3090   // Computes X^Y as 2^(Y * log2(X)).
3091   // If the fast computation is not possible, the result is NaN, so the
3092   // user of this macro must provide a fallback.
3093   // Precision is increased for the intermediate steps and restored at the end.
3094   BLOCK_COMMENT("fast_pow {");
3095   increase_precision();
3096   fyl2x();                 // Stack: (Y*log2(X)) ...
3097   pow_exp_core_encoding(); // Stack: 2^(Y*log2(X)) = X^Y ...
3098   restore_precision();
3099   BLOCK_COMMENT("} fast_pow");
3100 }
3101 
// Emits the fast path for exp on the FPU stack; the operand X is expected on
// top of that stack and is replaced by the result.
// pow_exp_core_encoding() is documented as killing rax, rcx, rdx.
3102 void MacroAssembler::fast_exp() {
3103   // Computes exp(X) as 2^(X * log2(e)).
3104   // If the fast computation is not possible, the result is NaN, so the
3105   // user of this macro must provide a fallback.
3106   // Precision is increased for the intermediate steps and restored at the end.
3107   increase_precision();
3108   fldl2e();                // Stack: log2(e) X ...
3109   fmulp(1);                // Stack: (X*log2(e)) ...
3110   pow_exp_core_encoding(); // Stack: 2^(X*log2(e)) = exp(X) ...
3111   restore_precision();
3112 }
3113 
3114 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
3115   // kills rax, rcx, rdx
3116   // pow and exp needs 2 extra registers on the fpu stack.
3117   Label slow_case, done;
3118   Register tmp = noreg;
3119   if (!VM_Version::supports_cmov()) {
3120     // fcmp needs a temporary so preserve rdx,
3121     tmp = rdx;
3122   }
3123   Register tmp2 = rax;
3124   Register tmp3 = rcx;
3125 
3126   if (is_exp) {
3127     // Stack: X
3128     fld_s(0);                   // duplicate argument for runtime call. Stack: X X
3129     fast_exp();                 // Stack: exp(X) X
3130     fcmp(tmp, 0, false, false); // Stack: exp(X) X
3131     // exp(X) not equal to itself: exp(X) is NaN go to slow case.
3132     jcc(Assembler::parity, slow_case);
3133     // get rid of duplicate argument. Stack: exp(X)
3134     if (num_fpu_regs_in_use > 0) {
3135       fxch();
3136       fpop();
3137     } else {
3138       ffree(1);
3139     }
3140     jmp(done);
3141   } else {
3142     // Stack: X Y
3143     Label x_negative, y_not_2;
3144 
3145     static double two = 2.0;
3146     ExternalAddress two_addr((address)&two);
3147 
3148     // constant maybe too far on 64 bit
3149     lea(tmp2, two_addr);
3150     fld_d(Address(tmp2, 0));    // Stack: 2 X Y
3151     fcmp(tmp, 2, true, false);  // Stack: X Y
3152     jcc(Assembler::parity, y_not_2);
3153     jcc(Assembler::notEqual, y_not_2);
3154 
3155     fxch(); fpop();             // Stack: X
3156     fmul(0);                    // Stack: X*X
3157 
3158     jmp(done);
3159 
3160     bind(y_not_2);
3161 


3273       bind(integer);
3274     }
3275 #endif
3276 #endif
3277 
3278     // get rid of duplicate arguments. Stack: X^Y
3279     if (num_fpu_regs_in_use > 0) {
3280       fxch(); fpop();
3281       fxch(); fpop();
3282     } else {
3283       ffree(2);
3284       ffree(1);
3285     }
3286 
3287     testl(tmp2, 1);
3288     jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
3289     // X <= 0, Y odd: X^Y = -abs(X)^Y
3290 
3291     fchs();                     // Stack: -abs(X)^Y Y
3292     jmp(done);
3293   }
3294 
3295   // slow case: runtime call
3296   bind(slow_case);
3297 
3298   fpop();                       // pop incorrect result or int(Y)
3299 
3300   fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
3301                       is_exp ? 1 : 2, num_fpu_regs_in_use);
3302 
3303   // Come here with result in F-TOS
3304   bind(done);
3305 }
3306 
// Discards the value on top of the FPU register stack without storing it:
// frees a register with ffree() and then advances the stack-top pointer with
// fincstp().
// NOTE(review): ffree() is called with no argument; presumably it defaults to
// the top-of-stack register (declaration not visible here) - confirm.
3307 void MacroAssembler::fpop() {
3308   ffree();
3309   fincstp();
3310 }
3311 
3312 void MacroAssembler::fremr(Register tmp) {
3313   save_rax(tmp);
3314   { Label L;
3315     bind(L);
3316     fprem();
3317     fwait(); fnstsw_ax();
3318 #ifdef _LP64
3319     testl(rax, 0x400);
3320     jcc(Assembler::notEqual, L);
3321 #else




3010   }
3011   bind(L);
3012 }
3013 
// AddressLiteral convenience overload of fld_d: converts the literal with
// as_Address() and forwards the result to the fld_d overload taking that type.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3014 void MacroAssembler::fld_d(AddressLiteral src) {
3015   fld_d(as_Address(src));
3016 }
3017 
// AddressLiteral convenience overload of fld_s: converts the literal with
// as_Address() and forwards the result to the fld_s overload taking that type.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3018 void MacroAssembler::fld_s(AddressLiteral src) {
3019   fld_s(as_Address(src));
3020 }
3021 
// AddressLiteral convenience overload of fld_x: converts the literal with
// as_Address() and delegates explicitly to the base-class Assembler::fld_x.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3022 void MacroAssembler::fld_x(AddressLiteral src) {
3023   Assembler::fld_x(as_Address(src));
3024 }
3025 
// AddressLiteral convenience overload of fldcw: converts the literal with
// as_Address() and delegates explicitly to the base-class Assembler::fldcw.
// NOTE(review): no reachable(src) check is made before as_Address(); confirm
// callers only pass literals that can be encoded directly.
3026 void MacroAssembler::fldcw(AddressLiteral src) {
3027   Assembler::fldcw(as_Address(src));
3028 }
3029 
// Emits mulpd with dst as the register operand and the AddressLiteral src as
// the memory operand.
// When reachable(src) holds, the literal is used directly via as_Address().
// Otherwise its address is first materialized in rscratch1 with lea and the
// operand is addressed as [rscratch1 + 0]; rscratch1 is therefore overwritten
// on that path.
3030 void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
3031   if (reachable(src)) {
3032     Assembler::mulpd(dst, as_Address(src));
3033   } else {
3034     lea(rscratch1, src);                           // rscratch1 = address of src
3035     Assembler::mulpd(dst, Address(rscratch1, 0));
3036   }
3037 }
3038 
3039 void MacroAssembler::pow_exp_core_encoding() {
3040   // kills rax, rcx, rdx
3041   subptr(rsp,sizeof(jdouble));
3042   // computes 2^X. Stack: X ...
3043   // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
3044   // keep it on the thread's stack to compute 2^int(X) later
3045   // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
3046   // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
3047   fld_s(0);                 // Stack: X X ...
3048   frndint();                // Stack: int(X) X ...
3049   fsuba(1);                 // Stack: int(X) X-int(X) ...
3050   fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
3051   f2xm1();                  // Stack: 2^(X-int(X))-1 ...
3052   fld1();                   // Stack: 1 2^(X-int(X))-1 ...
3053   faddp(1);                 // Stack: 2^(X-int(X))
3054   // computes 2^(int(X)): add exponent bias (1023) to int(X), then
3055   // shift int(X)+1023 to exponent position.
3056   // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
3057   // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
3058   // values so detect them and set result to NaN.


3091 }
3092 
// Loads the FPU control word from the word at the top of the thread's stack
// ([rsp + 0]) and then releases that slot by adding BytesPerWord to rsp.
// NOTE(review): presumably the counterpart of increase_precision(), which is
// expected to have pushed the previous control word; that function is not
// visible here, so confirm the pairing.
3093 void MacroAssembler::restore_precision() {
3094   fldcw(Address(rsp, 0));     // reload control word from [rsp]
3095   addptr(rsp, BytesPerWord);  // pop the one-word slot
3096 }
3097 
// Emits the fast path for pow on the FPU stack; operands and result live on
// that stack. pow_exp_core_encoding() is documented as killing rax, rcx, rdx.
3098 void MacroAssembler::fast_pow() {
3099   // Computes X^Y as 2^(Y * log2(X)).
3100   // If the fast computation is not possible, the result is NaN, so the
3101   // user of this macro must provide a fallback.
3102   // Precision is increased for the intermediate steps and restored at the end.
3103   BLOCK_COMMENT("fast_pow {");
3104   increase_precision();
3105   fyl2x();                 // Stack: (Y*log2(X)) ...
3106   pow_exp_core_encoding(); // Stack: 2^(Y*log2(X)) = X^Y ...
3107   restore_precision();
3108   BLOCK_COMMENT("} fast_pow");
3109 }
3110 
3111 void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {












3112   // kills rax, rcx, rdx
3113   // pow and exp needs 2 extra registers on the fpu stack.
3114   Label slow_case, done;
3115   Register tmp = noreg;
3116   if (!VM_Version::supports_cmov()) {
3117     // fcmp needs a temporary so preserve rdx,
3118     tmp = rdx;
3119   }
3120   Register tmp2 = rax;
3121   Register tmp3 = rcx;
3122 
















3123   // Stack: X Y
3124   Label x_negative, y_not_2;
3125 
3126   static double two = 2.0;
3127   ExternalAddress two_addr((address)&two);
3128 
3129   // constant maybe too far on 64 bit
3130   lea(tmp2, two_addr);
3131   fld_d(Address(tmp2, 0));    // Stack: 2 X Y
3132   fcmp(tmp, 2, true, false);  // Stack: X Y
3133   jcc(Assembler::parity, y_not_2);
3134   jcc(Assembler::notEqual, y_not_2);
3135 
3136   fxch(); fpop();             // Stack: X
3137   fmul(0);                    // Stack: X*X
3138 
3139   jmp(done);
3140 
3141   bind(y_not_2);
3142 


3254     bind(integer);
3255   }
3256 #endif
3257 #endif
3258 
3259   // get rid of duplicate arguments. Stack: X^Y
3260   if (num_fpu_regs_in_use > 0) {
3261     fxch(); fpop();
3262     fxch(); fpop();
3263   } else {
3264     ffree(2);
3265     ffree(1);
3266   }
3267 
3268   testl(tmp2, 1);
3269   jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
3270   // X <= 0, Y odd: X^Y = -abs(X)^Y
3271 
3272   fchs();                     // Stack: -abs(X)^Y Y
3273   jmp(done);

3274 
3275   // slow case: runtime call
3276   bind(slow_case);
3277 
3278   fpop();                       // pop incorrect result or int(Y)
3279 
3280   fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);

3281 
3282   // Come here with result in F-TOS
3283   bind(done);
3284 }
3285 
// Discards the value on top of the FPU register stack without storing it:
// frees a register with ffree() and then advances the stack-top pointer with
// fincstp().
// NOTE(review): ffree() is called with no argument; presumably it defaults to
// the top-of-stack register (declaration not visible here) - confirm.
3286 void MacroAssembler::fpop() {
3287   ffree();
3288   fincstp();
3289 }
3290 
3291 void MacroAssembler::fremr(Register tmp) {
3292   save_rax(tmp);
3293   { Label L;
3294     bind(L);
3295     fprem();
3296     fwait(); fnstsw_ax();
3297 #ifdef _LP64
3298     testl(rax, 0x400);
3299     jcc(Assembler::notEqual, L);
3300 #else


< prev index next >