3010 }
3011 bind(L);
3012 }
3013
3014 void MacroAssembler::fld_d(AddressLiteral src) {
3015 fld_d(as_Address(src));
3016 }
3017
3018 void MacroAssembler::fld_s(AddressLiteral src) {
3019 fld_s(as_Address(src));
3020 }
3021
3022 void MacroAssembler::fld_x(AddressLiteral src) {
3023 Assembler::fld_x(as_Address(src));
3024 }
3025
3026 void MacroAssembler::fldcw(AddressLiteral src) {
3027 Assembler::fldcw(as_Address(src));
3028 }
3029
// Emits the shared core of fast_pow() and fast_exp(): computes 2^X for the
// value X on top of the FPU stack (see the per-instruction stack comments).
// Reserves sizeof(jdouble) bytes below rsp as scratch space for int(X).
3030 void MacroAssembler::pow_exp_core_encoding() {
3031 // kills rax, rcx, rdx
3032 subptr(rsp,sizeof(jdouble));
3033 // computes 2^X. Stack: X ...
3034 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
3035 // keep it on the thread's stack to compute 2^int(X) later,
3036 // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1.
3037 // The final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
3038 fld_s(0); // Stack: X X ...
3039 frndint(); // Stack: int(X) X ...
3040 fsuba(1); // Stack: int(X) X-int(X) ...
3041 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
3042 f2xm1(); // Stack: 2^(X-int(X))-1 ...
3043 fld1(); // Stack: 1 2^(X-int(X))-1 ...
3044 faddp(1); // Stack: 2^(X-int(X))
3045 // computes 2^(int(X)): add exponent bias (1023) to int(X), then
3046 // shift int(X)+1023 to exponent position.
3047 // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
3048 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
3049 // values, so detect them and set result to NaN.
3082 }
3083
3084 void MacroAssembler::restore_precision() {
3085 fldcw(Address(rsp, 0));
3086 addptr(rsp, BytesPerWord);
3087 }
3088
3089 void MacroAssembler::fast_pow() {
3090 // computes X^Y = 2^(Y * log2(X))
3091 // if fast computation is not possible, result is NaN. Requires
3092 // fallback from user of this macro.
3093 // increase precision for intermediate steps of the computation
3094 BLOCK_COMMENT("fast_pow {");
3095 increase_precision();
3096 fyl2x(); // Stack: (Y*log2(X)) ...
3097 pow_exp_core_encoding(); // Stack: exp(X) ...
3098 restore_precision();
3099 BLOCK_COMMENT("} fast_pow");
3100 }
3101
3102 void MacroAssembler::fast_exp() {
3103 // computes exp(X) = 2^(X * log2(e))
3104 // if fast computation is not possible, result is NaN. Requires
3105 // fallback from user of this macro.
3106 // increase precision for intermediate steps of the computation
3107 increase_precision();
3108 fldl2e(); // Stack: log2(e) X ...
3109 fmulp(1); // Stack: (X*log2(e)) ...
3110 pow_exp_core_encoding(); // Stack: exp(X) ...
3111 restore_precision();
3112 }
3113
// Emits code that computes exp(X) (is_exp == true, FPU stack: X) or X^Y
// (is_exp == false, FPU stack: X Y). The fast path is tried first; control
// reaching slow_case pops the top FPU register and calls
// SharedRuntime::dexp / SharedRuntime::dpow through fp_runtime_fallback().
// num_fpu_regs_in_use selects how the duplicated arguments are dropped
// (fxch/fpop vs. ffree) and is forwarded to fp_runtime_fallback().
// On exit (label done) the result is on top of the FPU stack.
3114 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
3115 // kills rax, rcx, rdx
3116 // pow and exp need 2 extra registers on the fpu stack.
3117 Label slow_case, done;
3118 Register tmp = noreg;
3119 if (!VM_Version::supports_cmov()) {
3120 // without cmov support fcmp needs a temporary register: use rdx
3121 tmp = rdx;
3122 }
3123 Register tmp2 = rax;
3124 Register tmp3 = rcx;
3125
3126 if (is_exp) {
3127 // Stack: X
3128 fld_s(0); // duplicate argument for runtime call. Stack: X X
3129 fast_exp(); // Stack: exp(X) X
3130 fcmp(tmp, 0, false, false); // Stack: exp(X) X
3131 // exp(X) not equal to itself: exp(X) is NaN, go to slow case.
3132 jcc(Assembler::parity, slow_case);
3133 // get rid of duplicate argument. Stack: exp(X)
3134 if (num_fpu_regs_in_use > 0) {
3135 fxch();
3136 fpop();
3137 } else {
3138 ffree(1);
3139 }
3140 jmp(done);
3141 } else {
3142 // Stack: X Y
3143 Label x_negative, y_not_2;
3144
3145 static double two = 2.0;
3146 ExternalAddress two_addr((address)&two);
3147
3148 // constant may be too far away on 64 bit, so load its address first
3149 lea(tmp2, two_addr);
3150 fld_d(Address(tmp2, 0)); // Stack: 2 X Y
3151 fcmp(tmp, 2, true, false); // Stack: X Y
3152 jcc(Assembler::parity, y_not_2);
3153 jcc(Assembler::notEqual, y_not_2);
3154
3155 fxch(); fpop(); // Stack: X
3156 fmul(0); // Stack: X*X
3157
3158 jmp(done);
3159
3160 bind(y_not_2);
3161
3273 bind(integer);
3274 }
3275 #endif
3276 #endif
3277
3278 // get rid of duplicate arguments. Stack: X^Y
3279 if (num_fpu_regs_in_use > 0) {
3280 fxch(); fpop();
3281 fxch(); fpop();
3282 } else {
3283 ffree(2);
3284 ffree(1);
3285 }
3286
3287 testl(tmp2, 1);
3288 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
3289 // X <= 0, Y odd: X^Y = -abs(X)^Y
3290
3291 fchs(); // Stack: -abs(X)^Y Y
3292 jmp(done);
3293 }
3294
3295 // slow case: runtime call
3296 bind(slow_case);
3297
3298 fpop(); // pop incorrect result or int(Y)
3299
3300 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
3301 is_exp ? 1 : 2, num_fpu_regs_in_use);
3302
3303 // Come here with result in F-TOS
3304 bind(done);
3305 }
3306
3307 void MacroAssembler::fpop() {
3308 ffree();
3309 fincstp();
3310 }
3311
3312 void MacroAssembler::fremr(Register tmp) {
3313 save_rax(tmp);
3314 { Label L;
3315 bind(L);
3316 fprem();
3317 fwait(); fnstsw_ax();
3318 #ifdef _LP64
3319 testl(rax, 0x400);
3320 jcc(Assembler::notEqual, L);
3321 #else
|
3010 }
3011 bind(L);
3012 }
3013
3014 void MacroAssembler::fld_d(AddressLiteral src) {
3015 fld_d(as_Address(src));
3016 }
3017
3018 void MacroAssembler::fld_s(AddressLiteral src) {
3019 fld_s(as_Address(src));
3020 }
3021
3022 void MacroAssembler::fld_x(AddressLiteral src) {
3023 Assembler::fld_x(as_Address(src));
3024 }
3025
3026 void MacroAssembler::fldcw(AddressLiteral src) {
3027 Assembler::fldcw(as_Address(src));
3028 }
3029
3030 void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
3031 if (reachable(src)) {
3032 Assembler::mulpd(dst, as_Address(src));
3033 } else {
3034 lea(rscratch1, src);
3035 Assembler::mulpd(dst, Address(rscratch1, 0));
3036 }
3037 }
3038
// Emits the core of fast_pow(): computes 2^X for the value X on top of the
// FPU stack (see the per-instruction stack comments).
// Reserves sizeof(jdouble) bytes below rsp as scratch space for int(X).
3039 void MacroAssembler::pow_exp_core_encoding() {
3040 // kills rax, rcx, rdx
3041 subptr(rsp,sizeof(jdouble));
3042 // computes 2^X. Stack: X ...
3043 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
3044 // keep it on the thread's stack to compute 2^int(X) later,
3045 // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1.
3046 // The final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
3047 fld_s(0); // Stack: X X ...
3048 frndint(); // Stack: int(X) X ...
3049 fsuba(1); // Stack: int(X) X-int(X) ...
3050 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
3051 f2xm1(); // Stack: 2^(X-int(X))-1 ...
3052 fld1(); // Stack: 1 2^(X-int(X))-1 ...
3053 faddp(1); // Stack: 2^(X-int(X))
3054 // computes 2^(int(X)): add exponent bias (1023) to int(X), then
3055 // shift int(X)+1023 to exponent position.
3056 // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
3057 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
3058 // values, so detect them and set result to NaN.
3091 }
3092
3093 void MacroAssembler::restore_precision() {
3094 fldcw(Address(rsp, 0));
3095 addptr(rsp, BytesPerWord);
3096 }
3097
3098 void MacroAssembler::fast_pow() {
3099 // computes X^Y = 2^(Y * log2(X))
3100 // if fast computation is not possible, result is NaN. Requires
3101 // fallback from user of this macro.
3102 // increase precision for intermediate steps of the computation
3103 BLOCK_COMMENT("fast_pow {");
3104 increase_precision();
3105 fyl2x(); // Stack: (Y*log2(X)) ...
3106 pow_exp_core_encoding(); // Stack: exp(X) ...
3107 restore_precision();
3108 BLOCK_COMMENT("} fast_pow");
3109 }
3110
// Emits code that computes X^Y for the FPU stack "X Y". The fast path is
// tried first; control reaching slow_case pops the top FPU register and
// calls SharedRuntime::dpow through fp_runtime_fallback().
// num_fpu_regs_in_use selects how the duplicated arguments are dropped
// (fxch/fpop vs. ffree) and is forwarded to fp_runtime_fallback().
// On exit (label done) the result is on top of the FPU stack.
3111 void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
3112 // kills rax, rcx, rdx
3113 // pow and exp need 2 extra registers on the fpu stack.
3114 Label slow_case, done;
3115 Register tmp = noreg;
3116 if (!VM_Version::supports_cmov()) {
3117 // without cmov support fcmp needs a temporary register: use rdx
3118 tmp = rdx;
3119 }
3120 Register tmp2 = rax;
3121 Register tmp3 = rcx;
3122
3123 // Stack: X Y
3124 Label x_negative, y_not_2;
3125
3126 static double two = 2.0;
3127 ExternalAddress two_addr((address)&two);
3128
3129 // constant may be too far away on 64 bit, so load its address first
3130 lea(tmp2, two_addr);
3131 fld_d(Address(tmp2, 0)); // Stack: 2 X Y
3132 fcmp(tmp, 2, true, false); // Stack: X Y
3133 jcc(Assembler::parity, y_not_2);
3134 jcc(Assembler::notEqual, y_not_2);
3135
3136 fxch(); fpop(); // Stack: X
3137 fmul(0); // Stack: X*X
3138
3139 jmp(done);
3140
3141 bind(y_not_2);
3142
3254 bind(integer);
3255 }
3256 #endif
3257 #endif
3258
3259 // get rid of duplicate arguments. Stack: X^Y
3260 if (num_fpu_regs_in_use > 0) {
3261 fxch(); fpop();
3262 fxch(); fpop();
3263 } else {
3264 ffree(2);
3265 ffree(1);
3266 }
3267
3268 testl(tmp2, 1);
3269 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
3270 // X <= 0, Y odd: X^Y = -abs(X)^Y
3271
3272 fchs(); // Stack: -abs(X)^Y Y
3273 jmp(done);
3274
3275 // slow case: runtime call
3276 bind(slow_case);
3277
3278 fpop(); // pop incorrect result or int(Y)
3279
3280 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
3281
3282 // Come here with result in F-TOS
3283 bind(done);
3284 }
3285
3286 void MacroAssembler::fpop() {
3287 ffree();
3288 fincstp();
3289 }
3290
3291 void MacroAssembler::fremr(Register tmp) {
3292 save_rax(tmp);
3293 { Label L;
3294 bind(L);
3295 fprem();
3296 fwait(); fnstsw_ax();
3297 #ifdef _LP64
3298 testl(rax, 0x400);
3299 jcc(Assembler::notEqual, L);
3300 #else
|