< prev index next >

src/hotspot/cpu/aarch64/macroAssembler_aarch64_trig.cpp

Print this page
8248238: Adding Windows support to OpenJDK on AArch64

Summary: LP64 vs LLP64 changes to add Windows support

Contributed-by: Monica Beckwith <monica.beckwith@microsoft.com>, Ludovic Henry <luhenry@microsoft.com>
Reviewed-by:


 184 //}
 185 //
 186 // END __ieee754_rem_pio2 PSEUDO CODE
 187 //
 188 // Changes between fdlibm and intrinsic for __ieee754_rem_pio2:
 189 //     1. INF/NaN check for huge argument is removed in comparison with fdlibm
 190 //     code, because this check is already done in dcos/dsin code
 191 //     2. Most constants are now loaded from table instead of direct initialization
 192 //     3. Two loops are unrolled
 193 // Assumptions:
 194 //     1. Assume |X| >= PI/4
 195 //     2. Assume rscratch1 = 0x3fe921fb00000000  (~ PI/4)
 196 //     3. Assume ix = r3
 197 // Input and output:
 198 //     1. Input: X = r0
 199 //     2. Return n in r2, y[0] == y0 == v4, y[1] == y1 == v5
 200 // NOTE: general purpose register names match local variable names in C code
 201 // NOTE: fpu registers are actively reused. See comments in code about their usage
 202 void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw,
 203     address two_over_pi, address pio2) {
 204   const long PIO2_1t = 0x3DD0B4611A626331UL;
 205   const long PIO2_2  = 0x3DD0B4611A600000UL;
 206   const long PIO2_2t = 0x3BA3198A2E037073UL;
 207   Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE,
 208       REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET,
 209       X_IS_NEGATIVE_LONG_PI;
 210   Register X = r0, n = r2, ix = r3, jv = r4, tmp5 = r5, jx = r6,
 211       tmp3 = r7, iqBase = r10, ih = r11, i = r17;
 212     // initializing constants first
 213     // rscratch1 = 0x3fe921fb00000000 (see assumptions)
 214     movk(rscratch1, 0x3ff9, 48); // was 0x3fe921fb0..0 now it's 0x3ff921fb0..0
 215     mov(rscratch2, 0x4002d97c); // 3*PI/4 high word
 216     movk(rscratch1, 0x5440, 16); // now rscratch1 == PIO2_1
 217     fmovd(v1, rscratch1); // v1 = PIO2_1
 218     cmp(rscratch2, ix);
 219     br(LE, X_IS_MEDIUM_OR_LARGE);
 220 
 221     block_comment("if(ix<0x4002d97c) {...  /* |x| ~< 3pi/4 */ "); {
 222       cmp(X, zr);
 223       br(LT, X_IS_NEGATIVE);
 224 
 225       block_comment("if(hx>0) {"); {
 226         fsubd(v2, v0, v1); // v2 = z = x - pio2_1




 184 //}
 185 //
 186 // END __ieee754_rem_pio2 PSEUDO CODE
 187 //
 188 // Changes between fdlibm and intrinsic for __ieee754_rem_pio2:
 189 //     1. INF/NaN check for huge argument is removed in comparison with fdlibm
 190 //     code, because this check is already done in dcos/dsin code
 191 //     2. Most constants are now loaded from table instead of direct initialization
 192 //     3. Two loops are unrolled
 193 // Assumptions:
 194 //     1. Assume |X| >= PI/4
 195 //     2. Assume rscratch1 = 0x3fe921fb00000000  (~ PI/4)
 196 //     3. Assume ix = r3
 197 // Input and output:
 198 //     1. Input: X = r0
 199 //     2. Return n in r2, y[0] == y0 == v4, y[1] == y1 == v5
 200 // NOTE: general purpose register names match local variable names in C code
 201 // NOTE: fpu registers are actively reused. See comments in code about their usage
 202 void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw,
 203     address two_over_pi, address pio2) {
 204   const int64_t PIO2_1t = 0x3DD0B4611A626331UL;
 205   const int64_t PIO2_2  = 0x3DD0B4611A600000UL;
 206   const int64_t PIO2_2t = 0x3BA3198A2E037073UL;
 207   Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE,
 208       REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET,
 209       X_IS_NEGATIVE_LONG_PI;
 210   Register X = r0, n = r2, ix = r3, jv = r4, tmp5 = r5, jx = r6,
 211       tmp3 = r7, iqBase = r10, ih = r11, i = r17;
 212     // initializing constants first
 213     // rscratch1 = 0x3fe921fb00000000 (see assumptions)
 214     movk(rscratch1, 0x3ff9, 48); // was 0x3fe921fb0..0 now it's 0x3ff921fb0..0
 215     mov(rscratch2, 0x4002d97c); // 3*PI/4 high word
 216     movk(rscratch1, 0x5440, 16); // now rscratch1 == PIO2_1
 217     fmovd(v1, rscratch1); // v1 = PIO2_1
 218     cmp(rscratch2, ix);
 219     br(LE, X_IS_MEDIUM_OR_LARGE);
 220 
 221     block_comment("if(ix<0x4002d97c) {...  /* |x| ~< 3pi/4 */ "); {
 222       cmp(X, zr);
 223       br(LT, X_IS_NEGATIVE);
 224 
 225       block_comment("if(hx>0) {"); {
 226         fsubd(v2, v0, v1); // v2 = z = x - pio2_1


< prev index next >