184 //}
185 //
186 // END __ieee754_rem_pio2 PSEUDO CODE
187 //
188 // Changes between fdlibm and intrinsic for __ieee754_rem_pio2:
189 // 1. The INF/NaN check for huge arguments is removed relative to the fdlibm
190 // code, because that check is already done in the dcos/dsin code
191 // 2. Most constants are now loaded from table instead of direct initialization
192 // 3. Two loops are unrolled
193 // Assumptions:
194 // 1. Assume |X| >= PI/4
195 // 2. Assume rscratch1 = 0x3fe921fb00000000 (~ PI/4)
196 // 3. Assume ix = r3
197 // Input and output:
198 // 1. Input: X = r0
199 // 2. Return n in r2, y[0] == y0 == v4, y[1] == y1 == v5
200 // NOTE: general purpose register names match local variable names in C code
201 // NOTE: FPU registers are actively reused; see the comments in the code for their usage
202 void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw,
203 address two_over_pi, address pio2) {
204 const long PIO2_1t = 0x3DD0B4611A626331UL;
205 const long PIO2_2 = 0x3DD0B4611A600000UL;
206 const long PIO2_2t = 0x3BA3198A2E037073UL;
207 Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE,
208 REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET,
209 X_IS_NEGATIVE_LONG_PI;
210 Register X = r0, n = r2, ix = r3, jv = r4, tmp5 = r5, jx = r6,
211 tmp3 = r7, iqBase = r10, ih = r11, i = r17;
212 // initializing constants first
213 // rscratch1 = 0x3fe921fb00000000 (see assumptions)
214 movk(rscratch1, 0x3ff9, 48); // was 0x3fe921fb0..0 now it's 0x3ff921fb0..0
215 mov(rscratch2, 0x4002d97c); // 3*PI/4 high word
216 movk(rscratch1, 0x5440, 16); // now rscratch1 == PIO2_1
217 fmovd(v1, rscratch1); // v1 = PIO2_1
218 cmp(rscratch2, ix);
219 br(LE, X_IS_MEDIUM_OR_LARGE);
220
221 block_comment("if(ix<0x4002d97c) {... /* |x| ~< 3pi/4 */ "); {
222 cmp(X, zr);
223 br(LT, X_IS_NEGATIVE);
224
225 block_comment("if(hx>0) {"); {
226 fsubd(v2, v0, v1); // v2 = z = x - pio2_1
|
184 //}
185 //
186 // END __ieee754_rem_pio2 PSEUDO CODE
187 //
188 // Changes between fdlibm and intrinsic for __ieee754_rem_pio2:
189 // 1. The INF/NaN check for huge arguments is removed relative to the fdlibm
190 // code, because that check is already done in the dcos/dsin code
191 // 2. Most constants are now loaded from table instead of direct initialization
192 // 3. Two loops are unrolled
193 // Assumptions:
194 // 1. Assume |X| >= PI/4
195 // 2. Assume rscratch1 = 0x3fe921fb00000000 (~ PI/4)
196 // 3. Assume ix = r3
197 // Input and output:
198 // 1. Input: X = r0
199 // 2. Return n in r2, y[0] == y0 == v4, y[1] == y1 == v5
200 // NOTE: general purpose register names match local variable names in C code
201 // NOTE: FPU registers are actively reused; see the comments in the code for their usage
202 void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw,
203 address two_over_pi, address pio2) {
204 const int64_t PIO2_1t = 0x3DD0B4611A626331UL;
205 const int64_t PIO2_2 = 0x3DD0B4611A600000UL;
206 const int64_t PIO2_2t = 0x3BA3198A2E037073UL;
207 Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE,
208 REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET,
209 X_IS_NEGATIVE_LONG_PI;
210 Register X = r0, n = r2, ix = r3, jv = r4, tmp5 = r5, jx = r6,
211 tmp3 = r7, iqBase = r10, ih = r11, i = r17;
212 // initializing constants first
213 // rscratch1 = 0x3fe921fb00000000 (see assumptions)
214 movk(rscratch1, 0x3ff9, 48); // was 0x3fe921fb0..0 now it's 0x3ff921fb0..0
215 mov(rscratch2, 0x4002d97c); // 3*PI/4 high word
216 movk(rscratch1, 0x5440, 16); // now rscratch1 == PIO2_1
217 fmovd(v1, rscratch1); // v1 = PIO2_1
218 cmp(rscratch2, ix);
219 br(LE, X_IS_MEDIUM_OR_LARGE);
220
221 block_comment("if(ix<0x4002d97c) {... /* |x| ~< 3pi/4 */ "); {
222 cmp(X, zr);
223 br(LT, X_IS_NEGATIVE);
224
225 block_comment("if(hx>0) {"); {
226 fsubd(v2, v0, v1); // v2 = z = x - pio2_1
|