120 // -2 [ entry point ]
121 // -1 [ parameters ]
122 // 0 [ saved rbp ] <--- rbp
123 // 1 [ return address ]
124 // 2 [ parameter size ]
125 // 3 [ thread ]
126 //
127 // Windows Arguments:
128 // c_rarg0: call wrapper address address
129 // c_rarg1: result address
130 // c_rarg2: result type BasicType
131 // c_rarg3: method Method*
132 // 48(rbp): (interpreter) entry point address
133 // 56(rbp): parameters intptr_t*
134 // 64(rbp): parameter size (in words) int
135 // 72(rbp): thread Thread*
136 //
137 // [ return_from_Java ] <--- rsp
138 // [ argument word n ]
139 // ...
140 // -28 [ argument word 1 ]
141 // -27 [ saved xmm15 ] <--- rsp_after_call
142 // [ saved xmm7-xmm14 ]
143 // -9 [ saved xmm6 ] (each xmm register takes 2 slots)
144 // -7 [ saved r15 ]
145 // -6 [ saved r14 ]
146 // -5 [ saved r13 ]
147 // -4 [ saved r12 ]
148 // -3 [ saved rdi ]
149 // -2 [ saved rsi ]
150 // -1 [ saved rbx ]
151 // 0 [ saved rbp ] <--- rbp
152 // 1 [ return address ]
153 // 2 [ call wrapper ]
154 // 3 [ result ]
155 // 4 [ result type ]
156 // 5 [ method ]
157 // 6 [ entry point ]
158 // 7 [ parameters ]
159 // 8 [ parameter size ]
160 // 9 [ thread ]
161 //
162 // Windows reserves the caller's stack space for arguments 1-4.
163 // We spill c_rarg0-c_rarg3 to this space.
164
165 // Call stub stack layout word offsets from rbp
166 enum call_stub_layout {
167 #ifdef _WIN64
168 xmm_save_first = 6, // save from xmm6
169 xmm_save_last = 15, // to xmm15
170 xmm_save_base = -9,
171 rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -27
172 r15_off = -7,
173 r14_off = -6,
174 r13_off = -5,
175 r12_off = -4,
176 rdi_off = -3,
177 rsi_off = -2,
178 rbx_off = -1,
179 rbp_off = 0,
180 retaddr_off = 1,
181 call_wrapper_off = 2,
182 result_off = 3,
183 result_type_off = 4,
184 method_off = 5,
185 entry_point_off = 6,
186 parameters_off = 7,
187 parameter_size_off = 8,
188 thread_off = 9
189 #else
245 __ enter();
246 __ subptr(rsp, -rsp_after_call_off * wordSize);
247
248 // save register parameters
249 #ifndef _WIN64
250 __ movptr(parameters, c_rarg5); // parameters
251 __ movptr(entry_point, c_rarg4); // entry_point
252 #endif
253
254 __ movptr(method, c_rarg3); // method
255 __ movl(result_type, c_rarg2); // result type
256 __ movptr(result, c_rarg1); // result
257 __ movptr(call_wrapper, c_rarg0); // call wrapper
258
259 // save regs belonging to calling function
260 __ movptr(rbx_save, rbx);
261 __ movptr(r12_save, r12);
262 __ movptr(r13_save, r13);
263 __ movptr(r14_save, r14);
264 __ movptr(r15_save, r15);
265 #ifdef _WIN64
266 for (int i = 6; i <= 15; i++) {
267 __ movdqu(xmm_save(i), as_XMMRegister(i));
268 }
269
270 const Address rdi_save(rbp, rdi_off * wordSize);
271 const Address rsi_save(rbp, rsi_off * wordSize);
272
273 __ movptr(rsi_save, rsi);
274 __ movptr(rdi_save, rdi);
275 #else
276 const Address mxcsr_save(rbp, mxcsr_off * wordSize);
277 {
278 Label skip_ldmx;
279 __ stmxcsr(mxcsr_save);
280 __ movl(rax, mxcsr_save);
281 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
282 ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
283 __ cmp32(rax, mxcsr_std);
284 __ jcc(Assembler::equal, skip_ldmx);
285 __ ldmxcsr(mxcsr_std);
286 __ bind(skip_ldmx);
287 }
288 #endif
1301 // Copy big chunks forward
1302 //
1303 // Inputs:
1304 // end_from - source arrays end address
1305 // end_to - destination array end address
1306 // qword_count - 64-bits element count, negative
1307 // to - scratch
1308 // L_copy_bytes - entry label
1309 // L_copy_8_bytes - exit label
1310 //
1311 void copy_bytes_forward(Register end_from, Register end_to,
1312 Register qword_count, Register to,
1313 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1314 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1315 Label L_loop;
1316 __ align(OptoLoopAlignment);
1317 if (UseUnalignedLoadStores) {
1318 Label L_end;
1319 // Copy 64-bytes per iteration
1320 __ BIND(L_loop);
1321 if (UseAVX >= 2) {
1322 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1323 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1324 __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
1325 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
1326 } else {
1327 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1328 __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1329 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
1330 __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
1331 __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
1332 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
1333 __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
1334 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
1335 }
1336 __ BIND(L_copy_bytes);
1337 __ addptr(qword_count, 8);
1338 __ jcc(Assembler::lessEqual, L_loop);
1339 __ subptr(qword_count, 4); // sub(8) and add(4)
1340 __ jccb(Assembler::greater, L_end);
1341 // Copy trailing 32 bytes
1377 // Copy big chunks backward
1378 //
1379 // Inputs:
1380 // from - source arrays address
1381 // dest - destination array address
1382 // qword_count - 64-bits element count
1383 // to - scratch
1384 // L_copy_bytes - entry label
1385 // L_copy_8_bytes - exit label
1386 //
1387 void copy_bytes_backward(Register from, Register dest,
1388 Register qword_count, Register to,
1389 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1390 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1391 Label L_loop;
1392 __ align(OptoLoopAlignment);
1393 if (UseUnalignedLoadStores) {
1394 Label L_end;
1395 // Copy 64-bytes per iteration
1396 __ BIND(L_loop);
1397 if (UseAVX >= 2) {
1398 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
1399 __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
1400 __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1401 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1402 } else {
1403 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
1404 __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
1405 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
1406 __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
1407 __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
1408 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
1409 __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
1410 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
1411 }
1412 __ BIND(L_copy_bytes);
1413 __ subptr(qword_count, 8);
1414 __ jcc(Assembler::greaterEqual, L_loop);
1415
1416 __ addptr(qword_count, 4); // add(8) and sub(4)
1417 __ jccb(Assembler::less, L_end);
|
120 // -2 [ entry point ]
121 // -1 [ parameters ]
122 // 0 [ saved rbp ] <--- rbp
123 // 1 [ return address ]
124 // 2 [ parameter size ]
125 // 3 [ thread ]
126 //
127 // Windows Arguments:
128 // c_rarg0: call wrapper address address
129 // c_rarg1: result address
130 // c_rarg2: result type BasicType
131 // c_rarg3: method Method*
132 // 48(rbp): (interpreter) entry point address
133 // 56(rbp): parameters intptr_t*
134 // 64(rbp): parameter size (in words) int
135 // 72(rbp): thread Thread*
136 //
137 // [ return_from_Java ] <--- rsp
138 // [ argument word n ]
139 // ...
140 // -60 [ argument word 1 ]
141 // -59 [ saved xmm31 ] <--- rsp_after_call
142 // [ saved xmm16-xmm30 ] (EVEX enabled, else the space is blank)
143 // -27 [ saved xmm15 ]
144 // [ saved xmm7-xmm14 ]
145 // -9 [ saved xmm6 ] (each xmm register takes 2 slots)
146 // -7 [ saved r15 ]
147 // -6 [ saved r14 ]
148 // -5 [ saved r13 ]
149 // -4 [ saved r12 ]
150 // -3 [ saved rdi ]
151 // -2 [ saved rsi ]
152 // -1 [ saved rbx ]
153 // 0 [ saved rbp ] <--- rbp
154 // 1 [ return address ]
155 // 2 [ call wrapper ]
156 // 3 [ result ]
157 // 4 [ result type ]
158 // 5 [ method ]
159 // 6 [ entry point ]
160 // 7 [ parameters ]
161 // 8 [ parameter size ]
162 // 9 [ thread ]
163 //
164 // Windows reserves the caller's stack space for arguments 1-4.
165 // We spill c_rarg0-c_rarg3 to this space.
166
167 // Call stub stack layout word offsets from rbp
168 enum call_stub_layout {
169 #ifdef _WIN64
170 xmm_save_first = 6, // save from xmm6
171 xmm_save_last = 31, // to xmm31
172 xmm_save_base = -9,
173 rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -27
174 r15_off = -7,
175 r14_off = -6,
176 r13_off = -5,
177 r12_off = -4,
178 rdi_off = -3,
179 rsi_off = -2,
180 rbx_off = -1,
181 rbp_off = 0,
182 retaddr_off = 1,
183 call_wrapper_off = 2,
184 result_off = 3,
185 result_type_off = 4,
186 method_off = 5,
187 entry_point_off = 6,
188 parameters_off = 7,
189 parameter_size_off = 8,
190 thread_off = 9
191 #else
247 __ enter();
248 __ subptr(rsp, -rsp_after_call_off * wordSize);
249
250 // save register parameters
251 #ifndef _WIN64
252 __ movptr(parameters, c_rarg5); // parameters
253 __ movptr(entry_point, c_rarg4); // entry_point
254 #endif
255
256 __ movptr(method, c_rarg3); // method
257 __ movl(result_type, c_rarg2); // result type
258 __ movptr(result, c_rarg1); // result
259 __ movptr(call_wrapper, c_rarg0); // call wrapper
260
261 // save regs belonging to calling function
262 __ movptr(rbx_save, rbx);
263 __ movptr(r12_save, r12);
264 __ movptr(r13_save, r13);
265 __ movptr(r14_save, r14);
266 __ movptr(r15_save, r15);
267 if (UseAVX > 2) {
268 __ movl(rbx, 0xffff);
269 __ kmovql(k1, rbx);
270 }
271 #ifdef _WIN64
272 if (UseAVX > 2) {
273 for (int i = 6; i <= 31; i++) {
274 __ movdqu(xmm_save(i), as_XMMRegister(i));
275 }
276 } else {
277 for (int i = 6; i <= 15; i++) {
278 __ movdqu(xmm_save(i), as_XMMRegister(i));
279 }
280 }
281
282 const Address rdi_save(rbp, rdi_off * wordSize);
283 const Address rsi_save(rbp, rsi_off * wordSize);
284
285 __ movptr(rsi_save, rsi);
286 __ movptr(rdi_save, rdi);
287 #else
288 const Address mxcsr_save(rbp, mxcsr_off * wordSize);
289 {
290 Label skip_ldmx;
291 __ stmxcsr(mxcsr_save);
292 __ movl(rax, mxcsr_save);
293 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
294 ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
295 __ cmp32(rax, mxcsr_std);
296 __ jcc(Assembler::equal, skip_ldmx);
297 __ ldmxcsr(mxcsr_std);
298 __ bind(skip_ldmx);
299 }
300 #endif
1313 // Copy big chunks forward
1314 //
1315 // Inputs:
1316 // end_from - source arrays end address
1317 // end_to - destination array end address
1318 // qword_count - 64-bits element count, negative
1319 // to - scratch
1320 // L_copy_bytes - entry label
1321 // L_copy_8_bytes - exit label
1322 //
1323 void copy_bytes_forward(Register end_from, Register end_to,
1324 Register qword_count, Register to,
1325 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1326 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1327 Label L_loop;
1328 __ align(OptoLoopAlignment);
1329 if (UseUnalignedLoadStores) {
1330 Label L_end;
1331 // Copy 64-bytes per iteration
1332 __ BIND(L_loop);
1333 if (UseAVX > 2) {
1334 __ evmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
1335 __ evmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
1336 } else if (UseAVX == 2) {
1337 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1338 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1339 __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
1340 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
1341 } else {
1342 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1343 __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1344 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
1345 __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
1346 __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
1347 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
1348 __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
1349 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
1350 }
1351 __ BIND(L_copy_bytes);
1352 __ addptr(qword_count, 8);
1353 __ jcc(Assembler::lessEqual, L_loop);
1354 __ subptr(qword_count, 4); // sub(8) and add(4)
1355 __ jccb(Assembler::greater, L_end);
1356 // Copy trailing 32 bytes
1392 // Copy big chunks backward
1393 //
1394 // Inputs:
1395 // from - source arrays address
1396 // dest - destination array address
1397 // qword_count - 64-bits element count
1398 // to - scratch
1399 // L_copy_bytes - entry label
1400 // L_copy_8_bytes - exit label
1401 //
     // Callers jump to L_copy_bytes; qword_count counts down toward zero,
     // and each iteration copies the 64-byte chunk at byte offset
     // qword_count*8 from the base addresses.
1402 void copy_bytes_backward(Register from, Register dest,
1403 Register qword_count, Register to,
1404 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1405 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1406 Label L_loop;
1407 __ align(OptoLoopAlignment);
1408 if (UseUnalignedLoadStores) {
1409 Label L_end;
1410 // Copy 64-bytes per iteration
1411 __ BIND(L_loop);
1412 if (UseAVX > 2) {
     // BUG FIX: the 512-bit (64-byte) move must start at displacement 0 so
     // it covers bytes [qword_count*8, qword_count*8 + 64) -- exactly the
     // union of the AVX2 (disp 32,0) and SSE (disp 48,32,16,0) windows
     // below, and the mirror of copy_bytes_forward's -56. The previous
     // displacement of 32 accessed [qc*8+32, qc*8+96): 32 bytes past the
     // chunk, while never copying [qc*8, qc*8+32).
1413 __ evmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
1414 __ evmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
1415 } else if (UseAVX == 2) {
1416 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
1417 __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
1418 __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1419 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1420 } else {
1421 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
1422 __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
1423 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
1424 __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
1425 __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
1426 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
1427 __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
1428 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
1429 }
1430 __ BIND(L_copy_bytes);
     // step back one 64-byte chunk; loop while a full chunk remains (count >= 0)
1431 __ subptr(qword_count, 8);
1432 __ jcc(Assembler::greaterEqual, L_loop);
1433
     // net -4 overall: test whether a trailing 32-byte chunk remains
1434 __ addptr(qword_count, 4); // add(8) and sub(4)
1435 __ jccb(Assembler::less, L_end);
|