< prev index next >

src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page




 120   //  -2 [ entry point          ]
 121   //  -1 [ parameters           ]
 122   //   0 [ saved rbp            ] <--- rbp
 123   //   1 [ return address       ]
 124   //   2 [ parameter size       ]
 125   //   3 [ thread               ]
 126   //
 127   // Windows Arguments:
 128   //    c_rarg0:   call wrapper address                   address
 129   //    c_rarg1:   result                                 address
 130   //    c_rarg2:   result type                            BasicType
 131   //    c_rarg3:   method                                 Method*
 132   //    48(rbp): (interpreter) entry point              address
 133   //    56(rbp): parameters                             intptr_t*
 134   //    64(rbp): parameter size (in words)              int
 135   //    72(rbp): thread                                 Thread*
 136   //
 137   //     [ return_from_Java     ] <--- rsp
 138   //     [ argument word n      ]
 139   //      ...
 140   // -28 [ argument word 1      ]
 141   // -27 [ saved xmm15          ] <--- rsp_after_call


 142   //     [ saved xmm7-xmm14     ]
 143   //  -9 [ saved xmm6           ] (each xmm register takes 2 slots)
 144   //  -7 [ saved r15            ]
 145   //  -6 [ saved r14            ]
 146   //  -5 [ saved r13            ]
 147   //  -4 [ saved r12            ]
 148   //  -3 [ saved rdi            ]
 149   //  -2 [ saved rsi            ]
 150   //  -1 [ saved rbx            ]
 151   //   0 [ saved rbp            ] <--- rbp
 152   //   1 [ return address       ]
 153   //   2 [ call wrapper         ]
 154   //   3 [ result               ]
 155   //   4 [ result type          ]
 156   //   5 [ method               ]
 157   //   6 [ entry point          ]
 158   //   7 [ parameters           ]
 159   //   8 [ parameter size       ]
 160   //   9 [ thread               ]
 161   //
 162   //    Windows reserves the callers stack space for arguments 1-4.
 163   //    We spill c_rarg0-c_rarg3 to this space.
 164 
 165   // Call stub stack layout word offsets from rbp
 166   enum call_stub_layout {
 167 #ifdef _WIN64
 168     xmm_save_first     = 6,  // save from xmm6
 169     xmm_save_last      = 15, // to xmm15
 170     xmm_save_base      = -9,
 171     rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -27
 172     r15_off            = -7,
 173     r14_off            = -6,
 174     r13_off            = -5,
 175     r12_off            = -4,
 176     rdi_off            = -3,
 177     rsi_off            = -2,
 178     rbx_off            = -1,
 179     rbp_off            =  0,
 180     retaddr_off        =  1,
 181     call_wrapper_off   =  2,
 182     result_off         =  3,
 183     result_type_off    =  4,
 184     method_off         =  5,
 185     entry_point_off    =  6,
 186     parameters_off     =  7,
 187     parameter_size_off =  8,
 188     thread_off         =  9
 189 #else


 245     __ enter();
 246     __ subptr(rsp, -rsp_after_call_off * wordSize);
 247 
 248     // save register parameters
 249 #ifndef _WIN64
 250     __ movptr(parameters,   c_rarg5); // parameters
 251     __ movptr(entry_point,  c_rarg4); // entry_point
 252 #endif
 253 
 254     __ movptr(method,       c_rarg3); // method
 255     __ movl(result_type,  c_rarg2);   // result type
 256     __ movptr(result,       c_rarg1); // result
 257     __ movptr(call_wrapper, c_rarg0); // call wrapper
 258 
 259     // save regs belonging to calling function
 260     __ movptr(rbx_save, rbx);
 261     __ movptr(r12_save, r12);
 262     __ movptr(r13_save, r13);
 263     __ movptr(r14_save, r14);
 264     __ movptr(r15_save, r15);




 265 #ifdef _WIN64





 266     for (int i = 6; i <= 15; i++) {
 267       __ movdqu(xmm_save(i), as_XMMRegister(i));
 268     }

 269 
 270     const Address rdi_save(rbp, rdi_off * wordSize);
 271     const Address rsi_save(rbp, rsi_off * wordSize);
 272 
 273     __ movptr(rsi_save, rsi);
 274     __ movptr(rdi_save, rdi);
 275 #else
 276     const Address mxcsr_save(rbp, mxcsr_off * wordSize);
 277     {
 278       Label skip_ldmx;
 279       __ stmxcsr(mxcsr_save);
 280       __ movl(rax, mxcsr_save);
 281       __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
 282       ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
 283       __ cmp32(rax, mxcsr_std);
 284       __ jcc(Assembler::equal, skip_ldmx);
 285       __ ldmxcsr(mxcsr_std);
 286       __ bind(skip_ldmx);
 287     }
 288 #endif


1301   // Copy big chunks forward
1302   //
1303   // Inputs:
1304   //   end_from     - source arrays end address
1305   //   end_to       - destination array end address
1306   //   qword_count  - 64-bits element count, negative
1307   //   to           - scratch
1308   //   L_copy_bytes - entry label
1309   //   L_copy_8_bytes  - exit  label
1310   //
1311   void copy_bytes_forward(Register end_from, Register end_to,
1312                              Register qword_count, Register to,
1313                              Label& L_copy_bytes, Label& L_copy_8_bytes) {
1314     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1315     Label L_loop;
1316     __ align(OptoLoopAlignment);
1317     if (UseUnalignedLoadStores) {
1318       Label L_end;
1319       // Copy 64-bytes per iteration
1320       __ BIND(L_loop);
1321       if (UseAVX >= 2) {



1322         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1323         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1324         __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
1325         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
1326       } else {
1327         __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1328         __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1329         __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
1330         __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
1331         __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
1332         __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
1333         __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
1334         __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
1335       }
1336       __ BIND(L_copy_bytes);
1337       __ addptr(qword_count, 8);
1338       __ jcc(Assembler::lessEqual, L_loop);
1339       __ subptr(qword_count, 4);  // sub(8) and add(4)
1340       __ jccb(Assembler::greater, L_end);
1341       // Copy trailing 32 bytes


1377   // Copy big chunks backward
1378   //
1379   // Inputs:
1380   //   from         - source arrays address
1381   //   dest         - destination array address
1382   //   qword_count  - 64-bits element count
1383   //   to           - scratch
1384   //   L_copy_bytes - entry label
1385   //   L_copy_8_bytes  - exit  label
1386   //
1387   void copy_bytes_backward(Register from, Register dest,
1388                               Register qword_count, Register to,
1389                               Label& L_copy_bytes, Label& L_copy_8_bytes) {
1390     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1391     Label L_loop;
1392     __ align(OptoLoopAlignment);
1393     if (UseUnalignedLoadStores) {
1394       Label L_end;
1395       // Copy 64-bytes per iteration
1396       __ BIND(L_loop);
1397       if (UseAVX >= 2) {



1398         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
1399         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
1400         __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
1401         __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
1402       } else {
1403         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
1404         __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
1405         __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
1406         __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
1407         __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
1408         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
1409         __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
1410         __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
1411       }
1412       __ BIND(L_copy_bytes);
1413       __ subptr(qword_count, 8);
1414       __ jcc(Assembler::greaterEqual, L_loop);
1415 
1416       __ addptr(qword_count, 4);  // add(8) and sub(4)
1417       __ jccb(Assembler::less, L_end);




 120   //  -2 [ entry point          ]
 121   //  -1 [ parameters           ]
 122   //   0 [ saved rbp            ] <--- rbp
 123   //   1 [ return address       ]
 124   //   2 [ parameter size       ]
 125   //   3 [ thread               ]
 126   //
 127   // Windows Arguments:
 128   //    c_rarg0:   call wrapper address                   address
 129   //    c_rarg1:   result                                 address
 130   //    c_rarg2:   result type                            BasicType
 131   //    c_rarg3:   method                                 Method*
 132   //    48(rbp): (interpreter) entry point              address
 133   //    56(rbp): parameters                             intptr_t*
 134   //    64(rbp): parameter size (in words)              int
 135   //    72(rbp): thread                                 Thread*
 136   //
 137   //     [ return_from_Java     ] <--- rsp
 138   //     [ argument word n      ]
 139   //      ...
 140   // -60 [ argument word 1      ]
 141   // -59 [ saved xmm31          ] <--- rsp_after_call
 142   //     [ saved xmm16-xmm30    ] (EVEX enabled, else the space is blank)
 143   // -27 [ saved xmm15          ]
 144   //     [ saved xmm7-xmm14     ]
 145   //  -9 [ saved xmm6           ] (each xmm register takes 2 slots)
 146   //  -7 [ saved r15            ]
 147   //  -6 [ saved r14            ]
 148   //  -5 [ saved r13            ]
 149   //  -4 [ saved r12            ]
 150   //  -3 [ saved rdi            ]
 151   //  -2 [ saved rsi            ]
 152   //  -1 [ saved rbx            ]
 153   //   0 [ saved rbp            ] <--- rbp
 154   //   1 [ return address       ]
 155   //   2 [ call wrapper         ]
 156   //   3 [ result               ]
 157   //   4 [ result type          ]
 158   //   5 [ method               ]
 159   //   6 [ entry point          ]
 160   //   7 [ parameters           ]
 161   //   8 [ parameter size       ]
 162   //   9 [ thread               ]
 163   //
 164   //    Windows reserves the callers stack space for arguments 1-4.
 165   //    We spill c_rarg0-c_rarg3 to this space.
 166 
 167   // Call stub stack layout word offsets from rbp
 168   enum call_stub_layout {
 169 #ifdef _WIN64
 170     xmm_save_first     = 6,  // save from xmm6
 171     xmm_save_last      = 31, // to xmm31
 172     xmm_save_base      = -9,
 173     rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -59
 174     r15_off            = -7,
 175     r14_off            = -6,
 176     r13_off            = -5,
 177     r12_off            = -4,
 178     rdi_off            = -3,
 179     rsi_off            = -2,
 180     rbx_off            = -1,
 181     rbp_off            =  0,
 182     retaddr_off        =  1,
 183     call_wrapper_off   =  2,
 184     result_off         =  3,
 185     result_type_off    =  4,
 186     method_off         =  5,
 187     entry_point_off    =  6,
 188     parameters_off     =  7,
 189     parameter_size_off =  8,
 190     thread_off         =  9
 191 #else


 247     __ enter();
 248     __ subptr(rsp, -rsp_after_call_off * wordSize);
 249 
 250     // save register parameters
 251 #ifndef _WIN64
 252     __ movptr(parameters,   c_rarg5); // parameters
 253     __ movptr(entry_point,  c_rarg4); // entry_point
 254 #endif
 255 
 256     __ movptr(method,       c_rarg3); // method
 257     __ movl(result_type,  c_rarg2);   // result type
 258     __ movptr(result,       c_rarg1); // result
 259     __ movptr(call_wrapper, c_rarg0); // call wrapper
 260 
 261     // save regs belonging to calling function
 262     __ movptr(rbx_save, rbx);
 263     __ movptr(r12_save, r12);
 264     __ movptr(r13_save, r13);
 265     __ movptr(r14_save, r14);
 266     __ movptr(r15_save, r15);
 267     if (UseAVX > 2) {
 268       __ movl(rbx, 0xffff);
 269       __ kmovql(k1, rbx);
 270     }
 271 #ifdef _WIN64
 272     if (UseAVX > 2) {
 273       for (int i = 6; i <= 31; i++) {
 274         __ movdqu(xmm_save(i), as_XMMRegister(i));
 275       }
 276     } else {
 277       for (int i = 6; i <= 15; i++) {
 278         __ movdqu(xmm_save(i), as_XMMRegister(i));
 279       }
 280     }
 281 
 282     const Address rdi_save(rbp, rdi_off * wordSize);
 283     const Address rsi_save(rbp, rsi_off * wordSize);
 284 
 285     __ movptr(rsi_save, rsi);
 286     __ movptr(rdi_save, rdi);
 287 #else
 288     const Address mxcsr_save(rbp, mxcsr_off * wordSize);
 289     {
 290       Label skip_ldmx;
 291       __ stmxcsr(mxcsr_save);
 292       __ movl(rax, mxcsr_save);
 293       __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
 294       ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
 295       __ cmp32(rax, mxcsr_std);
 296       __ jcc(Assembler::equal, skip_ldmx);
 297       __ ldmxcsr(mxcsr_std);
 298       __ bind(skip_ldmx);
 299     }
 300 #endif


1313   // Copy big chunks forward
1314   //
1315   // Inputs:
1316   //   end_from     - source arrays end address
1317   //   end_to       - destination array end address
1318   //   qword_count  - 64-bits element count, negative
1319   //   to           - scratch
1320   //   L_copy_bytes - entry label
1321   //   L_copy_8_bytes  - exit  label
1322   //
1323   void copy_bytes_forward(Register end_from, Register end_to,
1324                              Register qword_count, Register to,
1325                              Label& L_copy_bytes, Label& L_copy_8_bytes) {
1326     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1327     Label L_loop;
1328     __ align(OptoLoopAlignment);
1329     if (UseUnalignedLoadStores) {
1330       Label L_end;
1331       // Copy 64-bytes per iteration
1332       __ BIND(L_loop);
1333       if (UseAVX > 2) {
1334         __ evmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
1335         __ evmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
1336       } else if (UseAVX == 2) {
1337         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1338         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1339         __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
1340         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
1341       } else {
1342         __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1343         __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1344         __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
1345         __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
1346         __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
1347         __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
1348         __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
1349         __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
1350       }
1351       __ BIND(L_copy_bytes);
1352       __ addptr(qword_count, 8);
1353       __ jcc(Assembler::lessEqual, L_loop);
1354       __ subptr(qword_count, 4);  // sub(8) and add(4)
1355       __ jccb(Assembler::greater, L_end);
1356       // Copy trailing 32 bytes


1392   // Copy big chunks backward
1393   //
1394   // Inputs:
1395   //   from         - source arrays address
1396   //   dest         - destination array address
1397   //   qword_count  - 64-bits element count
1398   //   to           - scratch
1399   //   L_copy_bytes - entry label
1400   //   L_copy_8_bytes  - exit  label
1401   //
1402   void copy_bytes_backward(Register from, Register dest,
1403                               Register qword_count, Register to,
1404                               Label& L_copy_bytes, Label& L_copy_8_bytes) {
1405     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1406     Label L_loop;
1407     __ align(OptoLoopAlignment);
1408     if (UseUnalignedLoadStores) {
1409       Label L_end;
1410       // Copy 64-bytes per iteration
1411       __ BIND(L_loop);
1412       if (UseAVX > 2) {
1413         __ evmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit);
1414         __ evmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit);
1415       } else if (UseAVX == 2) {
1416         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
1417         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
1418         __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
1419         __ vmovdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
1420       } else {
1421         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
1422         __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
1423         __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
1424         __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
1425         __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
1426         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
1427         __ movdqu(xmm3, Address(from, qword_count, Address::times_8,  0));
1428         __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm3);
1429       }
1430       __ BIND(L_copy_bytes);
1431       __ subptr(qword_count, 8);
1432       __ jcc(Assembler::greaterEqual, L_loop);
1433 
1434       __ addptr(qword_count, 4);  // add(8) and sub(4)
1435       __ jccb(Assembler::less, L_end);


< prev index next >