397 }
398 xmm_off += 2;
399 }
400 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
401
402 } else if (UseSSE == 1) {
403 int xmm_off = xmm_regs_as_doubles_off;
404 for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
405 VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
406 map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
407 xmm_off += 2;
408 }
409 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
410 }
411 }
412
413 return map;
414 }
415
416 static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
417 bool save_fpu_registers = true) {
418 __ block_comment("save_live_registers");
419
420 __ pusha(); // integer registers
421
422 // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
423 // assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");
424
425 __ subptr(rsp, extra_space_offset * VMRegImpl::stack_slot_size);
426
427 #ifdef ASSERT
428 __ movptr(Address(rsp, marker * VMRegImpl::stack_slot_size), (int32_t)0xfeedbeef);
429 #endif
430
431 if (save_fpu_registers) {
432 if (UseSSE < 2) {
433 // save FPU stack
434 __ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
435 __ fwait();
436
437 #ifdef ASSERT
472 #endif
473 for (int n = 0; n < xmm_bypass_limit; n++) {
474 XMMRegister xmm_name = as_XMMRegister(n);
475 __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
476 offset += 8;
477 }
478 } else if (UseSSE == 1) {
479 // save XMM registers as floats because doubles are not supported without SSE2 (num MMX == num fpu)
480 int offset = 0;
481 for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
482 XMMRegister xmm_name = as_XMMRegister(n);
483 __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
484 offset += 8;
485 }
486 }
487 }
488
489 // FPU stack must be empty now
490 __ verify_FPU(0, "save_live_registers");
491
492 return generate_oop_map(sasm, num_rt_args, save_fpu_registers);
493 }
494
495
496 static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
497 if (restore_fpu_registers) {
498 if (UseSSE >= 2) {
499 // restore XMM registers
500 int xmm_bypass_limit = FrameMap::nof_xmm_regs;
501 #ifdef _LP64
502 if (UseAVX < 3) {
503 xmm_bypass_limit = xmm_bypass_limit / 2;
504 }
505 #endif
506 int offset = 0;
507 for (int n = 0; n < xmm_bypass_limit; n++) {
508 XMMRegister xmm_name = as_XMMRegister(n);
509 __ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
510 offset += 8;
511 }
512 } else if (UseSSE == 1) {
940
941 Label reexecuteEntry, cont;
942
943 __ testptr(rax, rax); // have we deoptimized?
944 __ jcc(Assembler::equal, cont); // no
945
946 // Will reexecute. Proper return address is already on the stack; we just restore
947 // registers, pop all of our frame but the return address, and jump to the deopt blob.
948 restore_live_registers(sasm);
949 __ leave();
950 __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
951
952 __ bind(cont);
953 restore_live_registers(sasm);
954 __ leave();
955 __ ret(0);
956
957 return oop_maps;
958 }
959
960
961 OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
962
963 // for better readability
964 const bool must_gc_arguments = true;
965 const bool dont_gc_arguments = false;
966
967 // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
968 bool save_fpu_registers = true;
969
970 // stub code & info for the different stubs
971 OopMapSet* oop_maps = NULL;
972 switch (id) {
973 case forward_exception_id:
974 {
975 oop_maps = generate_handle_exception(id, sasm);
976 __ leave();
977 __ ret(0);
978 }
979 break;
1025 __ should_not_reach_here();
1026 __ bind(ok);
1027 }
1028 #endif // ASSERT
1029
1030 // if we got here then the TLAB allocation failed, so try
1031 // refilling the TLAB or allocating directly from eden.
1032 Label retry_tlab, try_eden;
1033 const Register thread =
1034 __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass), returns rdi
1035
1036 __ bind(retry_tlab);
1037
1038 // get the instance size (size is positive so movl is fine for 64bit)
1039 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1040
1041 __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
1042
1043 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
1044 __ verify_oop(obj);
1045 __ pop(rbx);
1046 __ pop(rdi);
1047 __ ret(0);
1048
1049 __ bind(try_eden);
1050 // get the instance size (size is positive so movl is fine for 64bit)
1051 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1052
1053 __ eden_allocate(obj, obj_size, 0, t1, slow_path);
1054 __ incr_allocated_bytes(thread, obj_size, 0);
1055
1056 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
1057 __ verify_oop(obj);
1058 __ pop(rbx);
1059 __ pop(rdi);
1060 __ ret(0);
1061
1062 __ bind(slow_path);
1063 __ pop(rbx);
1064 __ pop(rdi);
1153 // since size is positive movl does the right thing on 64bit
1154 __ movl(arr_size, length);
1155 assert(t1 == rcx, "fixed register usage");
1156 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1157 __ shrptr(t1, Klass::_lh_header_size_shift);
1158 __ andptr(t1, Klass::_lh_header_size_mask);
1159 __ addptr(arr_size, t1);
1160 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1161 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1162
1163 __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size
1164
1165 __ initialize_header(obj, klass, length, t1, t2);
1166 __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
1167 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
1168 assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
1169 __ andptr(t1, Klass::_lh_header_size_mask);
1170 __ subptr(arr_size, t1); // body length
1171 __ addptr(t1, obj); // body start
1172 if (!ZeroTLAB) {
1173 __ initialize_body(t1, arr_size, 0, t2);
1174 }
1175 __ verify_oop(obj);
1176 __ ret(0);
1177
1178 __ bind(try_eden);
1179 // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
1180 // since size is positive movl does the right thing on 64bit
1181 __ movl(t1, Address(klass, Klass::layout_helper_offset()));
1182 // since size is positive movl does the right thing on 64bit
1183 __ movl(arr_size, length);
1184 assert(t1 == rcx, "fixed register usage");
1185 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1186 __ shrptr(t1, Klass::_lh_header_size_shift);
1187 __ andptr(t1, Klass::_lh_header_size_mask);
1188 __ addptr(arr_size, t1);
1189 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1190 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1191
1192 __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size
1193 __ incr_allocated_bytes(thread, arr_size, 0);
1194
1484 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
1485 }
1486 break;
1487
1488 case load_appendix_patching_id:
1489 { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
1490 // we should set up register map
1491 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
1492 }
1493 break;
1494
1495 case dtrace_object_alloc_id:
1496 { // rax: object
1497 StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
1498 // we can't gc here so skip the oopmap but make sure that all
1499 // the live registers get saved.
1500 save_live_registers(sasm, 1);
1501
1502 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
1503 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc)));
1504 NOT_LP64(__ pop(rax));
1505
1506 restore_live_registers(sasm);
1507 }
1508 break;
1509
1510 case fpu2long_stub_id:
1511 {
1512 // rax and rdx are destroyed, but should be free since the result is returned there
1513 // preserve rsi and rcx
1514 __ push(rsi);
1515 __ push(rcx);
1516 LP64_ONLY(__ push(rdx);)
1517
1518 // check for NaN
1519 Label return0, do_return, return_min_jlong, do_convert;
1520
1521 Address value_high_word(rsp, wordSize + 4);
1522 Address value_low_word(rsp, wordSize);
1523 Address result_high_word(rsp, 3*wordSize + 4);
397 }
398 xmm_off += 2;
399 }
400 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
401
402 } else if (UseSSE == 1) {
403 int xmm_off = xmm_regs_as_doubles_off;
404 for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
405 VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
406 map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
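      // Note (editorial comment): each saved XMM value occupies two VMRegImpl stack slots
      // (one 8-byte double slot in the save area), hence the += 2 below.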
407 xmm_off += 2;
408 }
409 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
410 }
411 }
412
413 return map;
414 }
415
416 static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
417 bool save_fpu_registers = true,
418 bool do_generate_oop_map = true) {
419 __ block_comment("save_live_registers");
420
421 __ pusha(); // integer registers
422
423 // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
424 // assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");
425
426 __ subptr(rsp, extra_space_offset * VMRegImpl::stack_slot_size);
427
428 #ifdef ASSERT
429 __ movptr(Address(rsp, marker * VMRegImpl::stack_slot_size), (int32_t)0xfeedbeef);
430 #endif
431
432 if (save_fpu_registers) {
433 if (UseSSE < 2) {
434 // save FPU stack
435 __ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
436 __ fwait();
437
438 #ifdef ASSERT
473 #endif
474 for (int n = 0; n < xmm_bypass_limit; n++) {
475 XMMRegister xmm_name = as_XMMRegister(n);
476 __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
477 offset += 8;
478 }
479 } else if (UseSSE == 1) {
480 // save XMM registers as floats because doubles are not supported without SSE2 (num MMX == num fpu)
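      // (even though only 32-bit floats are stored here, each register still gets an
      //  8-byte slot, i.e. offset advances by 8, so the layout matches the UseSSE >= 2 case)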
481 int offset = 0;
482 for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
483 XMMRegister xmm_name = as_XMMRegister(n);
484 __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
485 offset += 8;
486 }
487 }
488 }
489
490 // FPU stack must be empty now
491 __ verify_FPU(0, "save_live_registers");
492
493 return do_generate_oop_map
494 ? generate_oop_map(sasm, num_rt_args, save_fpu_registers)
495 : NULL;
496 }
497
498
499 static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
500 if (restore_fpu_registers) {
501 if (UseSSE >= 2) {
502 // restore XMM registers
503 int xmm_bypass_limit = FrameMap::nof_xmm_regs;
504 #ifdef _LP64
505 if (UseAVX < 3) {
506 xmm_bypass_limit = xmm_bypass_limit / 2;
507 }
508 #endif
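        // (xmm16..xmm31 only exist with AVX-512, i.e. UseAVX >= 3; otherwise only the
        //  lower half of FrameMap::nof_xmm_regs is saved and restored)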
509 int offset = 0;
510 for (int n = 0; n < xmm_bypass_limit; n++) {
511 XMMRegister xmm_name = as_XMMRegister(n);
512 __ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
513 offset += 8;
514 }
515 } else if (UseSSE == 1) {
943
944 Label reexecuteEntry, cont;
945
946 __ testptr(rax, rax); // have we deoptimized?
947 __ jcc(Assembler::equal, cont); // no
948
949 // Will reexecute. Proper return address is already on the stack; we just restore
950 // registers, pop all of our frame but the return address, and jump to the deopt blob.
951 restore_live_registers(sasm);
952 __ leave();
953 __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
954
955 __ bind(cont);
956 restore_live_registers(sasm);
957 __ leave();
958 __ ret(0);
959
960 return oop_maps;
961 }
962
963 static void heap_support_stub(StubAssembler* sasm, Register obj,
964 Register size_in_bytes, int con_size_in_bytes,
965 Register t1, Register t2) {
966 // Usually, when we invoke the sampling methods from within the client
967 // compiler, we do so in a dedicated stub. However, sometimes we are already in a
968 // stub when we want to call them, and stack trace gathering gets confused when
969 // one stub calls another, so this helper emits the call inline instead.
970 HEAP_MONITORING(sasm, noreg, size_in_bytes, con_size_in_bytes, obj, t1, t2, \
971 { \
972 save_live_registers(sasm, 1, true, false); \
973 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax)); \
974 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, \
975 HeapMonitoring::object_alloc))); \
976 NOT_LP64(__ pop(rax)); \
977 restore_live_registers(sasm); \
978 });
979 }
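// Note (editorial comment): the macro body above passes do_generate_oop_map == false to
// save_live_registers, presumably for the same reason the dtrace_object_alloc and
// heap_object_sample stubs below skip their oop maps: the sampling call cannot GC, so
// only the register save/restore around the runtime call matters.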
980
981 OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
982
983 // for better readability
984 const bool must_gc_arguments = true;
985 const bool dont_gc_arguments = false;
986
987 // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
988 bool save_fpu_registers = true;
989
990 // stub code & info for the different stubs
991 OopMapSet* oop_maps = NULL;
992 switch (id) {
993 case forward_exception_id:
994 {
995 oop_maps = generate_handle_exception(id, sasm);
996 __ leave();
997 __ ret(0);
998 }
999 break;
1045 __ should_not_reach_here();
1046 __ bind(ok);
1047 }
1048 #endif // ASSERT
1049
1050 // if we got here then the TLAB allocation failed, so try
1051 // refilling the TLAB or allocating directly from eden.
1052 Label retry_tlab, try_eden;
1053 const Register thread =
1054 __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass), returns rdi
1055
1056 __ bind(retry_tlab);
1057
1058 // get the instance size (size is positive so movl is fine for 64bit)
1059 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1060
1061 __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
1062
1063 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
1064 __ verify_oop(obj);
1065 heap_support_stub(sasm, obj, obj_size, 0, t1, t2);
1066 __ pop(rbx);
1067 __ pop(rdi);
1068 __ ret(0);
1069
1070 __ bind(try_eden);
1071 // get the instance size (size is positive so movl is fine for 64bit)
1072 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1073
1074 __ eden_allocate(obj, obj_size, 0, t1, slow_path);
1075 __ incr_allocated_bytes(thread, obj_size, 0);
1076
1077 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
1078 __ verify_oop(obj);
1079 __ pop(rbx);
1080 __ pop(rdi);
1081 __ ret(0);
1082
1083 __ bind(slow_path);
1084 __ pop(rbx);
1085 __ pop(rdi);
1174 // since size is positive movl does the right thing on 64bit
1175 __ movl(arr_size, length);
1176 assert(t1 == rcx, "fixed register usage");
1177 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1178 __ shrptr(t1, Klass::_lh_header_size_shift);
1179 __ andptr(t1, Klass::_lh_header_size_mask);
1180 __ addptr(arr_size, t1);
1181 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1182 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1183
1184 __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size
1185
1186 __ initialize_header(obj, klass, length, t1, t2);
1187 __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
1188 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
1189 assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
1190 __ andptr(t1, Klass::_lh_header_size_mask);
1191 __ subptr(arr_size, t1); // body length
1192 __ addptr(t1, obj); // body start
1193 if (!ZeroTLAB) {
1194 // initialize_body destroys arr_size, so preserve it for the sampling call below.
1195 __ push(arr_size);
1196 __ initialize_body(t1, arr_size, 0, t2);
1197 __ pop(arr_size);
1198 }
1199 heap_support_stub(sasm, obj, arr_size, 0, t1, t2);
1200 __ verify_oop(obj);
1201 __ ret(0);
1202
1203 __ bind(try_eden);
1204 // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
1205 // since size is positive movl does the right thing on 64bit
1206 __ movl(t1, Address(klass, Klass::layout_helper_offset()));
1207 // since size is positive movl does the right thing on 64bit
1208 __ movl(arr_size, length);
1209 assert(t1 == rcx, "fixed register usage");
1210 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1211 __ shrptr(t1, Klass::_lh_header_size_shift);
1212 __ andptr(t1, Klass::_lh_header_size_mask);
1213 __ addptr(arr_size, t1);
1214 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1215 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
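        // Illustrative example of the computation above (not generated code): assuming a
        // 16-byte array header, an int[] with length == 10 has log2_element_size == 2 in
        // its layout_helper, so arr_size = (10 << 2) + 16 = 56 bytes, which is then
        // rounded up to the next MinObjAlignmentInBytes boundary.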
1216
1217 __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size
1218 __ incr_allocated_bytes(thread, arr_size, 0);
1219
1509 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
1510 }
1511 break;
1512
1513 case load_appendix_patching_id:
1514 { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
1515 // we should set up register map
1516 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
1517 }
1518 break;
1519
1520 case dtrace_object_alloc_id:
1521 { // rax: object
1522 StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
1523 // we can't gc here so skip the oopmap but make sure that all
1524 // the live registers get saved.
1525 save_live_registers(sasm, 1);
1526
1527 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
1528 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc)));
1529 NOT_LP64(__ pop(rax));
1530
1531 restore_live_registers(sasm);
1532 }
1533 break;
1534
1535 case heap_object_sample_id:
1536 { // rax: object
1537 StubFrame f(sasm, "heap_object_sample", dont_gc_arguments);
1538 // We can't gc here so skip the oopmap but make sure that all
1539 // the live registers get saved
1540 save_live_registers(sasm, 1);
1541
1542 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
1543 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
1544 HeapMonitoring::object_alloc)));
1545 NOT_LP64(__ pop(rax));
1546
1547 restore_live_registers(sasm);
1548 }
1549 break;
1550
1551 case fpu2long_stub_id:
1552 {
1553 // rax and rdx are destroyed, but should be free since the result is returned there
1554 // preserve rsi and rcx
1555 __ push(rsi);
1556 __ push(rcx);
1557 LP64_ONLY(__ push(rdx);)
1558
1559 // check for NaN
1560 Label return0, do_return, return_min_jlong, do_convert;
1561
1562 Address value_high_word(rsp, wordSize + 4);
1563 Address value_low_word(rsp, wordSize);
1564 Address result_high_word(rsp, 3*wordSize + 4);