397 }
398 xmm_off += 2;
399 }
400 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
401
402 } else if (UseSSE == 1) {
403 int xmm_off = xmm_regs_as_doubles_off;
404 for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
405 VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
406 map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
407 xmm_off += 2;
408 }
409 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
410 }
411 }
412
413 return map;
414 }
415
// Spills all live registers into a fixed-layout spill area on the stack and
// returns the OopMap describing where each register was saved (built by
// generate_oop_map so the GC can find/update oops held in registers).
//
// num_rt_args        - number of runtime-call arguments; shifts the stack
//                      slot numbering recorded in the OopMap
// save_fpu_registers - when false, FPU/XMM state is left untouched
static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
                                   bool save_fpu_registers = true) {
  __ block_comment("save_live_registers");

  __ pusha();         // integer registers

  // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
  // assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");

  // Reserve the remainder of the spill area (FPU state, XMM slots, marker).
  __ subptr(rsp, extra_space_offset * VMRegImpl::stack_slot_size);

#ifdef ASSERT
  // Debug-only sentinel written into the frame; allows asserts elsewhere to
  // detect a corrupted/mislaid spill area.
  __ movptr(Address(rsp, marker * VMRegImpl::stack_slot_size), (int32_t)0xfeedbeef);
#endif

  if (save_fpu_registers) {
    if (UseSSE < 2) {
      // save FPU stack
      __ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
      __ fwait();

#ifdef ASSERT
#endif
      // Save XMM registers as doubles, 8 bytes per register.
      for (int n = 0; n < xmm_bypass_limit; n++) {
        XMMRegister xmm_name = as_XMMRegister(n);
        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
        offset += 8;
      }
    } else if (UseSSE == 1) {
      // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
      int offset = 0;
      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
        XMMRegister xmm_name = as_XMMRegister(n);
        // movflt stores only 4 bytes, but offset still advances by 8 so the
        // slot layout matches the double (SSE2) case.
        __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
        offset += 8;
      }
    }
  }

  // FPU stack must be empty now
  __ verify_FPU(0, "save_live_registers");

  return generate_oop_map(sasm, num_rt_args, save_fpu_registers);
}
494
495
496 static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
497 if (restore_fpu_registers) {
498 if (UseSSE >= 2) {
499 // restore XMM registers
500 int xmm_bypass_limit = FrameMap::nof_xmm_regs;
501 #ifdef _LP64
502 if (UseAVX < 3) {
503 xmm_bypass_limit = xmm_bypass_limit / 2;
504 }
505 #endif
506 int offset = 0;
507 for (int n = 0; n < xmm_bypass_limit; n++) {
508 XMMRegister xmm_name = as_XMMRegister(n);
509 __ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
510 offset += 8;
511 }
512 } else if (UseSSE == 1) {
940
941 Label reexecuteEntry, cont;
942
943 __ testptr(rax, rax); // have we deoptimized?
944 __ jcc(Assembler::equal, cont); // no
945
946 // Will reexecute. Proper return address is already on the stack we just restore
947 // registers, pop all of our frame but the return address and jump to the deopt blob
948 restore_live_registers(sasm);
949 __ leave();
950 __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
951
952 __ bind(cont);
953 restore_live_registers(sasm);
954 __ leave();
955 __ ret(0);
956
957 return oop_maps;
958 }
959
960
961 OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
962
963 // for better readability
964 const bool must_gc_arguments = true;
965 const bool dont_gc_arguments = false;
966
967 // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
968 bool save_fpu_registers = true;
969
970 // stub code & info for the different stubs
971 OopMapSet* oop_maps = NULL;
972 switch (id) {
973 case forward_exception_id:
974 {
975 oop_maps = generate_handle_exception(id, sasm);
976 __ leave();
977 __ ret(0);
978 }
979 break;
1025 __ should_not_reach_here();
1026 __ bind(ok);
1027 }
1028 #endif // ASSERT
1029
1030 // if we got here then the TLAB allocation failed, so try
1031 // refilling the TLAB or allocating directly from eden.
1032 Label retry_tlab, try_eden;
1033 const Register thread =
1034 __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass), returns rdi
1035
1036 __ bind(retry_tlab);
1037
1038 // get the instance size (size is postive so movl is fine for 64bit)
1039 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1040
1041 __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
1042
1043 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
1044 __ verify_oop(obj);
1045 __ pop(rbx);
1046 __ pop(rdi);
1047 __ ret(0);
1048
1049 __ bind(try_eden);
1050 // get the instance size (size is postive so movl is fine for 64bit)
1051 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1052
1053 __ eden_allocate(obj, obj_size, 0, t1, slow_path);
1054 __ incr_allocated_bytes(thread, obj_size, 0);
1055
1056 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
1057 __ verify_oop(obj);
1058 __ pop(rbx);
1059 __ pop(rdi);
1060 __ ret(0);
1061
1062 __ bind(slow_path);
1063 __ pop(rbx);
1064 __ pop(rdi);
1153 // since size is postive movl does right thing on 64bit
1154 __ movl(arr_size, length);
1155 assert(t1 == rcx, "fixed register usage");
1156 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1157 __ shrptr(t1, Klass::_lh_header_size_shift);
1158 __ andptr(t1, Klass::_lh_header_size_mask);
1159 __ addptr(arr_size, t1);
1160 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1161 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1162
1163 __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size
1164
1165 __ initialize_header(obj, klass, length, t1, t2);
1166 __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
1167 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
1168 assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
1169 __ andptr(t1, Klass::_lh_header_size_mask);
1170 __ subptr(arr_size, t1); // body length
1171 __ addptr(t1, obj); // body start
1172 if (!ZeroTLAB) {
1173 __ initialize_body(t1, arr_size, 0, t2);
1174 }
1175 __ verify_oop(obj);
1176 __ ret(0);
1177
1178 __ bind(try_eden);
1179 // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
1180 // since size is positive movl does right thing on 64bit
1181 __ movl(t1, Address(klass, Klass::layout_helper_offset()));
1182 // since size is postive movl does right thing on 64bit
1183 __ movl(arr_size, length);
1184 assert(t1 == rcx, "fixed register usage");
1185 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1186 __ shrptr(t1, Klass::_lh_header_size_shift);
1187 __ andptr(t1, Klass::_lh_header_size_mask);
1188 __ addptr(arr_size, t1);
1189 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1190 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1191
1192 __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size
1193 __ incr_allocated_bytes(thread, arr_size, 0);
1194
1484 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
1485 }
1486 break;
1487
1488 case load_appendix_patching_id:
1489 { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
1490 // we should set up register map
1491 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
1492 }
1493 break;
1494
1495 case dtrace_object_alloc_id:
1496 { // rax,: object
1497 StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
1498 // we can't gc here so skip the oopmap but make sure that all
1499 // the live registers get saved.
1500 save_live_registers(sasm, 1);
1501
1502 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
1503 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc)));
1504 NOT_LP64(__ pop(rax));
1505
1506 restore_live_registers(sasm);
1507 }
1508 break;
1509
1510 case fpu2long_stub_id:
1511 {
1512 // rax, and rdx are destroyed, but should be free since the result is returned there
1513 // preserve rsi,ecx
1514 __ push(rsi);
1515 __ push(rcx);
1516 LP64_ONLY(__ push(rdx);)
1517
1518 // check for NaN
1519 Label return0, do_return, return_min_jlong, do_convert;
1520
1521 Address value_high_word(rsp, wordSize + 4);
1522 Address value_low_word(rsp, wordSize);
1523 Address result_high_word(rsp, 3*wordSize + 4);
|
397 }
398 xmm_off += 2;
399 }
400 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
401
402 } else if (UseSSE == 1) {
403 int xmm_off = xmm_regs_as_doubles_off;
404 for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
405 VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
406 map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
407 xmm_off += 2;
408 }
409 assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
410 }
411 }
412
413 return map;
414 }
415
// Spills all live registers into a fixed-layout spill area on the stack and
// (optionally) returns the OopMap describing where each register was saved
// (built by generate_oop_map so the GC can find/update oops in registers).
//
// num_rt_args         - number of runtime-call arguments; shifts the stack
//                       slot numbering recorded in the OopMap
// save_fpu_registers  - when false, FPU/XMM state is left untouched
// do_generate_oop_map - when false, registers are still saved but no OopMap
//                       is built and NULL is returned (used by call sites
//                       that cannot GC, e.g. the heap sampling hook)
static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
                                   bool save_fpu_registers = true,
                                   bool do_generate_oop_map = true) {
  __ block_comment("save_live_registers");

  __ pusha();         // integer registers

  // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset");
  // assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset");

  // Reserve the remainder of the spill area (FPU state, XMM slots, marker).
  __ subptr(rsp, extra_space_offset * VMRegImpl::stack_slot_size);

#ifdef ASSERT
  // Debug-only sentinel written into the frame; allows asserts elsewhere to
  // detect a corrupted/mislaid spill area.
  __ movptr(Address(rsp, marker * VMRegImpl::stack_slot_size), (int32_t)0xfeedbeef);
#endif

  if (save_fpu_registers) {
    if (UseSSE < 2) {
      // save FPU stack
      __ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
      __ fwait();

#ifdef ASSERT
#endif
      // Save XMM registers as doubles, 8 bytes per register.
      for (int n = 0; n < xmm_bypass_limit; n++) {
        XMMRegister xmm_name = as_XMMRegister(n);
        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
        offset += 8;
      }
    } else if (UseSSE == 1) {
      // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
      int offset = 0;
      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
        XMMRegister xmm_name = as_XMMRegister(n);
        // movflt stores only 4 bytes, but offset still advances by 8 so the
        // slot layout matches the double (SSE2) case.
        __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
        offset += 8;
      }
    }
  }

  // FPU stack must be empty now
  __ verify_FPU(0, "save_live_registers");

  // Call sites that cannot GC skip OopMap creation entirely.
  return do_generate_oop_map
      ? generate_oop_map(sasm, num_rt_args, save_fpu_registers)
      : NULL;
}
497
498
499 static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true) {
500 if (restore_fpu_registers) {
501 if (UseSSE >= 2) {
502 // restore XMM registers
503 int xmm_bypass_limit = FrameMap::nof_xmm_regs;
504 #ifdef _LP64
505 if (UseAVX < 3) {
506 xmm_bypass_limit = xmm_bypass_limit / 2;
507 }
508 #endif
509 int offset = 0;
510 for (int n = 0; n < xmm_bypass_limit; n++) {
511 XMMRegister xmm_name = as_XMMRegister(n);
512 __ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
513 offset += 8;
514 }
515 } else if (UseSSE == 1) {
943
944 Label reexecuteEntry, cont;
945
946 __ testptr(rax, rax); // have we deoptimized?
947 __ jcc(Assembler::equal, cont); // no
948
949 // Will reexecute. Proper return address is already on the stack we just restore
950 // registers, pop all of our frame but the return address and jump to the deopt blob
951 restore_live_registers(sasm);
952 __ leave();
953 __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
954
955 __ bind(cont);
956 restore_live_registers(sasm);
957 __ leave();
958 __ ret(0);
959
960 return oop_maps;
961 }
962
963 static void heap_support_stub(StubAssembler* sasm, Register obj,
964 Register size_in_bytes, int con_size_in_bytes,
965 Register t1, Register t2) {
966 // Usually, when we invoke the sampling methods from within the client
967 // compiler, we do so in a stub. However, sometimes, we are already in a stub
968 // when we want to call these things, and stack trace gathering gets confused
969 // when you call a stub inside another stub.
970 HEAP_MONITORING(sasm, noreg, size_in_bytes, con_size_in_bytes, obj, t1, t2, \
971 { \
972 save_live_registers(sasm, 1, true, false); \
973 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax)); \
974 __ call(RuntimeAddress(
975 CAST_FROM_FN_PTR(address, \
976 HeapMonitoring::object_alloc_unsized))); \
977 NOT_LP64(__ pop(rax)); \
978 restore_live_registers(sasm); \
979 });
980 }
981
982 OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
983
984 // for better readability
985 const bool must_gc_arguments = true;
986 const bool dont_gc_arguments = false;
987
988 // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
989 bool save_fpu_registers = true;
990
991 // stub code & info for the different stubs
992 OopMapSet* oop_maps = NULL;
993 switch (id) {
994 case forward_exception_id:
995 {
996 oop_maps = generate_handle_exception(id, sasm);
997 __ leave();
998 __ ret(0);
999 }
1000 break;
1046 __ should_not_reach_here();
1047 __ bind(ok);
1048 }
1049 #endif // ASSERT
1050
1051 // if we got here then the TLAB allocation failed, so try
1052 // refilling the TLAB or allocating directly from eden.
1053 Label retry_tlab, try_eden;
1054 const Register thread =
1055 __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass), returns rdi
1056
1057 __ bind(retry_tlab);
1058
1059 // get the instance size (size is postive so movl is fine for 64bit)
1060 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1061
1062 __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
1063
1064 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true);
1065 __ verify_oop(obj);
1066 heap_support_stub(sasm, obj, obj_size, 0, t1, t2);
1067 __ pop(rbx);
1068 __ pop(rdi);
1069 __ ret(0);
1070
1071 __ bind(try_eden);
1072 // get the instance size (size is postive so movl is fine for 64bit)
1073 __ movl(obj_size, Address(klass, Klass::layout_helper_offset()));
1074
1075 __ eden_allocate(obj, obj_size, 0, t1, slow_path);
1076 __ incr_allocated_bytes(thread, obj_size, 0);
1077
1078 __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false);
1079 __ verify_oop(obj);
1080 __ pop(rbx);
1081 __ pop(rdi);
1082 __ ret(0);
1083
1084 __ bind(slow_path);
1085 __ pop(rbx);
1086 __ pop(rdi);
1175 // since size is postive movl does right thing on 64bit
1176 __ movl(arr_size, length);
1177 assert(t1 == rcx, "fixed register usage");
1178 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1179 __ shrptr(t1, Klass::_lh_header_size_shift);
1180 __ andptr(t1, Klass::_lh_header_size_mask);
1181 __ addptr(arr_size, t1);
1182 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1183 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1184
1185 __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size
1186
1187 __ initialize_header(obj, klass, length, t1, t2);
1188 __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte)));
1189 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
1190 assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
1191 __ andptr(t1, Klass::_lh_header_size_mask);
1192 __ subptr(arr_size, t1); // body length
1193 __ addptr(t1, obj); // body start
1194 if (!ZeroTLAB) {
1195 // Initialize body destroys arr_size so remember it.
1196 __ push(arr_size);
1197 __ initialize_body(t1, arr_size, 0, t2);
1198 __ pop(arr_size);
1199 }
1200 heap_support_stub(sasm, obj, arr_size, 0, t1, t2);
1201 __ verify_oop(obj);
1202 __ ret(0);
1203
1204 __ bind(try_eden);
1205 // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
1206 // since size is positive movl does right thing on 64bit
1207 __ movl(t1, Address(klass, Klass::layout_helper_offset()));
1208 // since size is postive movl does right thing on 64bit
1209 __ movl(arr_size, length);
1210 assert(t1 == rcx, "fixed register usage");
1211 __ shlptr(arr_size /* by t1=rcx, mod 32 */);
1212 __ shrptr(t1, Klass::_lh_header_size_shift);
1213 __ andptr(t1, Klass::_lh_header_size_mask);
1214 __ addptr(arr_size, t1);
1215 __ addptr(arr_size, MinObjAlignmentInBytesMask); // align up
1216 __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
1217
1218 __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size
1219 __ incr_allocated_bytes(thread, arr_size, 0);
1220
1510 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
1511 }
1512 break;
1513
1514 case load_appendix_patching_id:
1515 { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
1516 // we should set up register map
1517 oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
1518 }
1519 break;
1520
1521 case dtrace_object_alloc_id:
1522 { // rax,: object
1523 StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
1524 // we can't gc here so skip the oopmap but make sure that all
1525 // the live registers get saved.
1526 save_live_registers(sasm, 1);
1527
1528 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
1529 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc)));
1530 NOT_LP64(__ pop(rax));
1531
1532 restore_live_registers(sasm);
1533 }
1534 break;
1535
1536 case heap_object_sample_id:
1537 { // rax,: object
1538 StubFrame f(sasm, "heap_object_sample", dont_gc_arguments);
1539 // We can't gc here so skip the oopmap but make sure that all
1540 // the live registers get saved
1541 save_live_registers(sasm, 1);
1542
1543 __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax));
1544 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
1545 HeapMonitoring::object_alloc)));
1546 NOT_LP64(__ pop(rax));
1547
1548 restore_live_registers(sasm);
1549 }
1550 break;
1551
1552 case fpu2long_stub_id:
1553 {
1554 // rax, and rdx are destroyed, but should be free since the result is returned there
1555 // preserve rsi,ecx
1556 __ push(rsi);
1557 __ push(rcx);
1558 LP64_ONLY(__ push(rdx);)
1559
1560 // check for NaN
1561 Label return0, do_return, return_min_jlong, do_convert;
1562
1563 Address value_high_word(rsp, wordSize + 4);
1564 Address value_low_word(rsp, wordSize);
1565 Address result_high_word(rsp, 3*wordSize + 4);
|