
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
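// movptr is the pointer-width move used throughout this file: it expands to
// movq on LP64 and movl on 32-bit builds, so callers never spell out the word
// size. A minimal usage sketch (a hypothetical caller using the usual
// "__" == _masm-> convention, not part of this file):
//
//   __ movptr(rax, Address(rsp, 0));    // load a pointer-sized stack slot
//   __ movptr(Address(rbx, 8), rax);    // store it back, pointer-sized
//
// Integer constants go through the intptr_t overload; real addresses should
// go through the AddressLiteral-based helpers, as the comment above warns.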

void MacroAssembler::movdqu(Address dst, XMMRegister src) {
  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
    Assembler::vextractf32x4h(dst, src, 0);
  } else {
    Assembler::movdqu(dst, src);
  }
}

void MacroAssembler::movdqu(XMMRegister dst, Address src) {
  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
    Assembler::vinsertf32x4h(dst, src, 0);
  } else {
    Assembler::movdqu(dst, src);
  }
}

void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
  } else {
    Assembler::movdqu(dst, src);
  }
}

void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movdqu(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movdqu(dst, Address(rscratch1, 0));
  }
}
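// The guards above cover CPUs that report AVX-512 (UseAVX > 2) but lack the
// VL extension: xmm16..xmm31 (encoding() > 15) are only reachable through
// EVEX-encoded instructions, so a plain 128-bit movdqu cannot name them and
// the code falls back to an EVEX extract/insert of lane 0 (or to a full
// 512-bit evmovdqul for register-to-register copies). Roughly, as a sketch
// of the decision rather than the exact emitted sequence:
//
//   if (the XMM operand is xmm16..xmm31 and AVX512VL is absent)
//     emit the EVEX form that can address the high registers
//   else
//     emit the ordinary SSE/AVX movdqu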

void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
    Assembler::vextractf64x4h(dst, src, 0);
  } else {
    Assembler::vmovdqu(dst, src);
  }
}

void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
    Assembler::vinsertf64x4h(dst, src, 0);
  } else {
    Assembler::vmovdqu(dst, src);
  }
}

void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
  } else {
    Assembler::vmovdqu(dst, src);
  }
}

void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    vmovdqu(dst, as_Address(src));
  } else {
    lea(rscratch1, src);

// ... [lines 3510-5631 of the original file are elided here] ...
  }

  if (UseSSE == 1) {
    subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      push(rbx);
      movl(rbx, 0xffff);
      kmovwl(k1, rbx);
      pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if (UseAVX > 2) {
        // Save upper half of ZMM registers
        subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }
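  // After the saves above, with UseSSE >= 2 the stack holds (in the order the
  // subptr calls ran, i.e. from higher to lower addresses): the upper 256 bits
  // of each ZMM register (COMPILER2 + AVX-512 only), then the upper 128 bits
  // of each YMM register (COMPILER2 + MaxVectorSize > 16 only), then the low
  // 128 bits of every XMM register. A rough layout sketch relative to the
  // final rsp, assuming all three regions were saved:
  //
  //   rsp + 0                 : xmm0..xmmN low 128 bits   (16 bytes each)
  //   rsp + 16*num_xmm_regs   : ymm0..ymmN upper halves   (16 bytes each)
  //   rsp + 32*num_xmm_regs   : zmm0..zmmN upper halves   (32 bytes each)
  //
  // The restore code later in this function walks the same regions in the
  // reverse order, so rsp ends up back where it started.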

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but cannot trust the C compiler)
    NEEDS_CLEANUP;

// ... [lines 5689-5735 of the original file are elided here] ...

    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*nb_args);
  }

  if (UseSSE == 1) {
    for (int n = 0; n < 8; n++) {
      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
      }
      addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        for (int n = 0; n < num_xmm_regs; n++) {
          vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
        }
        addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
  popa();
}
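// Note: the restores above undo the saves in LIFO order (XMM low halves, then
// YMM upper halves, then ZMM upper halves), and the closing popa() presumably
// pairs with a pusha() in the elided earlier part of this function, so the
// stack pointer and general-purpose registers are back to their values from
// before the runtime call.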

static const double pi_4 = 0.7853981633974483;

void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {