src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
                       // len => rcx, k => r8
                       // r9 and r10 may be used to save non-volatile registers
#ifdef _WIN64
    // last argument is on stack on Win64
    __ movl(k, Address(rsp, 6 * wordSize));
#endif
    __ movptr(r11, rdx);  // save offset in r11; rdx is passed to mul_add as a scratch register
    __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);

    restore_arg_regs();

    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

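For reference, the word-level operation this stub accelerates roughly mirrors java.math.BigInteger::implMulAdd: multiply a span of 32-bit words by k and accumulate into the output with carry propagation. A minimal scalar sketch, with illustrative names and signature not taken from the stub:

#include <cstdint>
#include <cstddef>

// Multiply 'len' words of 'in' by 'k' and accumulate into 'out', whose
// accumulation window ends 'offset' words before the end of the array.
// BigInteger stores the most significant word first, so both indices walk
// backwards; the carry out of the most significant word is returned.
static uint32_t mul_add_model(uint32_t* out, size_t out_len,
                              const uint32_t* in, size_t len,
                              size_t offset, uint32_t k) {
  uint64_t carry = 0;
  size_t o = out_len - offset - 1;
  for (size_t j = len; j-- > 0; ) {
    uint64_t p = (uint64_t)in[j] * k + out[o] + carry;
    out[o--] = (uint32_t)p;     // low 32 bits of the product-sum
    carry = p >> 32;            // high 32 bits carry into the next word
  }
  return (uint32_t)carry;
}
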
  address generate_bigIntegerRightShift() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");

    address start = __ pc();
    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
    // On Unix the arguments arrive in rdi, rsi, rdx, rcx, r8.
    const Register newArr = rdi;
    const Register oldArr = rsi;
    const Register newIdx = rdx;
    const Register shiftCount = rcx;  // shiftCount must be in rcx: shrd uses cl as its implicit shift count.
    const Register totalNumIter = r8;

    // On Windows, r9 and r10 are used to save rdi and rsi, so they cannot serve as temps.
    // Everywhere else we prefer r9 and r10 since they need not be saved before use.
    const Register tmp1 = r11;                    // Caller save.
    const Register tmp2 = rax;                    // Caller save.
    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
    const Register tmp5 = r14;                    // Callee save.
    const Register tmp6 = r15;                    // Callee save (unused below).

    const XMMRegister x0 = xmm0;
    const XMMRegister x1 = xmm1;
    const XMMRegister x2 = xmm2;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WINDOWS
    setup_arg_regs(4);
    // On Win64 the fifth argument is passed on the stack; load it into its register.
    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
    // Save callee save registers.
    __ push(tmp3);
    __ push(tmp4);
#endif
    __ push(tmp5);

    // Rename temps used throughout the code.
    const Register idx = tmp1;
    const Register nIdx = tmp2;

    __ xorl(idx, idx);

    // Start the right shift from the end of the array.
    // For example, with #iterations = 4 and newIdx = 1 the highest word written is
    //   dest[4] = src[4] >> shiftCount | src[3] << (32 - shiftCount)
    // and with #iterations = 4 and newIdx = 0 it is
    //   dest[3] = src[4] >> shiftCount | src[3] << (32 - shiftCount)
    __ movl(idx, totalNumIter);
    __ movl(nIdx, idx);
    __ addl(nIdx, newIdx);

    // If vectorization is enabled, check that there are enough iterations
    // (at least AVX3Threshold / 64) to be worth the 512-bit loop.
    // If not, fall through to ShiftTwo, which processes two iterations at a time.
    if (VM_Version::supports_vbmi2()) {
      __ cmpl(totalNumIter, (AVX3Threshold/64));
      __ jcc(Assembler::less, ShiftTwo);

      if (AVX3Threshold < 16 * 64) {
        __ cmpl(totalNumIter, 16);
        __ jcc(Assembler::less, ShiftTwo);
      }
      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
      __ subl(idx, 16);
      __ subl(nIdx, 16);
      __ BIND(Shift512Loop);
      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);
      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
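      // vpshrdvd below performs, per 32-bit lane, a funnel shift of the
      // 64-bit pair x1[i]:x2[i], keeping the low half:
      //   x2[i] = (x2[i] >> s) | (x1[i] << (32 - s))
      // i.e. 16 of the scalar shrd operations in one instruction.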
      __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
      __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
      __ subl(nIdx, 16);
      __ subl(idx, 16);
      __ jcc(Assembler::greaterEqual, Shift512Loop);
      __ addl(idx, 16);
      __ addl(nIdx, 16);
    }
    __ BIND(ShiftTwo);
    __ cmpl(idx, 2);
    __ jcc(Assembler::less, ShiftOne);
    __ subl(idx, 2);
    __ subl(nIdx, 2);
    __ BIND(ShiftTwoLoop);
    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ shrdl(tmp5, tmp4);
    __ shrdl(tmp4, tmp3);
    __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
    __ subl(nIdx, 2);
    __ subl(idx, 2);
    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);
    __ addl(idx, 2);
    __ addl(nIdx, 2);

    // Do the last iteration
    __ BIND(ShiftOne);
    __ cmpl(idx, 1);
    __ jcc(Assembler::less, Exit);
    __ subl(idx, 1);
    __ subl(nIdx, 1);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ shrdl(tmp4, tmp3);
    __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
    __ BIND(Exit);
    // Restore callee save registers.
    __ pop(tmp5);
#ifdef _WINDOWS
    __ pop(tmp4);
    __ pop(tmp3);
    restore_arg_regs();
#endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

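In scalar terms the whole worker computes the loop below. This is a reference sketch, not stub code: the names are descriptive, it assumes 0 < shiftCount < 32 (a C++ shift by 0 or 32 would be undefined here), and it assumes oldArr has numIter + 1 readable words, matching the loads above.

#include <cstdint>

// Each destination word is a funnel shift of two adjacent source words,
// exactly what a scalar shrd (or one lane of vpshrdvd) computes.
static void big_integer_right_shift_model(uint32_t* newArr, const uint32_t* oldArr,
                                          int newIdx, int shiftCount, int numIter) {
  for (int i = numIter - 1; i >= 0; i--) {
    newArr[newIdx + i] = (oldArr[i + 1] >> shiftCount) |
                         (oldArr[i]     << (32 - shiftCount));
  }
}
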
  /**
   *  Arguments:
   *
   *  Input:
   *    c_rarg0   - newArr address
   *    c_rarg1   - oldArr address
   *    c_rarg2   - newIdx
   *    c_rarg3   - shiftCount
   *  not Win64:
   *    c_rarg4   - numIter
   *  Win64:
   *    on stack (rsp + 40) - numIter
   */
  address generate_bigIntegerLeftShift() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
    address start = __ pc();
    Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
    // On Unix the arguments arrive in rdi, rsi, rdx, rcx, r8.
    const Register newArr = rdi;
    const Register oldArr = rsi;
    const Register newIdx = rdx;
    const Register shiftCount = rcx;  // shiftCount must be in rcx: shld uses cl as its implicit shift count.
    const Register totalNumIter = r8;
    // On Windows, r9 and r10 are used to save rdi and rsi, so they cannot serve as temps.
    // Everywhere else we prefer r9 and r10 since they need not be saved before use.
    const Register tmp1 = r11;                    // Caller save.
    const Register tmp2 = rax;                    // Caller save.
    const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
    const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
    const Register tmp5 = r14;                    // Callee save.

    const XMMRegister x0 = xmm0;
    const XMMRegister x1 = xmm1;
    const XMMRegister x2 = xmm2;
    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WINDOWS
    setup_arg_regs(4);
    // On Win64 the fifth argument is passed on the stack; load it into its register.
    __ movl(totalNumIter, Address(rsp, 6 * wordSize));
    // Save callee save registers.
    __ push(tmp3);
    __ push(tmp4);
#endif
    __ push(tmp5);

    // Rename temps used throughout the code.
    const Register idx = tmp1;
    const Register numIterTmp = tmp2;

    // Start idx from zero.
    __ xorl(idx, idx);
    // Compute an interior pointer into the new array so the same index can be
    // used for both the old and new arrays.
    __ lea(newArr, Address(newArr, newIdx, Address::times_4));
    __ movl(numIterTmp, totalNumIter);

    // If vectorization is enabled, check that there are enough iterations
    // (at least AVX3Threshold / 64) to be worth the 512-bit loop.
    // If not, fall through to ShiftTwo, which shifts two words at a time.
    if (VM_Version::supports_vbmi2()) {
      __ cmpl(totalNumIter, (AVX3Threshold/64));
      __ jcc(Assembler::less, ShiftTwo);

      if (AVX3Threshold < 16 * 64) {
        __ cmpl(totalNumIter, 16);
        __ jcc(Assembler::less, ShiftTwo);
      }
      __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
      __ subl(numIterTmp, 16);
      __ BIND(Shift512Loop);
      __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
      __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
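      // vpshldvd below performs, per 32-bit lane, a funnel shift of the
      // 64-bit pair x1[i]:x2[i], keeping the high half:
      //   x1[i] = (x1[i] << s) | (x2[i] >> (32 - s))
      // i.e. 16 of the scalar shld operations in one instruction.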
      __ vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
      __ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
      __ addl(idx, 16);
      __ subl(numIterTmp, 16);
      __ jcc(Assembler::greaterEqual, Shift512Loop);
      __ addl(numIterTmp, 16);
    }
    __ BIND(ShiftTwo);
    __ cmpl(totalNumIter, 1);
    __ jcc(Assembler::less, Exit);
    __ movl(tmp3, Address(oldArr, idx, Address::times_4));
    __ subl(numIterTmp, 2);
    __ jcc(Assembler::less, ShiftOne);

    __ BIND(ShiftTwoLoop);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
    __ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
    __ shldl(tmp3, tmp4);
    __ shldl(tmp4, tmp5);
    __ movl(Address(newArr, idx, Address::times_4), tmp3);
    __ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
    __ movl(tmp3, tmp5);
    __ addl(idx, 2);
    __ subl(numIterTmp, 2);
    __ jcc(Assembler::greaterEqual, ShiftTwoLoop);

    // Do the last iteration
    __ BIND(ShiftOne);
    __ addl(numIterTmp, 2);
    __ cmpl(numIterTmp, 1);
    __ jcc(Assembler::less, Exit);
    __ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
    __ shldl(tmp3, tmp4);
    __ movl(Address(newArr, idx, Address::times_4), tmp3);

    __ BIND(Exit);
    // Restore callee save registers.
    __ pop(tmp5);
#ifdef _WINDOWS
    __ pop(tmp4);
    __ pop(tmp3);
    restore_arg_regs();
#endif
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

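The left-shift worker reduces to the mirror-image loop; the same caveats as the right-shift sketch above apply (illustrative names, 0 < shiftCount < 32, one extra readable source word).

#include <cstdint>

// Each destination word combines a source word shifted left with the top
// bits of the following word, exactly what shld (or one lane of vpshldvd)
// computes.
static void big_integer_left_shift_model(uint32_t* newArr, const uint32_t* oldArr,
                                         int newIdx, int shiftCount, int numIter) {
  for (int i = 0; i < numIter; i++) {
    newArr[newIdx + i] = (oldArr[i]     << shiftCount) |
                         (oldArr[i + 1] >> (32 - shiftCount));
  }
}
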
  address generate_libmExp() {
    StubCodeMark mark(this, "StubRoutines", "libmExp");

    address start = __ pc();

    const XMMRegister x0  = xmm0;
    const XMMRegister x1  = xmm1;
    const XMMRegister x2  = xmm2;
    const XMMRegister x3  = xmm3;

    const XMMRegister x4  = xmm4;
    const XMMRegister x5  = xmm5;
    const XMMRegister x6  = xmm6;
    const XMMRegister x7  = xmm7;

    const Register tmp   = r11;

    BLOCK_COMMENT("Entry:");
    __ enter(); // required for proper stackwalking of RuntimeStub frame



    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                       &StubRoutines::_safefetch32_fault_pc,
                                                       &StubRoutines::_safefetch32_continuation_pc);
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                       &StubRoutines::_safefetchN_fault_pc,
                                                       &StubRoutines::_safefetchN_continuation_pc);
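    // SafeFetch32/SafeFetchN load a word from an address that may be unmapped:
    // if the load at the entry point faults, the VM's signal handler resumes
    // execution at the continuation pc, where the caller-supplied error value
    // is returned instead of the VM crashing.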

    BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
    if (bs_nm != NULL) {
      StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
    }
#ifdef COMPILER2
    if (UseMultiplyToLenIntrinsic) {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }
    if (UseSquareToLenIntrinsic) {
      StubRoutines::_squareToLen = generate_squareToLen();
    }
    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }
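    // The two BigInteger shift workers generated above are registered only on
    // CPUs with AVX-512 VBMI2 support, since their vector loops depend on the
    // vpshldvd/vpshrdvd concatenate-and-shift instructions.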
    if (VM_Version::supports_vbmi2()) {
      StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
      StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
    }
#ifndef _WINDOWS
    if (UseMontgomeryMultiplyIntrinsic) {
      StubRoutines::_montgomeryMultiply
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
    }
    if (UseMontgomerySquareIntrinsic) {
      StubRoutines::_montgomerySquare
        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
    }
#endif // !_WINDOWS
#endif // COMPILER2

    if (UseVectorizedMismatchIntrinsic) {
      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
    }
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {