src/cpu/x86/vm/stubGenerator_x86_32.cpp

rev 8344 : 8076276: Add support for AVX512
Reviewed-by: kvn, roland
Contributed-by: michael.c.berg@intel.com


 149     const Address result_type   (rbp,  4 * wordSize);
 150     const Address method        (rbp,  5 * wordSize);
 151     const Address entry_point   (rbp,  6 * wordSize);
 152     const Address parameters    (rbp,  7 * wordSize);
 153     const Address parameter_size(rbp,  8 * wordSize);
 154     const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
 155     sse_save =  UseSSE > 0;
 156 
 157     // stub code
 158     __ enter();
 159     __ movptr(rcx, parameter_size);              // parameter counter
 160     __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
 161     __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
 162     __ subptr(rsp, rcx);
 163     __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
 164 
 165     // save rdi, rsi, & rbx, according to C calling conventions
 166     __ movptr(saved_rdi, rdi);
 167     __ movptr(saved_rsi, rsi);
 168     __ movptr(saved_rbx, rbx);

 169     // save and initialize %mxcsr
 170     if (sse_save) {
 171       Label skip_ldmx;
 172       __ stmxcsr(mxcsr_save);
 173       __ movl(rax, mxcsr_save);
 174       __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
 175       ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
 176       __ cmp32(rax, mxcsr_std);
 177       __ jcc(Assembler::equal, skip_ldmx);
 178       __ ldmxcsr(mxcsr_std);
 179       __ bind(skip_ldmx);
 180     }
 181 
 182     // make sure the control word is correct.
 183     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 184 
 185 #ifdef ASSERT
 186     // make sure we have no pending exceptions
 187     { Label L;
 188       __ movptr(rcx, thread);


 777     }
 778   }
 779 
 780 
 781   // Copy 64-byte chunks
 782   //
 783   // Inputs:
 784   //   from        - source array address
 785   //   to_from     - destination array address - from
 786   //   qword_count - 8-byte element count, positive
 787   //
 788   void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
 789     assert( UseSSE >= 2, "supported cpu only" );
 790     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
 791     // Copy 64-byte chunks
 792     __ jmpb(L_copy_64_bytes);
 793     __ align(OptoLoopAlignment);
 794   __ BIND(L_copy_64_bytes_loop);
 795 
 796     if (UseUnalignedLoadStores) {
 797       if (UseAVX >= 2) {
 798         __ vmovdqu(xmm0, Address(from,  0));
 799         __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
 800         __ vmovdqu(xmm1, Address(from, 32));
 801         __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
 802       } else {
 803         __ movdqu(xmm0, Address(from, 0));
 804         __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
 805         __ movdqu(xmm1, Address(from, 16));
 806         __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
 807         __ movdqu(xmm2, Address(from, 32));
 808         __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
 809         __ movdqu(xmm3, Address(from, 48));
 810         __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
 811       }
 812     } else {
 813       __ movq(xmm0, Address(from, 0));
 814       __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
 815       __ movq(xmm1, Address(from, 8));
 816       __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
 817       __ movq(xmm2, Address(from, 16));
 818       __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
 819       __ movq(xmm3, Address(from, 24));
 820       __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
 821       __ movq(xmm4, Address(from, 32));
 822       __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
 823       __ movq(xmm5, Address(from, 40));
 824       __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
 825       __ movq(xmm6, Address(from, 48));
 826       __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
 827       __ movq(xmm7, Address(from, 56));
 828       __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
 829     }
 830 
 831     __ addl(from, 64);
 832   __ BIND(L_copy_64_bytes);
 833     __ subl(qword_count, 8);
 834     __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
 835 
 836     if (UseUnalignedLoadStores && (UseAVX >= 2)) {
 837       // clean upper bits of YMM registers
 838       __ vpxor(xmm0, xmm0);
 839       __ vpxor(xmm1, xmm1);
 840     }
 841     __ addl(qword_count, 8);
 842     __ jccb(Assembler::zero, L_exit);
 843     //
 844     // length is too short, just copy qwords
 845     //
 846   __ BIND(L_copy_8_bytes);
 847     __ movq(xmm0, Address(from, 0));
 848     __ movq(Address(from, to_from, Address::times_1), xmm0);
 849     __ addl(from, 8);
 850     __ decrement(qword_count);
 851     __ jcc(Assembler::greater, L_copy_8_bytes);
 852   __ BIND(L_exit);
 853   }
 854 
 855   // Copy 64-byte chunks
 856   //




 149     const Address result_type   (rbp,  4 * wordSize);
 150     const Address method        (rbp,  5 * wordSize);
 151     const Address entry_point   (rbp,  6 * wordSize);
 152     const Address parameters    (rbp,  7 * wordSize);
 153     const Address parameter_size(rbp,  8 * wordSize);
 154     const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
 155     sse_save =  UseSSE > 0;
 156 
 157     // stub code
 158     __ enter();
 159     __ movptr(rcx, parameter_size);              // parameter counter
 160     __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
 161     __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
 162     __ subptr(rsp, rcx);
 163     __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
 164 
 165     // save rdi, rsi, & rbx, according to C calling conventions
 166     __ movptr(saved_rdi, rdi);
 167     __ movptr(saved_rsi, rsi);
 168     __ movptr(saved_rbx, rbx);
 169 
 170     // provide initial value for required masks
 171     if (UseAVX > 2) {
 172       __ movl(rbx, 0xffff);
 173       __ kmovdl(k1, rbx);
 174     }
 175 
 176     // save and initialize %mxcsr
 177     if (sse_save) {
 178       Label skip_ldmx;
 179       __ stmxcsr(mxcsr_save);
 180       __ movl(rax, mxcsr_save);
 181       __ andl(rax, MXCSR_MASK);    // Only check control and mask bits
 182       ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std());
 183       __ cmp32(rax, mxcsr_std);
 184       __ jcc(Assembler::equal, skip_ldmx);
 185       __ ldmxcsr(mxcsr_std);
 186       __ bind(skip_ldmx);
 187     }
 188 
 189     // make sure the control word is correct.
 190     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 191 
 192 #ifdef ASSERT
 193     // make sure we have no pending exceptions
 194     { Label L;
 195       __ movptr(rcx, thread);
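The if (UseAVX > 2) block added above loads 0xffff into rbx and copies it into opmask register k1 ("provide initial value for required masks"). With all 16 bits set, an EVEX-encoded instruction that takes k1 as its mask updates every 32-bit lane of a 512-bit register, so masked moves behave like ordinary full-width moves. A minimal sketch of that idea using standard AVX-512F intrinsics (illustration only, not the HotSpot macro-assembler API; the function name is hypothetical and an AVX-512F target, e.g. built with -mavx512f, is assumed):

    #include <immintrin.h>
    #include <cstdint>

    // Copy 16 ints under an all-ones opmask; 0xffff enables all 16 dword lanes,
    // which is what kmovdl(k1, 0xffff) arranges for EVEX-encoded instructions.
    void masked_copy_16_ints(const int32_t* from, int32_t* to) {
      __mmask16 m = 0xffff;                            // analogous to k1 = 0xffff
      __m512i   v = _mm512_maskz_loadu_epi32(m, from); // load the enabled lanes
      _mm512_mask_storeu_epi32(to, m, v);              // store the enabled lanes
    }

With a zero mask the same store would leave every destination lane untouched, which is why the stub seeds k1 once on entry, before any AVX-512 code runs under it.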


 784     }
 785   }
 786 
 787 
 788   // Copy 64-byte chunks
 789   //
 790   // Inputs:
 791   //   from        - source array address
 792   //   to_from     - destination array address - from
 793   //   qword_count - 8-byte element count, positive
 794   //
 795   void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
 796     assert( UseSSE >= 2, "supported cpu only" );
 797     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
 798     // Copy 64-byte chunks
 799     __ jmpb(L_copy_64_bytes);
 800     __ align(OptoLoopAlignment);
 801   __ BIND(L_copy_64_bytes_loop);
 802 
 803     if (UseUnalignedLoadStores) {
 804       if (UseAVX > 2) {
 805         __ evmovdqu(xmm0, Address(from, 0), Assembler::AVX_512bit);
 806         __ evmovdqu(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
 807       } else if (UseAVX == 2) {
 808         __ vmovdqu(xmm0, Address(from,  0));
 809         __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
 810         __ vmovdqu(xmm1, Address(from, 32));
 811         __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
 812       } else {
 813         __ movdqu(xmm0, Address(from, 0));
 814         __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
 815         __ movdqu(xmm1, Address(from, 16));
 816         __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
 817         __ movdqu(xmm2, Address(from, 32));
 818         __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
 819         __ movdqu(xmm3, Address(from, 48));
 820         __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
 821       }
 822     } else {
 823       __ movq(xmm0, Address(from, 0));
 824       __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
 825       __ movq(xmm1, Address(from, 8));
 826       __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
 827       __ movq(xmm2, Address(from, 16));
 828       __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
 829       __ movq(xmm3, Address(from, 24));
 830       __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
 831       __ movq(xmm4, Address(from, 32));
 832       __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
 833       __ movq(xmm5, Address(from, 40));
 834       __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
 835       __ movq(xmm6, Address(from, 48));
 836       __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
 837       __ movq(xmm7, Address(from, 56));
 838       __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
 839     }
 840 
 841     __ addl(from, 64);
 842   __ BIND(L_copy_64_bytes);
 843     __ subl(qword_count, 8);
 844     __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
 845 
 846     if (UseUnalignedLoadStores && (UseAVX == 2)) {
 847       // clean upper bits of YMM registers
 848       __ vpxor(xmm0, xmm0);
 849       __ vpxor(xmm1, xmm1);
 850     }
 851     __ addl(qword_count, 8);
 852     __ jccb(Assembler::zero, L_exit);
 853     //
 854     // length is too short, just copy qwords
 855     //
 856   __ BIND(L_copy_8_bytes);
 857     __ movq(xmm0, Address(from, 0));
 858     __ movq(Address(from, to_from, Address::times_1), xmm0);
 859     __ addl(from, 8);
 860     __ decrement(qword_count);
 861     __ jcc(Assembler::greater, L_copy_8_bytes);
 862   __ BIND(L_exit);
 863   }
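Throughout xmm_copy_forward, to_from holds the byte distance to - from, so a store of the form Address(from, to_from, Address::times_1, disp) resolves to to + disp; advancing from alone therefore moves the source and destination in lock-step. A rough C++ sketch of the control flow the stub emits, under that reading (illustration only; plain memcpy stands in for the SSE/AVX/AVX-512 moves, and the arrays are assumed not to overlap, as in the disjoint-copy stubs):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // qword_count: number of 8-byte elements to copy; to_from: (to - from) in bytes.
    void xmm_copy_forward_sketch(uint8_t* from, std::ptrdiff_t to_from, int qword_count) {
      // Main loop (L_copy_64_bytes_loop): 64 bytes, i.e. 8 qwords, per iteration.
      while ((qword_count -= 8) >= 0) {
        std::memcpy(from + to_from, from, 64);  // stands in for the xmm/ymm/zmm moves
        from += 64;
      }
      qword_count += 8;                         // undo the final over-subtraction
      // Tail (L_copy_8_bytes): copy any remaining qwords one at a time.
      while (qword_count > 0) {
        std::memcpy(from + to_from, from, 8);   // stands in for movq xmm0, [from]
        from += 8;
        --qword_count;
      }
    }

The vpxor pair after the main loop is now guarded by UseAVX == 2, so the "clean upper bits of YMM registers" step stays tied to the 256-bit vmovdqu path rather than the new 512-bit evmovdqu path.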
 864 
 865   // Copy 64-byte chunks
 866   //

