33 # include "os_linux.inline.hpp"
34 #endif
35 #ifdef TARGET_OS_FAMILY_solaris
36 # include "os_solaris.inline.hpp"
37 #endif
38 #ifdef TARGET_OS_FAMILY_windows
39 # include "os_windows.inline.hpp"
40 #endif
41 #ifdef TARGET_OS_FAMILY_bsd
42 # include "os_bsd.inline.hpp"
43 #endif
44
45
46 int VM_Version::_cpu;        // CPU family, filled in from the cpuid results
47 int VM_Version::_model;      // CPU model
48 int VM_Version::_stepping;   // CPU stepping
49 int VM_Version::_cpuFeatures;  // bit set of detected CPU features
50 const char* VM_Version::_features_str = "";  // printable feature list; empty until features are determined
51 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };  // raw cpuid output buffer, zero-filled until the stub runs
52
53 static BufferBlob* stub_blob;      // code blob holding the generated getPsrInfo stub
54 static const int stub_size = 550;  // upper bound on the stub's code size, in bytes
55 
56 extern "C" {
57 typedef void (*getPsrInfo_stub_t)(void*);  // stub entry point: fills the buffer passed in
58 }
59 static getPsrInfo_stub_t getPsrInfo_stub = NULL;  // set once the stub has been generated
60
61
62 class VM_Version_StubGenerator: public StubCodeGenerator {
63 public:
64
65 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
66
67 address generate_getPsrInfo() {
68 // Flags to test CPU type.
69 const uint32_t HS_EFL_AC = 0x40000;
70 const uint32_t HS_EFL_ID = 0x200000;
71 // Values for when we don't have a CPUID instruction.
72 const int CPU_FAMILY_SHIFT = 8;
73 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
74 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
217 __ movl(Address(rsi, 4), rbx);
218 __ movl(Address(rsi, 8), rcx);
219 __ movl(Address(rsi,12), rdx);
220
221 //
222 // Standard cpuid(0x1)
223 //
224 __ bind(std_cpuid1);
225 __ movl(rax, 1);
226 __ cpuid();
227 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
228 __ movl(Address(rsi, 0), rax);
229 __ movl(Address(rsi, 4), rbx);
230 __ movl(Address(rsi, 8), rcx);
231 __ movl(Address(rsi,12), rdx);
232
233 //
234 // Check if OS has enabled XGETBV instruction to access XCR0
235 // (OSXSAVE feature flag) and CPU supports AVX
236 //
237 __ andl(rcx, 0x18000000);
238 __ cmpl(rcx, 0x18000000);
239 __ jccb(Assembler::notEqual, sef_cpuid);
240
241 //
242 // XCR0, XFEATURE_ENABLED_MASK register
243 //
244 __ xorl(rcx, rcx); // zero for XCR0 register
245 __ xgetbv();
246 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
247 __ movl(Address(rsi, 0), rax);
248 __ movl(Address(rsi, 4), rdx);
249
250 //
251 // cpuid(0x7) Structured Extended Features
252 //
253 __ bind(sef_cpuid);
254 __ movl(rax, 7);
255 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
256 __ jccb(Assembler::greater, ext_cpuid);
257
258 __ xorl(rcx, rcx);
259 __ cpuid();
260 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
261 __ movl(Address(rsi, 0), rax);
262 __ movl(Address(rsi, 4), rbx);
263
264 //
265 // Extended cpuid(0x80000000)
266 //
267 __ bind(ext_cpuid);
268 __ movl(rax, 0x80000000);
269 __ cpuid();
523 if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
524 warning("AES intrinsics not available on this CPU");
525 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
526 }
527
528 #ifdef COMPILER2
529 if (UseFPUForSpilling) {
530 if (UseSSE < 2) {
531 // Only supported with SSE2+
532 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
533 }
534 }
535 if (MaxVectorSize > 0) {
536 if (!is_power_of_2(MaxVectorSize)) {
537 warning("MaxVectorSize must be a power of 2");
538 FLAG_SET_DEFAULT(MaxVectorSize, 32);
539 }
540 if (MaxVectorSize > 32) {
541 FLAG_SET_DEFAULT(MaxVectorSize, 32);
542 }
543 if (MaxVectorSize > 16 && UseAVX == 0) {
544 // Only supported with AVX+
545 FLAG_SET_DEFAULT(MaxVectorSize, 16);
546 }
547 if (UseSSE < 2) {
548 // Only supported with SSE2+
549 FLAG_SET_DEFAULT(MaxVectorSize, 0);
550 }
551 }
552 #endif
553
554 // On new cpus instructions which update whole XMM register should be used
555 // to prevent partial register stall due to dependencies on high half.
556 //
557 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
558 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
559 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
560 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
561
562 if( is_amd() ) { // AMD cpus specific settings
563 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
564 // Use it on new AMD cpus starting from Opteron.
565 UseAddressNop = true;
566 }
567 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
568 // Use it on new AMD cpus starting from Opteron.
569 UseNewLongLShift = true;
570 }
661 // generated NOP instructions. 11 is the largest size of one
662 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
663 MaxLoopPad = 11;
664 }
665 #endif // COMPILER2
666 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
667 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
668 }
669 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
670 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
671 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
672 }
673 }
674 if (supports_sse4_2() && UseSSE >= 4) {
675 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
676 UseSSE42Intrinsics = true;
677 }
678 }
679 }
680 }
681 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
682 if (MaxVectorSize > 16) {
683 // Limit vectors size to 16 bytes on BSD until it fixes
684 // restoring upper 128bit of YMM registers on return
685 // from signal handler.
686 FLAG_SET_DEFAULT(MaxVectorSize, 16);
687 }
688 #endif // COMPILER2
689
690 // Use count leading zeros count instruction if available.
691 if (supports_lzcnt()) {
692 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
693 UseCountLeadingZerosInstruction = true;
694 }
695 } else if (UseCountLeadingZerosInstruction) {
696 warning("lzcnt instruction is not available on this CPU");
697 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
698 }
699
700 if (supports_bmi1()) {
701 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
702 UseBMI1Instructions = true;
703 }
704 } else if (UseBMI1Instructions) {
705 warning("BMI1 instructions are not available on this CPU");
706 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
707 }
708
797 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
798 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
799 PrefetchFieldsAhead = prefetch_fields_ahead();
800 #endif
801
802 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
803 (cache_line_size > ContendedPaddingWidth))
804 ContendedPaddingWidth = cache_line_size;
805
806 #ifndef PRODUCT
807 if (PrintMiscellaneous && Verbose) {
808 tty->print_cr("Logical CPUs per core: %u",
809 logical_processors_per_package());
810 tty->print("UseSSE=%d",UseSSE);
811 if (UseAVX > 0) {
812 tty->print(" UseAVX=%d",UseAVX);
813 }
814 if (UseAES) {
815 tty->print(" UseAES=1");
816 }
817 tty->cr();
818 tty->print("Allocation");
819 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
820 tty->print_cr(": no prefetching");
821 } else {
822 tty->print(" prefetching: ");
823 if (UseSSE == 0 && supports_3dnow_prefetch()) {
824 tty->print("PREFETCHW");
825 } else if (UseSSE >= 1) {
826 if (AllocatePrefetchInstr == 0) {
827 tty->print("PREFETCHNTA");
828 } else if (AllocatePrefetchInstr == 1) {
829 tty->print("PREFETCHT0");
830 } else if (AllocatePrefetchInstr == 2) {
831 tty->print("PREFETCHT2");
832 } else if (AllocatePrefetchInstr == 3) {
833 tty->print("PREFETCHW");
834 }
835 }
836 if (AllocatePrefetchLines > 1) {
|
33 # include "os_linux.inline.hpp"
34 #endif
35 #ifdef TARGET_OS_FAMILY_solaris
36 # include "os_solaris.inline.hpp"
37 #endif
38 #ifdef TARGET_OS_FAMILY_windows
39 # include "os_windows.inline.hpp"
40 #endif
41 #ifdef TARGET_OS_FAMILY_bsd
42 # include "os_bsd.inline.hpp"
43 #endif
44
45
46 int VM_Version::_cpu;        // CPU family, filled in from the cpuid results
47 int VM_Version::_model;      // CPU model
48 int VM_Version::_stepping;   // CPU stepping
49 int VM_Version::_cpuFeatures;  // bit set of detected CPU features
50 const char* VM_Version::_features_str = "";  // printable feature list; empty until features are determined
51 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };  // raw cpuid output buffer, zero-filled until the stub runs
52
53 // Address of instruction which causes SEGV
54 address VM_Version::_cpuinfo_segv_addr = 0;
55 // Address of instruction after the one which causes SEGV (where execution resumes after the signal)
56 address VM_Version::_cpuinfo_cont_addr = 0;
57
58 static BufferBlob* stub_blob;      // code blob holding the generated getPsrInfo stub
59 static const int stub_size = 600;  // upper bound on the stub's code size, in bytes
60 
61 extern "C" {
62 typedef void (*getPsrInfo_stub_t)(void*);  // stub entry point: fills the buffer passed in
63 }
64 static getPsrInfo_stub_t getPsrInfo_stub = NULL;  // set once the stub has been generated
65
66
67 class VM_Version_StubGenerator: public StubCodeGenerator {
68 public:
69
70 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
71
72 address generate_getPsrInfo() {
73 // Flags to test CPU type.
74 const uint32_t HS_EFL_AC = 0x40000;
75 const uint32_t HS_EFL_ID = 0x200000;
76 // Values for when we don't have a CPUID instruction.
77 const int CPU_FAMILY_SHIFT = 8;
78 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
79 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
222 __ movl(Address(rsi, 4), rbx);
223 __ movl(Address(rsi, 8), rcx);
224 __ movl(Address(rsi,12), rdx);
225
226 //
227 // Standard cpuid(0x1)
228 //
229 __ bind(std_cpuid1);
230 __ movl(rax, 1);
231 __ cpuid();
232 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
233 __ movl(Address(rsi, 0), rax);
234 __ movl(Address(rsi, 4), rbx);
235 __ movl(Address(rsi, 8), rcx);
236 __ movl(Address(rsi,12), rdx);
237
238 //
239 // Check if OS has enabled XGETBV instruction to access XCR0
240 // (OSXSAVE feature flag) and CPU supports AVX
241 //
242 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
243 __ cmpl(rcx, 0x18000000);
244 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
245
246 //
247 // XCR0, XFEATURE_ENABLED_MASK register
248 //
249 __ xorl(rcx, rcx); // zero for XCR0 register
250 __ xgetbv();
251 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
252 __ movl(Address(rsi, 0), rax);
253 __ movl(Address(rsi, 4), rdx);
254
255 __ andl(rax, 0x6); // xcr0 bits sse | ymm
256 __ cmpl(rax, 0x6);
257 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
258
259 //
260 // Some OSs have a bug when upper 128bits of YMM
261 // registers are not restored after a signal processing.
262 // Generate SEGV here (reference through NULL)
263 // and check upper YMM bits after it.
264 //
265 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
266
267 // load value into all 32 bytes of ymm7 register
268 __ movl(rcx, VM_Version::ymm_test_value());
269
270 __ movdl(xmm0, rcx);
271 __ pshufd(xmm0, xmm0, 0x00);
272 __ vinsertf128h(xmm0, xmm0, xmm0);
273 __ vmovdqu(xmm7, xmm0);
274 #ifdef _LP64
275 __ vmovdqu(xmm8, xmm0);
276 __ vmovdqu(xmm15, xmm0);
277 #endif
278
279 __ xorl(rsi, rsi);
280 VM_Version::set_cpuinfo_segv_addr( __ pc() );
281 // Generate SEGV
282 __ movl(rax, Address(rsi, 0));
283
284 VM_Version::set_cpuinfo_cont_addr( __ pc() );
285 // Returns here after signal. Save xmm0 to check it later.
286 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
287 __ vmovdqu(Address(rsi, 0), xmm0);
288 __ vmovdqu(Address(rsi, 32), xmm7);
289 #ifdef _LP64
290 __ vmovdqu(Address(rsi, 64), xmm8);
291 __ vmovdqu(Address(rsi, 96), xmm15);
292 #endif
293
294 VM_Version::clean_cpuFeatures();
295
296 //
297 // cpuid(0x7) Structured Extended Features
298 //
299 __ bind(sef_cpuid);
300 __ movl(rax, 7);
301 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
302 __ jccb(Assembler::greater, ext_cpuid);
303
304 __ xorl(rcx, rcx);
305 __ cpuid();
306 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
307 __ movl(Address(rsi, 0), rax);
308 __ movl(Address(rsi, 4), rbx);
309
310 //
311 // Extended cpuid(0x80000000)
312 //
313 __ bind(ext_cpuid);
314 __ movl(rax, 0x80000000);
315 __ cpuid();
569 if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
570 warning("AES intrinsics not available on this CPU");
571 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
572 }
573
574 #ifdef COMPILER2
575 if (UseFPUForSpilling) {
576 if (UseSSE < 2) {
577 // Only supported with SSE2+
578 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
579 }
580 }
581 if (MaxVectorSize > 0) {
582 if (!is_power_of_2(MaxVectorSize)) {
583 warning("MaxVectorSize must be a power of 2");
584 FLAG_SET_DEFAULT(MaxVectorSize, 32);
585 }
586 if (MaxVectorSize > 32) {
587 FLAG_SET_DEFAULT(MaxVectorSize, 32);
588 }
589 if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
590 // 32 bytes vectors (in YMM) are only supported with AVX+
591 FLAG_SET_DEFAULT(MaxVectorSize, 16);
592 }
593 if (UseSSE < 2) {
594 // Vectors (in XMM) are only supported with SSE2+
595 FLAG_SET_DEFAULT(MaxVectorSize, 0);
596 }
597 #ifdef ASSERT
598 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
599 tty->print_cr("State of YMM registers after signal handle:");
600 int nreg = 2 LP64_ONLY(+2);
601 const char* ymm_name[4] = {"0", "7", "8", "15"};
602 for (int i = 0; i < nreg; i++) {
603 tty->print("YMM%s:", ymm_name[i]);
604 for (int j = 7; j >=0; j--) {
605 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
606 }
607 tty->cr();
608 }
609 }
610 #endif
611 }
612 #endif
613
614 // On new cpus instructions which update whole XMM register should be used
615 // to prevent partial register stall due to dependencies on high half.
616 //
617 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
618 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
619 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
620 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
621
622 if( is_amd() ) { // AMD cpus specific settings
623 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
624 // Use it on new AMD cpus starting from Opteron.
625 UseAddressNop = true;
626 }
627 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
628 // Use it on new AMD cpus starting from Opteron.
629 UseNewLongLShift = true;
630 }
721 // generated NOP instructions. 11 is the largest size of one
722 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
723 MaxLoopPad = 11;
724 }
725 #endif // COMPILER2
726 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
727 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
728 }
729 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
730 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
731 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
732 }
733 }
734 if (supports_sse4_2() && UseSSE >= 4) {
735 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
736 UseSSE42Intrinsics = true;
737 }
738 }
739 }
740 }
741
742 // Use count leading zeros count instruction if available.
743 if (supports_lzcnt()) {
744 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
745 UseCountLeadingZerosInstruction = true;
746 }
747 } else if (UseCountLeadingZerosInstruction) {
748 warning("lzcnt instruction is not available on this CPU");
749 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
750 }
751
752 if (supports_bmi1()) {
753 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
754 UseBMI1Instructions = true;
755 }
756 } else if (UseBMI1Instructions) {
757 warning("BMI1 instructions are not available on this CPU");
758 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
759 }
760
849 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
850 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
851 PrefetchFieldsAhead = prefetch_fields_ahead();
852 #endif
853
854 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
855 (cache_line_size > ContendedPaddingWidth))
856 ContendedPaddingWidth = cache_line_size;
857
858 #ifndef PRODUCT
859 if (PrintMiscellaneous && Verbose) {
860 tty->print_cr("Logical CPUs per core: %u",
861 logical_processors_per_package());
862 tty->print("UseSSE=%d",UseSSE);
863 if (UseAVX > 0) {
864 tty->print(" UseAVX=%d",UseAVX);
865 }
866 if (UseAES) {
867 tty->print(" UseAES=1");
868 }
869 #ifdef COMPILER2
870 if (MaxVectorSize > 0) {
871 tty->print(" MaxVectorSize=%d", MaxVectorSize);
872 }
873 #endif
874 tty->cr();
875 tty->print("Allocation");
876 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
877 tty->print_cr(": no prefetching");
878 } else {
879 tty->print(" prefetching: ");
880 if (UseSSE == 0 && supports_3dnow_prefetch()) {
881 tty->print("PREFETCHW");
882 } else if (UseSSE >= 1) {
883 if (AllocatePrefetchInstr == 0) {
884 tty->print("PREFETCHNTA");
885 } else if (AllocatePrefetchInstr == 1) {
886 tty->print("PREFETCHT0");
887 } else if (AllocatePrefetchInstr == 2) {
888 tty->print("PREFETCHT2");
889 } else if (AllocatePrefetchInstr == 3) {
890 tty->print("PREFETCHW");
891 }
892 }
893 if (AllocatePrefetchLines > 1) {
|