33 # include "os_linux.inline.hpp"
34 #endif
35 #ifdef TARGET_OS_FAMILY_solaris
36 # include "os_solaris.inline.hpp"
37 #endif
38 #ifdef TARGET_OS_FAMILY_windows
39 # include "os_windows.inline.hpp"
40 #endif
41 #ifdef TARGET_OS_FAMILY_bsd
42 # include "os_bsd.inline.hpp"
43 #endif
44
45
// Definitions of VM_Version's static fields: cpuid-derived vendor id,
// model and stepping numbers, plus the detected feature bit mask.
46 int VM_Version::_cpu;
47 int VM_Version::_model;
48 int VM_Version::_stepping;
49 int VM_Version::_cpuFeatures;
// Human-readable feature list; empty until feature detection fills it in.
50 const char* VM_Version::_features_str = "";
// Raw cpuid output buffer -- presumably filled by the generated
// getPsrInfo stub (it stores to VM_Version::*_offset() slots); zeroed
// until the stub has run.
51 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
52
// Code-cache blob that will hold the generated cpuid ("getPsrInfo") stub.
53 static BufferBlob* stub_blob;
// Upper bound in bytes for the generated stub's code -- presumably
// checked where the blob is allocated (outside this chunk); TODO confirm.
54 static const int stub_size = 550;
55 
// C linkage so the generated code can be invoked through a plain
// function pointer that takes the destination (CpuidInfo) buffer.
56 extern "C" {
57   typedef void (*getPsrInfo_stub_t)(void*);
58 }
// Entry point of the generated stub; NULL until stub generation runs.
59 static getPsrInfo_stub_t getPsrInfo_stub = NULL;
60
61
62 class VM_Version_StubGenerator: public StubCodeGenerator {
63 public:
64
65 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
66
67 address generate_getPsrInfo() {
68 // Flags to test CPU type.
69 const uint32_t HS_EFL_AC = 0x40000;
70 const uint32_t HS_EFL_ID = 0x200000;
71 // Values for when we don't have a CPUID instruction.
72 const int CPU_FAMILY_SHIFT = 8;
73 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
74 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
217 __ movl(Address(rsi, 4), rbx);
218 __ movl(Address(rsi, 8), rcx);
219 __ movl(Address(rsi,12), rdx);
220
221 //
222 // Standard cpuid(0x1)
223 //
224 __ bind(std_cpuid1);
225 __ movl(rax, 1);
226 __ cpuid();
227 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
228 __ movl(Address(rsi, 0), rax);
229 __ movl(Address(rsi, 4), rbx);
230 __ movl(Address(rsi, 8), rcx);
231 __ movl(Address(rsi,12), rdx);
232
233 //
234 // Check if OS has enabled XGETBV instruction to access XCR0
235 // (OSXSAVE feature flag) and CPU supports AVX
236 //
237 __ andl(rcx, 0x18000000);
238 __ cmpl(rcx, 0x18000000);
239 __ jccb(Assembler::notEqual, sef_cpuid);
240
241 //
242 // XCR0, XFEATURE_ENABLED_MASK register
243 //
244 __ xorl(rcx, rcx); // zero for XCR0 register
245 __ xgetbv();
246 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
247 __ movl(Address(rsi, 0), rax);
248 __ movl(Address(rsi, 4), rdx);
249
250 //
251 // cpuid(0x7) Structured Extended Features
252 //
253 __ bind(sef_cpuid);
254 __ movl(rax, 7);
255 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
256 __ jccb(Assembler::greater, ext_cpuid);
257
258 __ xorl(rcx, rcx);
259 __ cpuid();
260 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
261 __ movl(Address(rsi, 0), rax);
262 __ movl(Address(rsi, 4), rbx);
263
264 //
265 // Extended cpuid(0x80000000)
266 //
267 __ bind(ext_cpuid);
268 __ movl(rax, 0x80000000);
269 __ cpuid();
523 if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
524 warning("AES intrinsics not available on this CPU");
525 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
526 }
527
528 #ifdef COMPILER2
529 if (UseFPUForSpilling) {
530 if (UseSSE < 2) {
531 // Only supported with SSE2+
532 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
533 }
534 }
535 if (MaxVectorSize > 0) {
536 if (!is_power_of_2(MaxVectorSize)) {
537 warning("MaxVectorSize must be a power of 2");
538 FLAG_SET_DEFAULT(MaxVectorSize, 32);
539 }
540 if (MaxVectorSize > 32) {
541 FLAG_SET_DEFAULT(MaxVectorSize, 32);
542 }
543 if (MaxVectorSize > 16 && UseAVX == 0) {
544 // Only supported with AVX+
545 FLAG_SET_DEFAULT(MaxVectorSize, 16);
546 }
547 if (UseSSE < 2) {
548 // Only supported with SSE2+
549 FLAG_SET_DEFAULT(MaxVectorSize, 0);
550 }
551 }
552 #endif
553
554 // On new cpus instructions which update whole XMM register should be used
555 // to prevent partial register stall due to dependencies on high half.
556 //
557 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
558 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
559 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
560 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
561
562 if( is_amd() ) { // AMD cpus specific settings
563 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
564 // Use it on new AMD cpus starting from Opteron.
565 UseAddressNop = true;
566 }
567 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
568 // Use it on new AMD cpus starting from Opteron.
661 // generated NOP instructions. 11 is the largest size of one
662 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
663 MaxLoopPad = 11;
664 }
665 #endif // COMPILER2
666 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
667 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
668 }
669 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
670 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
671 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
672 }
673 }
674 if (supports_sse4_2() && UseSSE >= 4) {
675 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
676 UseSSE42Intrinsics = true;
677 }
678 }
679 }
680 }
681 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
682 if (MaxVectorSize > 16) {
683 // Limit vectors size to 16 bytes on BSD until it fixes
684 // restoring upper 128bit of YMM registers on return
685 // from signal handler.
686 FLAG_SET_DEFAULT(MaxVectorSize, 16);
687 }
688 #endif // COMPILER2
689
690 // Use count leading zeros count instruction if available.
691 if (supports_lzcnt()) {
692 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
693 UseCountLeadingZerosInstruction = true;
694 }
695 } else if (UseCountLeadingZerosInstruction) {
696 warning("lzcnt instruction is not available on this CPU");
697 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
698 }
699
700 if (supports_bmi1()) {
701 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
702 UseBMI1Instructions = true;
703 }
704 } else if (UseBMI1Instructions) {
705 warning("BMI1 instructions are not available on this CPU");
706 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
707 }
708
797 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
798 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
799 PrefetchFieldsAhead = prefetch_fields_ahead();
800 #endif
801
802 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
803 (cache_line_size > ContendedPaddingWidth))
804 ContendedPaddingWidth = cache_line_size;
805
806 #ifndef PRODUCT
807 if (PrintMiscellaneous && Verbose) {
808 tty->print_cr("Logical CPUs per core: %u",
809 logical_processors_per_package());
810 tty->print("UseSSE=%d",UseSSE);
811 if (UseAVX > 0) {
812 tty->print(" UseAVX=%d",UseAVX);
813 }
814 if (UseAES) {
815 tty->print(" UseAES=1");
816 }
817 tty->cr();
818 tty->print("Allocation");
819 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
820 tty->print_cr(": no prefetching");
821 } else {
822 tty->print(" prefetching: ");
823 if (UseSSE == 0 && supports_3dnow_prefetch()) {
824 tty->print("PREFETCHW");
825 } else if (UseSSE >= 1) {
826 if (AllocatePrefetchInstr == 0) {
827 tty->print("PREFETCHNTA");
828 } else if (AllocatePrefetchInstr == 1) {
829 tty->print("PREFETCHT0");
830 } else if (AllocatePrefetchInstr == 2) {
831 tty->print("PREFETCHT2");
832 } else if (AllocatePrefetchInstr == 3) {
833 tty->print("PREFETCHW");
834 }
835 }
836 if (AllocatePrefetchLines > 1) {
|
33 # include "os_linux.inline.hpp"
34 #endif
35 #ifdef TARGET_OS_FAMILY_solaris
36 # include "os_solaris.inline.hpp"
37 #endif
38 #ifdef TARGET_OS_FAMILY_windows
39 # include "os_windows.inline.hpp"
40 #endif
41 #ifdef TARGET_OS_FAMILY_bsd
42 # include "os_bsd.inline.hpp"
43 #endif
44
45
// Definitions of VM_Version's static fields: cpuid-derived vendor id,
// model and stepping numbers, plus the detected feature bit mask.
46 int VM_Version::_cpu;
47 int VM_Version::_model;
48 int VM_Version::_stepping;
49 int VM_Version::_cpuFeatures;
// Human-readable feature list; empty until feature detection fills it in.
50 const char* VM_Version::_features_str = "";
// Raw cpuid output buffer -- presumably filled by the generated
// getPsrInfo stub (it stores to VM_Version::*_offset() slots); zeroed
// until the stub has run.
51 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
52 
// Address of instruction which causes SEGV
53 // Address of instruction which causes SEGV
54 address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
// (the continuation point the signal handler resumes at).
55 // Address of instruction after the one which causes SEGV
56 address VM_Version::_cpuinfo_cont_addr = 0;
57
// Code-cache blob that will hold the generated cpuid ("getPsrInfo") stub.
58 static BufferBlob* stub_blob;
// Upper bound in bytes for the generated stub's code; raised from 550
// to 600 to make room for the added YMM/SEGV probe sequence.
59 static const int stub_size = 600;
60 
// C linkage so the generated code can be invoked through a plain
// function pointer that takes the destination (CpuidInfo) buffer.
61 extern "C" {
62   typedef void (*getPsrInfo_stub_t)(void*);
63 }
// Entry point of the generated stub; NULL until stub generation runs.
64 static getPsrInfo_stub_t getPsrInfo_stub = NULL;
65
66
67 class VM_Version_StubGenerator: public StubCodeGenerator {
68 public:
69
70 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
71
72 address generate_getPsrInfo() {
73 // Flags to test CPU type.
74 const uint32_t HS_EFL_AC = 0x40000;
75 const uint32_t HS_EFL_ID = 0x200000;
76 // Values for when we don't have a CPUID instruction.
77 const int CPU_FAMILY_SHIFT = 8;
78 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
79 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
222 __ movl(Address(rsi, 4), rbx);
223 __ movl(Address(rsi, 8), rcx);
224 __ movl(Address(rsi,12), rdx);
225
226 //
227 // Standard cpuid(0x1)
228 //
229 __ bind(std_cpuid1);
230 __ movl(rax, 1);
231 __ cpuid();
232 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
233 __ movl(Address(rsi, 0), rax);
234 __ movl(Address(rsi, 4), rbx);
235 __ movl(Address(rsi, 8), rcx);
236 __ movl(Address(rsi,12), rdx);
237
238 //
239 // Check if OS has enabled XGETBV instruction to access XCR0
240 // (OSXSAVE feature flag) and CPU supports AVX
241 //
242 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
243 __ cmpl(rcx, 0x18000000);
244 __ jccb(Assembler::notEqual, sef_cpuid);
245
246 //
247 // XCR0, XFEATURE_ENABLED_MASK register
248 //
249 __ xorl(rcx, rcx); // zero for XCR0 register
250 __ xgetbv();
251 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
252 __ movl(Address(rsi, 0), rax);
253 __ movl(Address(rsi, 4), rdx);
254
255 __ andl(rax, 0x6); // xcr0 bits sse | ymm
256 __ cmpl(rax, 0x6);
257 __ jccb(Assembler::notEqual, sef_cpuid);
258
259 //
260 // Some OSs have a bug when upper 128bits of YMM
261 // registers are not restored after a signal processing.
262 // Generate SEGV here (reference through NULL)
263 // and check upper YMM bits after it.
264 //
265 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
266
267 // load value into all 32 bytes of ymm7 register
268 __ movl(rcx, VM_Version::ymm_test_value());
269
270 __ movdl(xmm7, rcx);
271 __ pshufd(xmm7, xmm7, 0x00);
272 __ vinsertf128h(xmm7, xmm7, xmm7);
273
274 __ xorl(rsi, rsi);
275 VM_Version::set_cpuinfo_segv_addr( __ pc() );
276 // Generate SEGV
277 __ movl(rax, Address(rsi, 0));
278
279 VM_Version::set_cpuinfo_cont_addr( __ pc() );
280 // Returns here after signal. Save xmm7 to check it later.
281 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
282 __ movdqu(Address(rsi, 0), xmm7);
283 __ vextractf128h(Address(rsi, 16), xmm7);
284
285 VM_Version::clean_cpuFeatures();
286
287 //
288 // cpuid(0x7) Structured Extended Features
289 //
290 __ bind(sef_cpuid);
291 __ movl(rax, 7);
292 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
293 __ jccb(Assembler::greater, ext_cpuid);
294
295 __ xorl(rcx, rcx);
296 __ cpuid();
297 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
298 __ movl(Address(rsi, 0), rax);
299 __ movl(Address(rsi, 4), rbx);
300
301 //
302 // Extended cpuid(0x80000000)
303 //
304 __ bind(ext_cpuid);
305 __ movl(rax, 0x80000000);
306 __ cpuid();
560 if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
561 warning("AES intrinsics not available on this CPU");
562 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
563 }
564
565 #ifdef COMPILER2
566 if (UseFPUForSpilling) {
567 if (UseSSE < 2) {
568 // Only supported with SSE2+
569 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
570 }
571 }
572 if (MaxVectorSize > 0) {
573 if (!is_power_of_2(MaxVectorSize)) {
574 warning("MaxVectorSize must be a power of 2");
575 FLAG_SET_DEFAULT(MaxVectorSize, 32);
576 }
577 if (MaxVectorSize > 32) {
578 FLAG_SET_DEFAULT(MaxVectorSize, 32);
579 }
580 if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
581 // 32 bytes vectors (in YMM) are only supported with AVX+
582 FLAG_SET_DEFAULT(MaxVectorSize, 16);
583 }
584 if (UseSSE < 2) {
585 // 16 bytes vectors (in XMM) are only supported with SSE2+
586 FLAG_SET_DEFAULT(MaxVectorSize, 0);
587 }
588 }
589 #endif
590
591 // On new cpus instructions which update whole XMM register should be used
592 // to prevent partial register stall due to dependencies on high half.
593 //
594 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
595 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
596 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
597 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
598
599 if( is_amd() ) { // AMD cpus specific settings
600 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
601 // Use it on new AMD cpus starting from Opteron.
602 UseAddressNop = true;
603 }
604 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
605 // Use it on new AMD cpus starting from Opteron.
698 // generated NOP instructions. 11 is the largest size of one
699 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
700 MaxLoopPad = 11;
701 }
702 #endif // COMPILER2
703 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
704 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
705 }
706 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
707 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
708 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
709 }
710 }
711 if (supports_sse4_2() && UseSSE >= 4) {
712 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
713 UseSSE42Intrinsics = true;
714 }
715 }
716 }
717 }
718
719 // Use count leading zeros count instruction if available.
720 if (supports_lzcnt()) {
721 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
722 UseCountLeadingZerosInstruction = true;
723 }
724 } else if (UseCountLeadingZerosInstruction) {
725 warning("lzcnt instruction is not available on this CPU");
726 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
727 }
728
729 if (supports_bmi1()) {
730 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
731 UseBMI1Instructions = true;
732 }
733 } else if (UseBMI1Instructions) {
734 warning("BMI1 instructions are not available on this CPU");
735 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
736 }
737
826 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
827 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
828 PrefetchFieldsAhead = prefetch_fields_ahead();
829 #endif
830
831 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
832 (cache_line_size > ContendedPaddingWidth))
833 ContendedPaddingWidth = cache_line_size;
834
835 #ifndef PRODUCT
836 if (PrintMiscellaneous && Verbose) {
837 tty->print_cr("Logical CPUs per core: %u",
838 logical_processors_per_package());
839 tty->print("UseSSE=%d",UseSSE);
840 if (UseAVX > 0) {
841 tty->print(" UseAVX=%d",UseAVX);
842 }
843 if (UseAES) {
844 tty->print(" UseAES=1");
845 }
846 #ifdef COMPILER2
847 if (MaxVectorSize > 0) {
848 tty->print(" MaxVectorSize=%d", MaxVectorSize);
849 }
850 #endif
851 tty->cr();
852 tty->print("Allocation");
853 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
854 tty->print_cr(": no prefetching");
855 } else {
856 tty->print(" prefetching: ");
857 if (UseSSE == 0 && supports_3dnow_prefetch()) {
858 tty->print("PREFETCHW");
859 } else if (UseSSE >= 1) {
860 if (AllocatePrefetchInstr == 0) {
861 tty->print("PREFETCHNTA");
862 } else if (AllocatePrefetchInstr == 1) {
863 tty->print("PREFETCHT0");
864 } else if (AllocatePrefetchInstr == 2) {
865 tty->print("PREFETCHT2");
866 } else if (AllocatePrefetchInstr == 3) {
867 tty->print("PREFETCHW");
868 }
869 }
870 if (AllocatePrefetchLines > 1) {
|