src/cpu/x86/vm/vm_version_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8037226 Sdiff src/cpu/x86/vm

src/cpu/x86/vm/vm_version_x86.cpp

Print this page




  33 # include "os_linux.inline.hpp"
  34 #endif
  35 #ifdef TARGET_OS_FAMILY_solaris
  36 # include "os_solaris.inline.hpp"
  37 #endif
  38 #ifdef TARGET_OS_FAMILY_windows
  39 # include "os_windows.inline.hpp"
  40 #endif
  41 #ifdef TARGET_OS_FAMILY_bsd
  42 # include "os_bsd.inline.hpp"
  43 #endif
  44 
  45 
  46 int VM_Version::_cpu;         // CPU family id (presumably decoded from _cpuid_info -- confirm)
  47 int VM_Version::_model;       // CPU model id
  48 int VM_Version::_stepping;    // CPU stepping id
  49 int VM_Version::_cpuFeatures; // bit mask of detected CPU feature flags
  50 const char*           VM_Version::_features_str = "";     // printable list of detected features
  51 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, }; // raw cpuid output, written by the generated getPsrInfo stub
  52 





  53 static BufferBlob* stub_blob;      // code blob that holds the generated getPsrInfo stub
  54 static const int stub_size = 550;  // bytes reserved in the blob for the stub code
  55 
  56 extern "C" {
  57   typedef void (*getPsrInfo_stub_t)(void*);  // stub signature; arg is presumably the CpuidInfo buffer -- confirm
  58 }
  59 static getPsrInfo_stub_t getPsrInfo_stub = NULL;  // entry point of the generated stub
  60 
  61 
  62 class VM_Version_StubGenerator: public StubCodeGenerator {
  63  public:
  64 
  65   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
  66 
  67   address generate_getPsrInfo() {
  68     // Flags to test CPU type.
  69     const uint32_t HS_EFL_AC           = 0x40000;
  70     const uint32_t HS_EFL_ID           = 0x200000;
  71     // Values for when we don't have a CPUID instruction.
  72     const int      CPU_FAMILY_SHIFT = 8;
  73     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
  74     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);


 217     __ movl(Address(rsi, 4), rbx);
 218     __ movl(Address(rsi, 8), rcx);
 219     __ movl(Address(rsi,12), rdx);
 220 
 221     //
 222     // Standard cpuid(0x1)
 223     //
 224     __ bind(std_cpuid1);
 225     __ movl(rax, 1);
 226     __ cpuid();
 227     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 228     __ movl(Address(rsi, 0), rax);
 229     __ movl(Address(rsi, 4), rbx);
 230     __ movl(Address(rsi, 8), rcx);
 231     __ movl(Address(rsi,12), rdx);
 232 
 233     //
 234     // Check if OS has enabled XGETBV instruction to access XCR0
 235     // (OSXSAVE feature flag) and CPU supports AVX
 236     //
 237     __ andl(rcx, 0x18000000);
 238     __ cmpl(rcx, 0x18000000);
 239     __ jccb(Assembler::notEqual, sef_cpuid);
 240 
 241     //
 242     // XCR0, XFEATURE_ENABLED_MASK register
 243     //
 244     __ xorl(rcx, rcx);   // zero for XCR0 register
 245     __ xgetbv();
 246     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 247     __ movl(Address(rsi, 0), rax);
 248     __ movl(Address(rsi, 4), rdx);
 249 
































 250     //
 251     // cpuid(0x7) Structured Extended Features
 252     //
 253     __ bind(sef_cpuid);
 254     __ movl(rax, 7);
 255     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 256     __ jccb(Assembler::greater, ext_cpuid);
 257 
 258     __ xorl(rcx, rcx);
 259     __ cpuid();
 260     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 261     __ movl(Address(rsi, 0), rax);
 262     __ movl(Address(rsi, 4), rbx);
 263 
 264     //
 265     // Extended cpuid(0x80000000)
 266     //
 267     __ bind(ext_cpuid);
 268     __ movl(rax, 0x80000000);
 269     __ cpuid();


 523     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 524       warning("AES intrinsics not available on this CPU");
 525     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 526   }
 527 
 528 #ifdef COMPILER2
 529   if (UseFPUForSpilling) {
 530     if (UseSSE < 2) {
 531       // Only supported with SSE2+
 532       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
 533     }
 534   }
 535   if (MaxVectorSize > 0) {
 536     if (!is_power_of_2(MaxVectorSize)) {
 537       warning("MaxVectorSize must be a power of 2");
 538       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 539     }
 540     if (MaxVectorSize > 32) {
 541       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 542     }
 543     if (MaxVectorSize > 16 && UseAVX == 0) {
 544       // Only supported with AVX+
 545       FLAG_SET_DEFAULT(MaxVectorSize, 16);
 546     }
 547     if (UseSSE < 2) {
 548       // Only supported with SSE2+
 549       FLAG_SET_DEFAULT(MaxVectorSize, 0);
 550     }
 551   }
 552 #endif
 553 
 554   // On new cpus instructions which update whole XMM register should be used
 555   // to prevent partial register stall due to dependencies on high half.
 556   //
 557   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
 558   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
 559   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
 560   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 561 
 562   if( is_amd() ) { // AMD cpus specific settings
 563     if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
 564       // Use it on new AMD cpus starting from Opteron.
 565       UseAddressNop = true;
 566     }
 567     if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
 568       // Use it on new AMD cpus starting from Opteron.


 661         // generated NOP instructions. 11 is the largest size of one
 662         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
 663         MaxLoopPad = 11;
 664       }
 665 #endif // COMPILER2
 666       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
 667         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
 668       }
 669       if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
 670         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
 671           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
 672         }
 673       }
 674       if (supports_sse4_2() && UseSSE >= 4) {
 675         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
 676           UseSSE42Intrinsics = true;
 677         }
 678       }
 679     }
 680   }
 681 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
 682     if (MaxVectorSize > 16) {
 683       // Limit vector size to 16 bytes on BSD until the OS correctly
 684       // restores the upper 128 bits of the YMM registers on return
 685       // from a signal handler.
 686       FLAG_SET_DEFAULT(MaxVectorSize, 16);
 687     }
 688 #endif // COMPILER2
 689 
 690   // Use count leading zeros count instruction if available.
 691   if (supports_lzcnt()) {
 692     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
 693       UseCountLeadingZerosInstruction = true;
 694     }
 695    } else if (UseCountLeadingZerosInstruction) {
 696     warning("lzcnt instruction is not available on this CPU");
 697     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
 698   }
 699 
 700   if (supports_bmi1()) {
 701     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
 702       UseBMI1Instructions = true;
 703     }
 704   } else if (UseBMI1Instructions) {
 705     warning("BMI1 instructions are not available on this CPU");
 706     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
 707   }
 708 


 797   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
 798   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
 799   PrefetchFieldsAhead         = prefetch_fields_ahead();
 800 #endif
 801 
 802   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
 803      (cache_line_size > ContendedPaddingWidth))
 804      ContendedPaddingWidth = cache_line_size;
 805 
 806 #ifndef PRODUCT
 807   if (PrintMiscellaneous && Verbose) {
 808     tty->print_cr("Logical CPUs per core: %u",
 809                   logical_processors_per_package());
 810     tty->print("UseSSE=%d",UseSSE);
 811     if (UseAVX > 0) {
 812       tty->print("  UseAVX=%d",UseAVX);
 813     }
 814     if (UseAES) {
 815       tty->print("  UseAES=1");
 816     }





 817     tty->cr();
 818     tty->print("Allocation");
 819     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
 820       tty->print_cr(": no prefetching");
 821     } else {
 822       tty->print(" prefetching: ");
 823       if (UseSSE == 0 && supports_3dnow_prefetch()) {
 824         tty->print("PREFETCHW");
 825       } else if (UseSSE >= 1) {
 826         if (AllocatePrefetchInstr == 0) {
 827           tty->print("PREFETCHNTA");
 828         } else if (AllocatePrefetchInstr == 1) {
 829           tty->print("PREFETCHT0");
 830         } else if (AllocatePrefetchInstr == 2) {
 831           tty->print("PREFETCHT2");
 832         } else if (AllocatePrefetchInstr == 3) {
 833           tty->print("PREFETCHW");
 834         }
 835       }
 836       if (AllocatePrefetchLines > 1) {




  33 # include "os_linux.inline.hpp"
  34 #endif
  35 #ifdef TARGET_OS_FAMILY_solaris
  36 # include "os_solaris.inline.hpp"
  37 #endif
  38 #ifdef TARGET_OS_FAMILY_windows
  39 # include "os_windows.inline.hpp"
  40 #endif
  41 #ifdef TARGET_OS_FAMILY_bsd
  42 # include "os_bsd.inline.hpp"
  43 #endif
  44 
  45 
  46 int VM_Version::_cpu;         // CPU family id (presumably decoded from _cpuid_info -- confirm)
  47 int VM_Version::_model;       // CPU model id
  48 int VM_Version::_stepping;    // CPU stepping id
  49 int VM_Version::_cpuFeatures; // bit mask of detected CPU feature flags
  50 const char*           VM_Version::_features_str = "";     // printable list of detected features
  51 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, }; // raw cpuid output, written by the generated getPsrInfo stub
  52 
  53 // Address of instruction which causes SEGV
  54 address VM_Version::_cpuinfo_segv_addr = 0;
  55 // Address of instruction at which execution continues after the SEGV
  56 address VM_Version::_cpuinfo_cont_addr = 0;
  57 
  58 static BufferBlob* stub_blob;      // code blob that holds the generated getPsrInfo stub
  59 static const int stub_size = 600;  // bytes reserved in the blob for the stub code
  60 
  61 extern "C" {
  62   typedef void (*getPsrInfo_stub_t)(void*);  // stub signature; arg is presumably the CpuidInfo buffer -- confirm
  63 }
  64 static getPsrInfo_stub_t getPsrInfo_stub = NULL;  // entry point of the generated stub
  65 
  66 
  67 class VM_Version_StubGenerator: public StubCodeGenerator {
  68  public:
  69 
  70   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
  71 
  72   address generate_getPsrInfo() {
  73     // Flags to test CPU type.
  74     const uint32_t HS_EFL_AC           = 0x40000;
  75     const uint32_t HS_EFL_ID           = 0x200000;
  76     // Values for when we don't have a CPUID instruction.
  77     const int      CPU_FAMILY_SHIFT = 8;
  78     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
  79     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);


 222     __ movl(Address(rsi, 4), rbx);
 223     __ movl(Address(rsi, 8), rcx);
 224     __ movl(Address(rsi,12), rdx);
 225 
 226     //
 227     // Standard cpuid(0x1)
 228     //
 229     __ bind(std_cpuid1);
 230     __ movl(rax, 1);
 231     __ cpuid();
 232     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 233     __ movl(Address(rsi, 0), rax);
 234     __ movl(Address(rsi, 4), rbx);
 235     __ movl(Address(rsi, 8), rcx);
 236     __ movl(Address(rsi,12), rdx);
 237 
 238     //
 239     // Check if OS has enabled XGETBV instruction to access XCR0
 240     // (OSXSAVE feature flag) and CPU supports AVX
 241     //
 242     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 243     __ cmpl(rcx, 0x18000000);
 244     __ jccb(Assembler::notEqual, sef_cpuid);
 245 
 246     //
 247     // XCR0, XFEATURE_ENABLED_MASK register
 248     //
 249     __ xorl(rcx, rcx);   // zero for XCR0 register
 250     __ xgetbv();
 251     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 252     __ movl(Address(rsi, 0), rax);
 253     __ movl(Address(rsi, 4), rdx);
 254 
 255     __ andl(rax, 0x6); // xcr0 bits sse | ymm
 256     __ cmpl(rax, 0x6);
 257     __ jccb(Assembler::notEqual, sef_cpuid);
 258 
 259     //
 260     // Some OSes have a bug where the upper 128 bits of the YMM
 261     // registers are not restored after signal processing.
 262     // Generate a SEGV here (by referencing through NULL)
 263     // and check the upper YMM bits after it.
 264     //
 265     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 266 
 267     // load value into all 32 bytes of ymm7 register
 268     __ movl(rcx, VM_Version::ymm_test_value());
 269 
 270     __ movdl(xmm7, rcx);
 271     __ pshufd(xmm7, xmm7, 0x00);
 272     __ vinsertf128h(xmm7, xmm7, xmm7);
 273 
 274     __ xorl(rsi, rsi);
 275     VM_Version::set_cpuinfo_segv_addr( __ pc() );
 276     // Generate SEGV
 277     __ movl(rax, Address(rsi, 0));
 278 
 279     VM_Version::set_cpuinfo_cont_addr( __ pc() );
 280     // Returns here after signal. Save xmm7 to check it later.
 281     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 282     __ movdqu(Address(rsi,  0), xmm7);
 283     __ vextractf128h(Address(rsi, 16), xmm7);
 284 
 285     VM_Version::clean_cpuFeatures();
 286 
 287     //
 288     // cpuid(0x7) Structured Extended Features
 289     //
 290     __ bind(sef_cpuid);
 291     __ movl(rax, 7);
 292     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 293     __ jccb(Assembler::greater, ext_cpuid);
 294 
 295     __ xorl(rcx, rcx);
 296     __ cpuid();
 297     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 298     __ movl(Address(rsi, 0), rax);
 299     __ movl(Address(rsi, 4), rbx);
 300 
 301     //
 302     // Extended cpuid(0x80000000)
 303     //
 304     __ bind(ext_cpuid);
 305     __ movl(rax, 0x80000000);
 306     __ cpuid();


 560     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 561       warning("AES intrinsics not available on this CPU");
 562     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 563   }
 564 
 565 #ifdef COMPILER2
 566   if (UseFPUForSpilling) {
 567     if (UseSSE < 2) {
 568       // Only supported with SSE2+
 569       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
 570     }
 571   }
 572   if (MaxVectorSize > 0) {
 573     if (!is_power_of_2(MaxVectorSize)) {
 574       warning("MaxVectorSize must be a power of 2");
 575       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 576     }
 577     if (MaxVectorSize > 32) {
 578       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 579     }
 580     if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
 581       // 32 bytes vectors (in YMM) are only supported with AVX+
 582       FLAG_SET_DEFAULT(MaxVectorSize, 16);
 583     }
 584     if (UseSSE < 2) {
 585       // 16 bytes vectors (in XMM) are only supported with SSE2+
 586       FLAG_SET_DEFAULT(MaxVectorSize, 0);
 587     }
 588   }
 589 #endif
 590 
 591   // On new cpus instructions which update whole XMM register should be used
 592   // to prevent partial register stall due to dependencies on high half.
 593   //
 594   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
 595   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
 596   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
 597   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 598 
 599   if( is_amd() ) { // AMD cpus specific settings
 600     if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
 601       // Use it on new AMD cpus starting from Opteron.
 602       UseAddressNop = true;
 603     }
 604     if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
 605       // Use it on new AMD cpus starting from Opteron.


 698         // generated NOP instructions. 11 is the largest size of one
 699         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
 700         MaxLoopPad = 11;
 701       }
 702 #endif // COMPILER2
 703       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
 704         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
 705       }
 706       if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
 707         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
 708           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
 709         }
 710       }
 711       if (supports_sse4_2() && UseSSE >= 4) {
 712         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
 713           UseSSE42Intrinsics = true;
 714         }
 715       }
 716     }
 717   }








 718 
 719   // Use count leading zeros count instruction if available.
 720   if (supports_lzcnt()) {
 721     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
 722       UseCountLeadingZerosInstruction = true;
 723     }
 724    } else if (UseCountLeadingZerosInstruction) {
 725     warning("lzcnt instruction is not available on this CPU");
 726     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
 727   }
 728 
 729   if (supports_bmi1()) {
 730     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
 731       UseBMI1Instructions = true;
 732     }
 733   } else if (UseBMI1Instructions) {
 734     warning("BMI1 instructions are not available on this CPU");
 735     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
 736   }
 737 


 826   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
 827   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
 828   PrefetchFieldsAhead         = prefetch_fields_ahead();
 829 #endif
 830 
 831   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
 832      (cache_line_size > ContendedPaddingWidth))
 833      ContendedPaddingWidth = cache_line_size;
 834 
 835 #ifndef PRODUCT
 836   if (PrintMiscellaneous && Verbose) {
 837     tty->print_cr("Logical CPUs per core: %u",
 838                   logical_processors_per_package());
 839     tty->print("UseSSE=%d",UseSSE);
 840     if (UseAVX > 0) {
 841       tty->print("  UseAVX=%d",UseAVX);
 842     }
 843     if (UseAES) {
 844       tty->print("  UseAES=1");
 845     }
 846 #ifdef COMPILER2
 847     if (MaxVectorSize > 0) {
 848       tty->print("  MaxVectorSize=%d", MaxVectorSize);
 849     }
 850 #endif
 851     tty->cr();
 852     tty->print("Allocation");
 853     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
 854       tty->print_cr(": no prefetching");
 855     } else {
 856       tty->print(" prefetching: ");
 857       if (UseSSE == 0 && supports_3dnow_prefetch()) {
 858         tty->print("PREFETCHW");
 859       } else if (UseSSE >= 1) {
 860         if (AllocatePrefetchInstr == 0) {
 861           tty->print("PREFETCHNTA");
 862         } else if (AllocatePrefetchInstr == 1) {
 863           tty->print("PREFETCHT0");
 864         } else if (AllocatePrefetchInstr == 2) {
 865           tty->print("PREFETCHT2");
 866         } else if (AllocatePrefetchInstr == 3) {
 867           tty->print("PREFETCHW");
 868         }
 869       }
 870       if (AllocatePrefetchLines > 1) {


src/cpu/x86/vm/vm_version_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File