src/cpu/x86/vm/vm_version_x86.cpp
(old version, before the change for 8037226)

  33 # include "os_linux.inline.hpp"
  34 #endif
  35 #ifdef TARGET_OS_FAMILY_solaris
  36 # include "os_solaris.inline.hpp"
  37 #endif
  38 #ifdef TARGET_OS_FAMILY_windows
  39 # include "os_windows.inline.hpp"
  40 #endif
  41 #ifdef TARGET_OS_FAMILY_bsd
  42 # include "os_bsd.inline.hpp"
  43 #endif
  44 
  45 
  46 int VM_Version::_cpu;
  47 int VM_Version::_model;
  48 int VM_Version::_stepping;
  49 int VM_Version::_cpuFeatures;
  50 const char*           VM_Version::_features_str = "";
  51 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
  52 
  53 static BufferBlob* stub_blob;
  54 static const int stub_size = 550;
  55 
  56 extern "C" {
  57   typedef void (*getPsrInfo_stub_t)(void*);
  58 }
  59 static getPsrInfo_stub_t getPsrInfo_stub = NULL;
  60 
  61 
  62 class VM_Version_StubGenerator: public StubCodeGenerator {
  63  public:
  64 
  65   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
  66 
  67   address generate_getPsrInfo() {
  68     // Flags to test CPU type.
  69     const uint32_t HS_EFL_AC           = 0x40000;
  70     const uint32_t HS_EFL_ID           = 0x200000;
  71     // Values for when we don't have a CPUID instruction.
  72     const int      CPU_FAMILY_SHIFT = 8;
  73     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
  74     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);


 217     __ movl(Address(rsi, 4), rbx);
 218     __ movl(Address(rsi, 8), rcx);
 219     __ movl(Address(rsi,12), rdx);
 220 
 221     //
 222     // Standard cpuid(0x1)
 223     //
 224     __ bind(std_cpuid1);
 225     __ movl(rax, 1);
 226     __ cpuid();
 227     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 228     __ movl(Address(rsi, 0), rax);
 229     __ movl(Address(rsi, 4), rbx);
 230     __ movl(Address(rsi, 8), rcx);
 231     __ movl(Address(rsi,12), rdx);
 232 
 233     //
 234     // Check if OS has enabled XGETBV instruction to access XCR0
 235     // (OSXSAVE feature flag) and CPU supports AVX
 236     //
 237     __ andl(rcx, 0x18000000);
 238     __ cmpl(rcx, 0x18000000);
 239     __ jccb(Assembler::notEqual, sef_cpuid);
 240 
 241     //
 242     // XCR0, XFEATURE_ENABLED_MASK register
 243     //
 244     __ xorl(rcx, rcx);   // zero for XCR0 register
 245     __ xgetbv();
 246     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 247     __ movl(Address(rsi, 0), rax);
 248     __ movl(Address(rsi, 4), rdx);
 249 
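Reviewer's note: the 0x18000000 mask above is CPUID leaf 1, ECX bits 27 (OSXSAVE) and 28 (AVX). A minimal user-level sketch of the same test follows, assuming GCC/Clang's <cpuid.h>; the function name is made up for illustration and this is not part of the change.

#include <cpuid.h>

// Returns true only if the CPU advertises AVX *and* the OS has set
// CR4.OSXSAVE, i.e. it is legal to go on and execute XGETBV.
bool cpu_and_os_claim_avx() {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return false;
  return (ecx & 0x18000000u) == 0x18000000u;   // OSXSAVE (bit 27) | AVX (bit 28)
}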
 250     //
 251     // cpuid(0x7) Structured Extended Features
 252     //
 253     __ bind(sef_cpuid);
 254     __ movl(rax, 7);
 255     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 256     __ jccb(Assembler::greater, ext_cpuid);
 257 
 258     __ xorl(rcx, rcx);
 259     __ cpuid();
 260     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 261     __ movl(Address(rsi, 0), rax);
 262     __ movl(Address(rsi, 4), rbx);
 263 
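The comparison against std_cpuid0 above is the usual max-leaf guard: leaf 7 is queried only if cpuid(0) says it exists. A standalone equivalent, again assuming <cpuid.h> (helper name is illustrative):

#include <cpuid.h>

// Reads the Structured Extended Features leaf (EBX) only when cpuid(0)
// reports leaf 7 as implemented; the sub-leaf in ECX must be zeroed,
// just as the stub zeroes rcx before cpuid.
bool read_sef_leaf(unsigned& ebx_out) {
  if (__get_cpuid_max(0, nullptr) < 7) return false;   // cpuid(0x7) not supported
  unsigned eax, ebx, ecx, edx;
  __cpuid_count(7, 0, eax, ebx, ecx, edx);
  ebx_out = ebx;                                        // AVX2/BMI1/BMI2 etc. bits
  return true;
}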
 264     //
 265     // Extended cpuid(0x80000000)
 266     //
 267     __ bind(ext_cpuid);
 268     __ movl(rax, 0x80000000);
 269     __ cpuid();


 523     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 524       warning("AES intrinsics not available on this CPU");
 525     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 526   }
 527 
 528 #ifdef COMPILER2
 529   if (UseFPUForSpilling) {
 530     if (UseSSE < 2) {
 531       // Only supported with SSE2+
 532       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
 533     }
 534   }
 535   if (MaxVectorSize > 0) {
 536     if (!is_power_of_2(MaxVectorSize)) {
 537       warning("MaxVectorSize must be a power of 2");
 538       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 539     }
 540     if (MaxVectorSize > 32) {
 541       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 542     }
 543     if (MaxVectorSize > 16 && UseAVX == 0) {
 544       // Only supported with AVX+
 545       FLAG_SET_DEFAULT(MaxVectorSize, 16);
 546     }
 547     if (UseSSE < 2) {
 548       // Only supported with SSE2+
 549       FLAG_SET_DEFAULT(MaxVectorSize, 0);
 550     }














 551   }
 552 #endif
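For clarity, the MaxVectorSize handling above amounts to a clamp. A paraphrase as a standalone helper (illustrative only; the boolean parameters stand in for the UseAVX/UseSSE queries):

#include <cstdint>

static bool is_power_of_2(intptr_t x) { return x > 0 && ((x & (x - 1)) == 0); }

// Non-powers-of-two are reset to the 32-byte maximum, more than 16 bytes
// requires AVX (YMM registers), and without SSE2 there are no XMM vectors.
intptr_t clamp_max_vector_size(intptr_t requested, bool avx_ok, bool sse2_ok) {
  intptr_t size = is_power_of_2(requested) ? requested : 32;
  if (size > 32)            size = 32;
  if (size > 16 && !avx_ok) size = 16;
  if (!sse2_ok)             size = 0;
  return size;
}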
 553 
 554   // On newer CPUs, use instructions which update the whole XMM register
 555   // to prevent partial register stalls caused by dependencies on the high half.
 556   //
 557   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
 558   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
 559   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
 560   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 561 
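At the intrinsics level the two load forms named in the comment differ exactly as described; a small sketch, assuming SSE2 and <emmintrin.h> (function names are illustrative):

#include <emmintrin.h>

// MOVSD from memory writes the low half and zeroes the high half, so the
// result has no dependency on the register's previous contents.
__m128d load_clearing_upper(const double* p) { return _mm_load_sd(p); }

// MOVLPD merges into the existing high half, keeping a dependency on the
// old value -- the source of the partial register stall discussed above.
__m128d load_merging_upper(__m128d old_value, const double* p) {
  return _mm_loadl_pd(old_value, p);
}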
 562   if( is_amd() ) { // AMD cpus specific settings
 563     if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
 564       // Use it on new AMD cpus starting from Opteron.
 565       UseAddressNop = true;
 566     }
 567     if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
 568       // Use it on new AMD cpus starting from Opteron.
 569       UseNewLongLShift = true;
 570     }


 661         // generated NOP instructions. 11 is the largest size of one
 662         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
 663         MaxLoopPad = 11;
 664       }
 665 #endif // COMPILER2
 666       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
 667         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
 668       }
 669       if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
 670         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
 671           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
 672         }
 673       }
 674       if (supports_sse4_2() && UseSSE >= 4) {
 675         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
 676           UseSSE42Intrinsics = true;
 677         }
 678       }
 679     }
 680   }
 681 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE)
 682     if (MaxVectorSize > 16) {
 683       // Limit vector size to 16 bytes on BSD until it correctly
 684       // restores the upper 128 bits of YMM registers on return
 685       // from a signal handler.
 686       FLAG_SET_DEFAULT(MaxVectorSize, 16);
 687     }
 688 #endif // COMPILER2
 689 
 690   // Use count leading zeros instruction if available.
 691   if (supports_lzcnt()) {
 692     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
 693       UseCountLeadingZerosInstruction = true;
 694     }
 695    } else if (UseCountLeadingZerosInstruction) {
 696     warning("lzcnt instruction is not available on this CPU");
 697     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
 698   }
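What the flag enables, expressed with the matching compiler intrinsic (assumes -mlzcnt and <immintrin.h>; the wrapper name is made up):

#include <immintrin.h>

// LZCNT is well defined for an input of 0 (returns 32), unlike BSR.
unsigned leading_zeros(unsigned x) { return _lzcnt_u32(x); }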
 699 
 700   if (supports_bmi1()) {
 701     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
 702       UseBMI1Instructions = true;
 703     }
 704   } else if (UseBMI1Instructions) {
 705     warning("BMI1 instructions are not available on this CPU");
 706     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
 707   }
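Two of the BMI1 instructions covered by the flag above, again as intrinsics (assumes -mbmi; wrapper names are illustrative):

#include <immintrin.h>

unsigned trailing_zeros(unsigned x)       { return _tzcnt_u32(x); }  // TZCNT, defined for 0
unsigned clear_lowest_set_bit(unsigned x) { return _blsr_u32(x);  }  // BLSR: x & (x - 1)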
 708 


 797   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
 798   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
 799   PrefetchFieldsAhead         = prefetch_fields_ahead();
 800 #endif
 801 
 802   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
 803      (cache_line_size > ContendedPaddingWidth))
 804      ContendedPaddingWidth = cache_line_size;
 805 
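ContendedPaddingWidth controls how much padding @Contended fields receive; widening it to a full cache line avoids false sharing between fields touched by different threads. A generic C++ illustration of the same idea, assuming 64-byte cache lines (not HotSpot code):

// Counters updated by different threads: the alignment and padding keep each
// one on its own cache line, so one thread's writes do not invalidate the
// line holding the other's counter.
struct alignas(64) PaddedCounter {
  long value;
  char pad[64 - sizeof(long)];
};

PaddedCounter per_thread_counters[2];   // no false sharing between the two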
 806 #ifndef PRODUCT
 807   if (PrintMiscellaneous && Verbose) {
 808     tty->print_cr("Logical CPUs per core: %u",
 809                   logical_processors_per_package());
 810     tty->print("UseSSE=%d",UseSSE);
 811     if (UseAVX > 0) {
 812       tty->print("  UseAVX=%d",UseAVX);
 813     }
 814     if (UseAES) {
 815       tty->print("  UseAES=1");
 816     }
 817     tty->cr();
 818     tty->print("Allocation");
 819     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
 820       tty->print_cr(": no prefetching");
 821     } else {
 822       tty->print(" prefetching: ");
 823       if (UseSSE == 0 && supports_3dnow_prefetch()) {
 824         tty->print("PREFETCHW");
 825       } else if (UseSSE >= 1) {
 826         if (AllocatePrefetchInstr == 0) {
 827           tty->print("PREFETCHNTA");
 828         } else if (AllocatePrefetchInstr == 1) {
 829           tty->print("PREFETCHT0");
 830         } else if (AllocatePrefetchInstr == 2) {
 831           tty->print("PREFETCHT2");
 832         } else if (AllocatePrefetchInstr == 3) {
 833           tty->print("PREFETCHW");
 834         }
 835       }
 836       if (AllocatePrefetchLines > 1) {

src/cpu/x86/vm/vm_version_x86.cpp (new version, after the change)

  33 # include "os_linux.inline.hpp"
  34 #endif
  35 #ifdef TARGET_OS_FAMILY_solaris
  36 # include "os_solaris.inline.hpp"
  37 #endif
  38 #ifdef TARGET_OS_FAMILY_windows
  39 # include "os_windows.inline.hpp"
  40 #endif
  41 #ifdef TARGET_OS_FAMILY_bsd
  42 # include "os_bsd.inline.hpp"
  43 #endif
  44 
  45 
  46 int VM_Version::_cpu;
  47 int VM_Version::_model;
  48 int VM_Version::_stepping;
  49 int VM_Version::_cpuFeatures;
  50 const char*           VM_Version::_features_str = "";
  51 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
  52 
  53 // Address of instruction which causes SEGV
  54 address VM_Version::_cpuinfo_segv_addr = 0;
  55 // Address of instruction after the one which causes SEGV
  56 address VM_Version::_cpuinfo_cont_addr = 0;
  57 
  58 static BufferBlob* stub_blob;
  59 static const int stub_size = 600;
  60 
  61 extern "C" {
  62   typedef void (*getPsrInfo_stub_t)(void*);
  63 }
  64 static getPsrInfo_stub_t getPsrInfo_stub = NULL;
  65 
  66 
  67 class VM_Version_StubGenerator: public StubCodeGenerator {
  68  public:
  69 
  70   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
  71 
  72   address generate_getPsrInfo() {
  73     // Flags to test CPU type.
  74     const uint32_t HS_EFL_AC           = 0x40000;
  75     const uint32_t HS_EFL_ID           = 0x200000;
  76     // Values for when we don't have a CPUID instruction.
  77     const int      CPU_FAMILY_SHIFT = 8;
  78     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
  79     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);


 222     __ movl(Address(rsi, 4), rbx);
 223     __ movl(Address(rsi, 8), rcx);
 224     __ movl(Address(rsi,12), rdx);
 225 
 226     //
 227     // Standard cpuid(0x1)
 228     //
 229     __ bind(std_cpuid1);
 230     __ movl(rax, 1);
 231     __ cpuid();
 232     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 233     __ movl(Address(rsi, 0), rax);
 234     __ movl(Address(rsi, 4), rbx);
 235     __ movl(Address(rsi, 8), rcx);
 236     __ movl(Address(rsi,12), rdx);
 237 
 238     //
 239     // Check if OS has enabled XGETBV instruction to access XCR0
 240     // (OSXSAVE feature flag) and CPU supports AVX
 241     //
 242     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 243     __ cmpl(rcx, 0x18000000);
 244     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 245 
 246     //
 247     // XCR0, XFEATURE_ENABLED_MASK register
 248     //
 249     __ xorl(rcx, rcx);   // zero for XCR0 register
 250     __ xgetbv();
 251     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 252     __ movl(Address(rsi, 0), rax);
 253     __ movl(Address(rsi, 4), rdx);
 254 
 255     __ andl(rax, 0x6); // xcr0 bits sse | ymm
 256     __ cmpl(rax, 0x6);
 257     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 258 
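The 0x6 mask checks XCR0 bits 1 (SSE state) and 2 (AVX/YMM state); only if the OS has enabled both does the stub trust AVX. A user-level analogue using inline assembly, valid only after the OSXSAVE check above has passed (helper name is illustrative):

// Reads XCR0 via XGETBV (ECX selects the register, 0 = XCR0) and requires
// both the SSE and YMM state bits -- the same 0x6 test as above.
static bool os_enabled_ymm_state() {
  unsigned eax, edx;
  __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0u));
  return (eax & 0x6u) == 0x6u;
}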
 259     //
 260     // Some OSes have a bug where the upper 128 bits of YMM
 261     // registers are not restored after signal processing.
 262     // Generate a SEGV here (reference through NULL)
 263     // and check the upper YMM bits after it.
 264     //
 265     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 266 
 267     // load value into all 32 bytes of ymm7 register
 268     __ movl(rcx, VM_Version::ymm_test_value());
 269 
 270     __ movdl(xmm0, rcx);
 271     __ pshufd(xmm0, xmm0, 0x00);
 272     __ vinsertf128h(xmm0, xmm0, xmm0);
 273     __ vmovdqu(xmm7, xmm0);
 274 #ifdef _LP64
 275     __ vmovdqu(xmm8,  xmm0);
 276     __ vmovdqu(xmm15, xmm0);
 277 #endif
 278 
 279     __ xorl(rsi, rsi);
 280     VM_Version::set_cpuinfo_segv_addr( __ pc() );
 281     // Generate SEGV
 282     __ movl(rax, Address(rsi, 0));
 283 
 284     VM_Version::set_cpuinfo_cont_addr( __ pc() );
 285     // Returns here after signal. Save xmm0 to check it later.
 286     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 287     __ vmovdqu(Address(rsi,  0), xmm0);
 288     __ vmovdqu(Address(rsi, 32), xmm7);
 289 #ifdef _LP64
 290     __ vmovdqu(Address(rsi, 64), xmm8);
 291     __ vmovdqu(Address(rsi, 96), xmm15);
 292 #endif
 293 
 294     VM_Version::clean_cpuFeatures();
 295 
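The sequence above is the interesting addition: a known pattern is parked in ymm0/ymm7 (plus ymm8/ymm15 on 64-bit), a SEGV is provoked on purpose, and after the VM's signal handler resumes at _cpuinfo_cont_addr the registers are stored so the upper 128 bits can be verified later. Below is a rough standalone analogue of the idea, not the HotSpot code: it takes a harmless signal instead of a SEGV, since the kernel's signal-return path is what may clobber the upper halves. Assumes x86-64 Linux with GCC/Clang and -mavx; the register choice and the test value are arbitrary.

#include <immintrin.h>
#include <csignal>
#include <cstdio>
#include <cstring>

// The handler does nothing; what is being exercised is the save/restore of
// AVX state around signal delivery and the return through sigreturn.
static void noop_handler(int) {}

int main() {
  std::signal(SIGUSR1, noop_handler);

  __m256i pattern = _mm256_set1_epi32(0x1CAFEBAB);                // arbitrary test value
  __asm__ volatile("vmovdqa %0, %%ymm7" : : "x"(pattern) : "xmm7"); // park it in ymm7

  std::raise(SIGUSR1);                    // deliver a signal and return from it

  __m256i after;
  __asm__ volatile("vmovdqa %%ymm7, %0" : "=x"(after));           // read ymm7 back

  unsigned a[8], b[8];
  _mm256_storeu_si256((__m256i*)a, pattern);
  _mm256_storeu_si256((__m256i*)b, after);
  std::printf("YMM contents %s across the signal\n",
              std::memcmp(a, b, sizeof a) == 0 ? "survived" : "were clobbered");
  return 0;
}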
 296     //
 297     // cpuid(0x7) Structured Extended Features
 298     //
 299     __ bind(sef_cpuid);
 300     __ movl(rax, 7);
 301     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 302     __ jccb(Assembler::greater, ext_cpuid);
 303 
 304     __ xorl(rcx, rcx);
 305     __ cpuid();
 306     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 307     __ movl(Address(rsi, 0), rax);
 308     __ movl(Address(rsi, 4), rbx);
 309 
 310     //
 311     // Extended cpuid(0x80000000)
 312     //
 313     __ bind(ext_cpuid);
 314     __ movl(rax, 0x80000000);
 315     __ cpuid();


 569     if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
 570       warning("AES intrinsics not available on this CPU");
 571     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
 572   }
 573 
 574 #ifdef COMPILER2
 575   if (UseFPUForSpilling) {
 576     if (UseSSE < 2) {
 577       // Only supported with SSE2+
 578       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
 579     }
 580   }
 581   if (MaxVectorSize > 0) {
 582     if (!is_power_of_2(MaxVectorSize)) {
 583       warning("MaxVectorSize must be a power of 2");
 584       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 585     }
 586     if (MaxVectorSize > 32) {
 587       FLAG_SET_DEFAULT(MaxVectorSize, 32);
 588     }
 589     if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
 590       // 32 bytes vectors (in YMM) are only supported with AVX+
 591       FLAG_SET_DEFAULT(MaxVectorSize, 16);
 592     }
 593     if (UseSSE < 2) {
 594       // Vectors (in XMM) are only supported with SSE2+
 595       FLAG_SET_DEFAULT(MaxVectorSize, 0);
 596     }
 597 #ifdef ASSERT
 598     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
 599       tty->print_cr("State of YMM registers after signal handle:");
 600       int nreg = 2 LP64_ONLY(+2);
 601       const char* ymm_name[4] = {"0", "7", "8", "15"};
 602       for (int i = 0; i < nreg; i++) {
 603         tty->print("YMM%s:", ymm_name[i]);
 604         for (int j = 7; j >=0; j--) {
 605           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
 606         }
 607         tty->cr();
 608       }
 609     }
 610 #endif
 611   }
 612 #endif
 613 
 614   // On newer CPUs, use instructions which update the whole XMM register
 615   // to prevent partial register stalls caused by dependencies on the high half.
 616   //
 617   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
 618   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
 619   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
 620   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
 621 
 622   if( is_amd() ) { // AMD cpus specific settings
 623     if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
 624       // Use it on new AMD cpus starting from Opteron.
 625       UseAddressNop = true;
 626     }
 627     if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
 628       // Use it on new AMD cpus starting from Opteron.
 629       UseNewLongLShift = true;
 630     }


 721         // generated NOP instructions. 11 is the largest size of one
 722         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
 723         MaxLoopPad = 11;
 724       }
 725 #endif // COMPILER2
 726       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
 727         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
 728       }
 729       if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
 730         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
 731           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
 732         }
 733       }
 734       if (supports_sse4_2() && UseSSE >= 4) {
 735         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
 736           UseSSE42Intrinsics = true;
 737         }
 738       }
 739     }
 740   }
 741 
 742   // Use count leading zeros instruction if available.
 743   if (supports_lzcnt()) {
 744     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
 745       UseCountLeadingZerosInstruction = true;
 746     }
 747    } else if (UseCountLeadingZerosInstruction) {
 748     warning("lzcnt instruction is not available on this CPU");
 749     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
 750   }
 751 
 752   if (supports_bmi1()) {
 753     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
 754       UseBMI1Instructions = true;
 755     }
 756   } else if (UseBMI1Instructions) {
 757     warning("BMI1 instructions are not available on this CPU");
 758     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
 759   }
 760 


 849   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
 850   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
 851   PrefetchFieldsAhead         = prefetch_fields_ahead();
 852 #endif
 853 
 854   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
 855      (cache_line_size > ContendedPaddingWidth))
 856      ContendedPaddingWidth = cache_line_size;
 857 
 858 #ifndef PRODUCT
 859   if (PrintMiscellaneous && Verbose) {
 860     tty->print_cr("Logical CPUs per core: %u",
 861                   logical_processors_per_package());
 862     tty->print("UseSSE=%d",UseSSE);
 863     if (UseAVX > 0) {
 864       tty->print("  UseAVX=%d",UseAVX);
 865     }
 866     if (UseAES) {
 867       tty->print("  UseAES=1");
 868     }
 869 #ifdef COMPILER2
 870     if (MaxVectorSize > 0) {
 871       tty->print("  MaxVectorSize=%d", MaxVectorSize);
 872     }
 873 #endif
 874     tty->cr();
 875     tty->print("Allocation");
 876     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
 877       tty->print_cr(": no prefetching");
 878     } else {
 879       tty->print(" prefetching: ");
 880       if (UseSSE == 0 && supports_3dnow_prefetch()) {
 881         tty->print("PREFETCHW");
 882       } else if (UseSSE >= 1) {
 883         if (AllocatePrefetchInstr == 0) {
 884           tty->print("PREFETCHNTA");
 885         } else if (AllocatePrefetchInstr == 1) {
 886           tty->print("PREFETCHT0");
 887         } else if (AllocatePrefetchInstr == 2) {
 888           tty->print("PREFETCHT2");
 889         } else if (AllocatePrefetchInstr == 3) {
 890           tty->print("PREFETCHW");
 891         }
 892       }
 893       if (AllocatePrefetchLines > 1) {


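For reference, the prefetch variants named in this printout map onto the usual compiler intrinsics roughly as follows (illustrative only; PREFETCHW needs the 3DNow!/PRFCHW feature and, with GCC, -mprfchw):

#include <immintrin.h>

void touch_ahead(char* p) {
  _mm_prefetch(p, _MM_HINT_NTA);  // PREFETCHNTA (AllocatePrefetchInstr == 0)
  _mm_prefetch(p, _MM_HINT_T0);   // PREFETCHT0  (AllocatePrefetchInstr == 1)
  _mm_prefetch(p, _MM_HINT_T2);   // PREFETCHT2  (AllocatePrefetchInstr == 2)
  _m_prefetchw(p);                // PREFETCHW   (AllocatePrefetchInstr == 3)
}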