src/cpu/x86/vm/vm_version_x86.hpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8031320_8u Sdiff src/cpu/x86/vm

src/cpu/x86/vm/vm_version_x86.hpp

Print this page
rev 5968 : 8031320: Use Intel RTM instructions for locks
Summary: Use RTM for inflated locks and stack locks.
Reviewed-by: iveresov, twisti, roland, dcubed


 190       uint32_t cores_per_cpu : 8,
 191                              : 24;
 192     } bits;
 193   };
 194 
 195   union SefCpuid7Eax {
         // Holds one raw 32-bit value; no individual bits are broken out here.
         // Presumably the EAX output of cpuid leaf 7, judging by the name -- TODO confirm.
 196     uint32_t value;
 197   };
 198 
 199   union SefCpuid7Ebx {
         // Overlays a raw 32-bit value with named one-bit feature flags.
         // The declared widths (1+2+1+1+1+2+1+1+22) add up to 32, the size of `value`.
         // Presumably the EBX output of cpuid leaf 7, judging by the name. The bit numbers
         // noted below assume bit-fields are allocated from the least significant bit
         // upwards, which is compiler-defined -- TODO confirm for every supported compiler.
 200     uint32_t value;
 201     struct {
 202       uint32_t fsgsbase : 1,  // bit 0
 203                         : 2,  // bits 1-2, unnamed padding
 204                    bmi1 : 1,  // bit 3; feature detection maps it to CPU_BMI1
 205                         : 1,  // bit 4, unnamed padding
 206                    avx2 : 1,  // bit 5; only consulted once the AVX checks have passed
 207                         : 2,  // bits 6-7, unnamed padding
 208                    bmi2 : 1,  // bit 8; only consulted inside the is_intel() branch
 209                    erms : 1,  // bit 9; feature detection maps it to CPU_ERMS
 210                         : 22; // bits 10-31, unnamed padding


 211     } bits;
 212   };
 213 
 214   union XemXcr0Eax {
         // Overlays a raw 32-bit value with three named one-bit flags plus 29 bits of padding.
         // Feature detection requires both `sse` and `ymm` to be non-zero (together with the
         // cpuid avx and osxsave bits) before it reports CPU_AVX.
         // Presumably the low half of XCR0 as returned by xgetbv, judging by the name -- TODO confirm.
 215     uint32_t value;
 216     struct {
 217       uint32_t x87 : 1,
 218                sse : 1,
 219                ymm : 1,
 220                    : 29;  // unnamed padding up to 32 bits
 221     } bits;
 222   };
 223 
 224 protected:
 225   static int _cpu;            // presumably the processor family -- TODO confirm where it is assigned
 226   static int _model;          // presumably the processor model number -- TODO confirm
 227   static int _stepping;       // presumably the processor stepping -- TODO confirm
 228   static int _cpuFeatures;     // features returned by the "cpuid" instruction
 229                                // 0 if this instruction is not available
       // _cpuFeatures is a bit mask: the supports_*() predicates test it against the CPU_* flags.
 230   static const char* _features_str;


 240     CPU_MMX    = (1 << 4),
 241     CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
 242                                     // may not necessarily support other 3dnow instructions
 243     CPU_SSE    = (1 << 6),
 244     CPU_SSE2   = (1 << 7),
 245     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
 246     CPU_SSSE3  = (1 << 9),
 247     CPU_SSE4A  = (1 << 10),
 248     CPU_SSE4_1 = (1 << 11),
 249     CPU_SSE4_2 = (1 << 12),
 250     CPU_POPCNT = (1 << 13),
 251     CPU_LZCNT  = (1 << 14),
 252     CPU_TSC    = (1 << 15),
 253     CPU_TSCINV = (1 << 16),
 254     CPU_AVX    = (1 << 17),
 255     CPU_AVX2   = (1 << 18),
 256     CPU_AES    = (1 << 19),
 257     CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
 258     CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
 259     CPU_BMI1   = (1 << 22),
 260     CPU_BMI2   = (1 << 23)

 261   } cpuFeatureFlags;
 262 
 263   enum {
         // Family and model numbers used to recognise specific processors.
         // is_intel_family_core() compares extended_cpu_family() with CPU_FAMILY_INTEL_CORE, and
         // is_intel_tsc_synched_at_init() compares extended_cpu_model() with the *_EP models.
         // NOTE(review): as written, the trailing `cpuExtendedFamily` declares an object of this
         // unnamed enum type rather than naming the type -- confirm that is intended.
 264     // AMD
 265     CPU_FAMILY_AMD_11H       = 0x11,
 266     // Intel
 267     CPU_FAMILY_INTEL_CORE    = 6,
 268     CPU_MODEL_NEHALEM        = 0x1e,
 269     CPU_MODEL_NEHALEM_EP     = 0x1a,
 270     CPU_MODEL_NEHALEM_EX     = 0x2e,
 271     CPU_MODEL_WESTMERE       = 0x25,
 272     CPU_MODEL_WESTMERE_EP    = 0x2c,
 273     CPU_MODEL_WESTMERE_EX    = 0x2f,
 274     CPU_MODEL_SANDYBRIDGE    = 0x2a,
 275     CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
 276     CPU_MODEL_IVYBRIDGE_EP   = 0x3a
 277   } cpuExtendedFamily;
 278 
 279   // cpuid information block.  All info derived from executing cpuid with
 280   // various function numbers is stored here.  Intel and AMD info is


 427     if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
 428         _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
 429         _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
 430         _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
 431       result |= CPU_AVX;
 432       if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
 433         result |= CPU_AVX2;
 434     }
 435     if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
 436       result |= CPU_BMI1;
 437     if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
 438       result |= CPU_TSC;
 439     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
 440       result |= CPU_TSCINV;
 441     if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
 442       result |= CPU_AES;
 443     if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
 444       result |= CPU_ERMS;
 445     if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
 446       result |= CPU_CLMUL;


 447 
 448     // AMD features.
 449     if (is_amd()) {
 450       if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
 451           (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
 452         result |= CPU_3DNOW_PREFETCH;
 453       if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
 454         result |= CPU_LZCNT;
 455       if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
 456         result |= CPU_SSE4A;
 457     }
 458     // Intel features.
 459     if(is_intel()) {
 460       if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
 461         result |= CPU_BMI2;
 462       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
 463         result |= CPU_LZCNT;
 464     }
 465 
 466     return result;


 497   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
       // Each accessor in this group returns byte_offset_of(CpuidInfo, <field>) as a ByteSize,
       // i.e. the position of the named field inside the CpuidInfo block.
       // Presumably used by code that addresses the block by raw offset -- TODO confirm the callers.
 498   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
 499   static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
 500   static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
 501 
 502   // The value used to check the ymm register after signal handling.
       // NOTE(review): 0xCAFEBABE is larger than INT_MAX when int is 32 bits, so the returned int
       // is presumably negative there -- confirm callers compare against this function's result
       // rather than against the literal.
 503   static int ymm_test_value()    { return 0xCAFEBABE; }
 504 
 505   static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }  // records pc
 506   static bool  is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }  // exact match against the recorded pc
 507   static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }  // records pc
 508   static address  cpuinfo_cont_addr()           { return _cpuinfo_cont_addr; }  // returns the recorded pc
       // _cpuinfo_segv_addr and _cpuinfo_cont_addr are declared outside this excerpt.
       // Presumably they mark an instruction expected to fault while probing the CPU and the
       // address at which to resume afterwards -- TODO confirm against the signal handler.
 509 
 510   static void clean_cpuFeatures()   { _cpuFeatures = 0; }  // clears every feature bit; supports_cpuid() is false afterwards
       // set_avx_cpuFeatures() overwrites the mask (it does not OR into it), leaving exactly
       // CPU_SSE, CPU_SSE2 and CPU_AVX set and discarding any other bit that was set before.
 511   static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); }
 512 
 513 
 514   // Initialization
 515   static void initialize();
 516 



 517   // Asserts
       // Treats a non-zero `family` field in _cpuid_info.std_cpuid1_eax as evidence that the
       // cpuid information block has been filled in; fails with "VM_Version not initialized" otherwise.
       // The `assert` macro is defined outside this excerpt -- presumably active in debug builds only.
 518   static void assert_is_initialized() {
 519     assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
 520   }
 521 
 522   //
 523   // Processor family:
 524   //       3   -  386
 525   //       4   -  486
 526   //       5   -  Pentium
 527   //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
 528   //              Pentium M, Core Solo, Core Duo, Core2 Duo
 529   //    family 6 model:   9,        13,       14,        15
 530   //    0x0f   -  Pentium 4, Opteron
 531   //
 532   // Note: The cpu family should be used to select between
 533   //       instruction sequences which are valid on all Intel
 534   //       processors.  Use the feature test functions below to
 535   //       determine whether a particular instruction is supported.
 536   //


 589   //
 590   static bool supports_cpuid()    { return _cpuFeatures  != 0; }
       // supports_cpuid() above tests the whole mask: it is true when any feature bit is set.
       // Every predicate below is true exactly when its CPU_* bit is set in _cpuFeatures.
 591   static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
 592   static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
 593   static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
 594   static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
 595   static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
 596   static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
 597   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
 598   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
 599   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
 600   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
 601   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
 602   static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
 603   static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
 604   static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
 605   static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
 606   static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
 607   static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
 608   static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }

 609   static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }
 610   static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }
 611   // Intel features
 612   static bool is_intel_family_core() { return is_intel() &&  // true only when is_intel() holds and ...
 613                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }  // ... the extended family equals CPU_FAMILY_INTEL_CORE (6)
 614 
 615   static bool is_intel_tsc_synched_at_init()  {
 616     if (is_intel_family_core()) {
 617       uint32_t ext_model = extended_cpu_model();
 618       if (ext_model == CPU_MODEL_NEHALEM_EP     ||
 619           ext_model == CPU_MODEL_WESTMERE_EP    ||
 620           ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
 621           ext_model == CPU_MODEL_IVYBRIDGE_EP) {
 622         // <= 2-socket invariant tsc support. EX versions are usually used
 623         // in > 2-socket systems and likely don't synchronize tscs at
 624         // initialization.
 625         // Code that uses tsc values must be prepared for them to arbitrarily
 626         // jump forward or backward.
 627         return true;
 628       }




 190       uint32_t cores_per_cpu : 8,
 191                              : 24;
 192     } bits;
 193   };
 194 
 195   union SefCpuid7Eax {
         // Holds one raw 32-bit value; no individual bits are broken out here.
         // Presumably the EAX output of cpuid leaf 7, judging by the name -- TODO confirm.
 196     uint32_t value;
 197   };
 198 
 199   union SefCpuid7Ebx {
         // Overlays a raw 32-bit value with named one-bit feature flags.
         // The declared widths (1+2+1+1+1+2+1+1+1+1+20) add up to 32, the size of `value`.
         // Presumably the EBX output of cpuid leaf 7, judging by the name. The bit numbers
         // noted below assume bit-fields are allocated from the least significant bit
         // upwards, which is compiler-defined -- TODO confirm for every supported compiler.
 200     uint32_t value;
 201     struct {
 202       uint32_t fsgsbase : 1,  // bit 0
 203                         : 2,  // bits 1-2, unnamed padding
 204                    bmi1 : 1,  // bit 3; feature detection maps it to CPU_BMI1
 205                         : 1,  // bit 4, unnamed padding
 206                    avx2 : 1,  // bit 5; only consulted once the AVX checks have passed
 207                         : 2,  // bits 6-7, unnamed padding
 208                    bmi2 : 1,  // bit 8; only consulted inside the is_intel() branch
 209                    erms : 1,  // bit 9; feature detection maps it to CPU_ERMS
 210                         : 1,  // bit 10, unnamed padding
 211                    rtm  : 1,  // bit 11; feature detection maps it to CPU_RTM
 212                         : 20; // bits 12-31, unnamed padding
 213     } bits;
 214   };
 215 
 216   union XemXcr0Eax {
         // Overlays a raw 32-bit value with three named one-bit flags plus 29 bits of padding.
         // Feature detection requires both `sse` and `ymm` to be non-zero (together with the
         // cpuid avx and osxsave bits) before it reports CPU_AVX.
         // Presumably the low half of XCR0 as returned by xgetbv, judging by the name -- TODO confirm.
 217     uint32_t value;
 218     struct {
 219       uint32_t x87 : 1,
 220                sse : 1,
 221                ymm : 1,
 222                    : 29;  // unnamed padding up to 32 bits
 223     } bits;
 224   };
 225 
 226 protected:
 227   static int _cpu;            // presumably the processor family -- TODO confirm where it is assigned
 228   static int _model;          // presumably the processor model number -- TODO confirm
 229   static int _stepping;       // presumably the processor stepping -- TODO confirm
 230   static int _cpuFeatures;     // features returned by the "cpuid" instruction
 231                                // 0 if this instruction is not available
       // _cpuFeatures is a bit mask: the supports_*() predicates test it against the CPU_* flags.
 232   static const char* _features_str;


 242     CPU_MMX    = (1 << 4),
 243     CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
 244                                     // may not necessarily support other 3dnow instructions
 245     CPU_SSE    = (1 << 6),
 246     CPU_SSE2   = (1 << 7),
 247     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
 248     CPU_SSSE3  = (1 << 9),
 249     CPU_SSE4A  = (1 << 10),
 250     CPU_SSE4_1 = (1 << 11),
 251     CPU_SSE4_2 = (1 << 12),
 252     CPU_POPCNT = (1 << 13),
 253     CPU_LZCNT  = (1 << 14),
 254     CPU_TSC    = (1 << 15),
 255     CPU_TSCINV = (1 << 16),
 256     CPU_AVX    = (1 << 17),
 257     CPU_AVX2   = (1 << 18),
 258     CPU_AES    = (1 << 19),
 259     CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
 260     CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
 261     CPU_BMI1   = (1 << 22),
 262     CPU_BMI2   = (1 << 23),
 263     CPU_RTM    = (1 << 24)  // Restricted Transactional Memory instructions
 264   } cpuFeatureFlags;
 265 
 266   enum {
         // Family and model numbers used to recognise specific processors.
         // is_intel_family_core() compares extended_cpu_family() with CPU_FAMILY_INTEL_CORE, and
         // is_intel_tsc_synched_at_init() compares extended_cpu_model() with the *_EP models.
         // NOTE(review): as written, the trailing `cpuExtendedFamily` declares an object of this
         // unnamed enum type rather than naming the type -- confirm that is intended.
 267     // AMD
 268     CPU_FAMILY_AMD_11H       = 0x11,
 269     // Intel
 270     CPU_FAMILY_INTEL_CORE    = 6,
 271     CPU_MODEL_NEHALEM        = 0x1e,
 272     CPU_MODEL_NEHALEM_EP     = 0x1a,
 273     CPU_MODEL_NEHALEM_EX     = 0x2e,
 274     CPU_MODEL_WESTMERE       = 0x25,
 275     CPU_MODEL_WESTMERE_EP    = 0x2c,
 276     CPU_MODEL_WESTMERE_EX    = 0x2f,
 277     CPU_MODEL_SANDYBRIDGE    = 0x2a,
 278     CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
 279     CPU_MODEL_IVYBRIDGE_EP   = 0x3a
 280   } cpuExtendedFamily;
 281 
 282   // cpuid information block.  All info derived from executing cpuid with
 283   // various function numbers is stored here.  Intel and AMD info is


 430     if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
 431         _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
 432         _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
 433         _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
 434       result |= CPU_AVX;
 435       if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
 436         result |= CPU_AVX2;
 437     }
 438     if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
 439       result |= CPU_BMI1;
 440     if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
 441       result |= CPU_TSC;
 442     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
 443       result |= CPU_TSCINV;
 444     if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
 445       result |= CPU_AES;
 446     if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
 447       result |= CPU_ERMS;
 448     if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
 449       result |= CPU_CLMUL;
 450     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
 451       result |= CPU_RTM;
 452 
 453     // AMD features.
 454     if (is_amd()) {
 455       if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
 456           (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
 457         result |= CPU_3DNOW_PREFETCH;
 458       if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
 459         result |= CPU_LZCNT;
 460       if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
 461         result |= CPU_SSE4A;
 462     }
 463     // Intel features.
 464     if(is_intel()) {
 465       if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
 466         result |= CPU_BMI2;
 467       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
 468         result |= CPU_LZCNT;
 469     }
 470 
 471     return result;


 502   static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
       // Each accessor in this group returns byte_offset_of(CpuidInfo, <field>) as a ByteSize,
       // i.e. the position of the named field inside the CpuidInfo block.
       // Presumably used by code that addresses the block by raw offset -- TODO confirm the callers.
 503   static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
 504   static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
 505   static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
 506 
 507   // The value used to check the ymm register after signal handling.
       // NOTE(review): 0xCAFEBABE is larger than INT_MAX when int is 32 bits, so the returned int
       // is presumably negative there -- confirm callers compare against this function's result
       // rather than against the literal.
 508   static int ymm_test_value()    { return 0xCAFEBABE; }
 509 
 510   static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }  // records pc
 511   static bool  is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }  // exact match against the recorded pc
 512   static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }  // records pc
 513   static address  cpuinfo_cont_addr()           { return _cpuinfo_cont_addr; }  // returns the recorded pc
       // _cpuinfo_segv_addr and _cpuinfo_cont_addr are declared outside this excerpt.
       // Presumably they mark an instruction expected to fault while probing the CPU and the
       // address at which to resume afterwards -- TODO confirm against the signal handler.
 514 
 515   static void clean_cpuFeatures()   { _cpuFeatures = 0; }  // clears every feature bit; supports_cpuid() is false afterwards
       // set_avx_cpuFeatures() overwrites the mask (it does not OR into it), leaving exactly
       // CPU_SSE, CPU_SSE2 and CPU_AVX set and discarding any other bit that was set before.
 516   static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); }
 517 
 518 
 519   // Initialization
 520   static void initialize();
 521 
 522   // Override Abstract_VM_Version implementation
 523   static bool use_biased_locking();
 524 
 525   // Asserts
       // Treats a non-zero `family` field in _cpuid_info.std_cpuid1_eax as evidence that the
       // cpuid information block has been filled in; fails with "VM_Version not initialized" otherwise.
       // The `assert` macro is defined outside this excerpt -- presumably active in debug builds only.
 526   static void assert_is_initialized() {
 527     assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
 528   }
 529 
 530   //
 531   // Processor family:
 532   //       3   -  386
 533   //       4   -  486
 534   //       5   -  Pentium
 535   //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
 536   //              Pentium M, Core Solo, Core Duo, Core2 Duo
 537   //    family 6 model:   9,        13,       14,        15
 538   //    0x0f   -  Pentium 4, Opteron
 539   //
 540   // Note: The cpu family should be used to select between
 541   //       instruction sequences which are valid on all Intel
 542   //       processors.  Use the feature test functions below to
 543   //       determine whether a particular instruction is supported.
 544   //


 597   //
 598   static bool supports_cpuid()    { return _cpuFeatures  != 0; }
       // supports_cpuid() above tests the whole mask: it is true when any feature bit is set.
       // Every predicate below is true exactly when its CPU_* bit is set in _cpuFeatures.
 599   static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
 600   static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
 601   static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
 602   static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
 603   static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
 604   static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
 605   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
 606   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
 607   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
 608   static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
 609   static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
 610   static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
 611   static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
 612   static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
 613   static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
 614   static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
 615   static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
 616   static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }
 617   static bool supports_rtm()      { return (_cpuFeatures & CPU_RTM) != 0; }
 618   static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }
 619   static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }
 620   // Intel features
 621   static bool is_intel_family_core() { return is_intel() &&  // true only when is_intel() holds and ...
 622                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }  // ... the extended family equals CPU_FAMILY_INTEL_CORE (6)
 623 
 624   static bool is_intel_tsc_synched_at_init()  {
 625     if (is_intel_family_core()) {
 626       uint32_t ext_model = extended_cpu_model();
 627       if (ext_model == CPU_MODEL_NEHALEM_EP     ||
 628           ext_model == CPU_MODEL_WESTMERE_EP    ||
 629           ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
 630           ext_model == CPU_MODEL_IVYBRIDGE_EP) {
 631         // <= 2-socket invariant tsc support. EX versions are usually used
 632         // in > 2-socket systems and likely don't synchronize tscs at
 633         // initialization.
 634         // Code that uses tsc values must be prepared for them to arbitrarily
 635         // jump forward or backward.
 636         return true;
 637       }


src/cpu/x86/vm/vm_version_x86.hpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File