< prev index next >

src/hotspot/cpu/x86/vm_version_x86.hpp

Print this page
rev 49218 : Add support for vector popcount


 211                         : 2,
 212                    bmi2 : 1,
 213                    erms : 1,
 214                         : 1,
 215                     rtm : 1,
 216                         : 4,
 217                 avx512f : 1,
 218                avx512dq : 1,
 219                         : 1,
 220                     adx : 1,
 221                         : 6,
 222                avx512pf : 1,
 223                avx512er : 1,
 224                avx512cd : 1,
 225                     sha : 1,
 226                avx512bw : 1,
 227                avx512vl : 1;
 228     } bits;
 229   };
 230 
































 231   union ExtCpuid1EEbx {                 // one 32-bit word, readable whole ('value') or by field ('bits')
 232     uint32_t value;                     // the full 32-bit word
 233     struct {
 234       uint32_t                  : 8,    // 8 unnamed bits, not decoded by this type
 235                threads_per_core : 8,    // 8-bit field; NOTE(review): exact encoding (count vs. count-1) is not visible here -- confirm against the vendor CPUID documentation
 236                                 : 16;   // 16 unnamed bits; declared widths total 8 + 8 + 16 = 32, matching 'value'
 237     } bits;                             // field view sharing storage with 'value'
 238   };
 239 
 240   union XemXcr0Eax {
 241     uint32_t value;
 242     struct {
 243       uint32_t x87     : 1,
 244                sse     : 1,
 245                ymm     : 1,
 246                bndregs : 1,
 247                bndcsr  : 1,
 248                opmask  : 1,
 249                zmm512  : 1,
 250                zmm32   : 1,


 284     CPU_AES      = (1 << 19),
 285     CPU_ERMS     = (1 << 20), // enhanced 'rep movsb/stosb' instructions
 286     CPU_CLMUL    = (1 << 21), // carryless multiply for CRC
 287     CPU_BMI1     = (1 << 22),
 288     CPU_BMI2     = (1 << 23),
 289     CPU_RTM      = (1 << 24), // Restricted Transactional Memory instructions
 290     CPU_ADX      = (1 << 25),
 291     CPU_AVX512F  = (1 << 26), // AVX 512bit foundation instructions
 292     CPU_AVX512DQ = (1 << 27),
 293     CPU_AVX512PF = (1 << 28),
 294     CPU_AVX512ER = (1 << 29),
 295     CPU_AVX512CD = (1 << 30)
 296     // Keeping sign bit 31 unassigned.
 297   };
 298 
 299 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; a macro because the enum above stops at bit 30 (enums are limited to 31 bits)
 300 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
 301 #define CPU_SHA ((uint64_t)UCONST64(0x400000000))      // bit 34; SHA instructions
 302 #define CPU_FMA ((uint64_t)UCONST64(0x800000000))      // bit 35; FMA instructions
 303 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000))      // bit 36; Vzeroupper instruction

 304 
 305   enum Extended_Family {
 306     // AMD
 307     CPU_FAMILY_AMD_11H       = 0x11,
 308     // ZX
 309     CPU_FAMILY_ZX_CORE_F6    = 6,
 310     CPU_FAMILY_ZX_CORE_F7    = 7,
 311     // Intel
 312     CPU_FAMILY_INTEL_CORE    = 6,
 313     CPU_MODEL_NEHALEM        = 0x1e,
 314     CPU_MODEL_NEHALEM_EP     = 0x1a,
 315     CPU_MODEL_NEHALEM_EX     = 0x2e,
 316     CPU_MODEL_WESTMERE       = 0x25,
 317     CPU_MODEL_WESTMERE_EP    = 0x2c,
 318     CPU_MODEL_WESTMERE_EX    = 0x2f,
 319     CPU_MODEL_SANDYBRIDGE    = 0x2a,
 320     CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
 321     CPU_MODEL_IVYBRIDGE_EP   = 0x3a,
 322     CPU_MODEL_HASWELL_E3     = 0x3c,
 323     CPU_MODEL_HASWELL_E7     = 0x3f,


 336     uint32_t std_max_function;
 337     uint32_t std_vendor_name_0;
 338     uint32_t std_vendor_name_1;
 339     uint32_t std_vendor_name_2;
 340 
 341     // cpuid function 1
 342     StdCpuid1Eax std_cpuid1_eax;
 343     StdCpuid1Ebx std_cpuid1_ebx;
 344     StdCpuid1Ecx std_cpuid1_ecx;
 345     StdCpuid1Edx std_cpuid1_edx;
 346 
 347     // cpuid function 4 (deterministic cache parameters)
 348     DcpCpuid4Eax dcp_cpuid4_eax;
 349     DcpCpuid4Ebx dcp_cpuid4_ebx;
 350     uint32_t     dcp_cpuid4_ecx; // unused currently
 351     uint32_t     dcp_cpuid4_edx; // unused currently
 352 
 353     // cpuid function 7 (structured extended features)
 354     SefCpuid7Eax sef_cpuid7_eax;
 355     SefCpuid7Ebx sef_cpuid7_ebx;
 356     uint32_t     sef_cpuid7_ecx; // unused currently
 357     uint32_t     sef_cpuid7_edx; // unused currently
 358 
 359     // cpuid function 0xB (processor topology)
 360     // ecx = 0
 361     uint32_t     tpl_cpuidB0_eax;
 362     TplCpuidBEbx tpl_cpuidB0_ebx;
 363     uint32_t     tpl_cpuidB0_ecx; // unused currently
 364     uint32_t     tpl_cpuidB0_edx; // unused currently
 365 
 366     // ecx = 1
 367     uint32_t     tpl_cpuidB1_eax;
 368     TplCpuidBEbx tpl_cpuidB1_ebx;
 369     uint32_t     tpl_cpuidB1_ecx; // unused currently
 370     uint32_t     tpl_cpuidB1_edx; // unused currently
 371 
 372     // ecx = 2
 373     uint32_t     tpl_cpuidB2_eax;
 374     TplCpuidBEbx tpl_cpuidB2_ebx;
 375     uint32_t     tpl_cpuidB2_ecx; // unused currently
 376     uint32_t     tpl_cpuidB2_edx; // unused currently
 377 


 490       result |= CPU_VZEROUPPER;
 491       if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
 492         result |= CPU_AVX2;
 493       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
 494           _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
 495           _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
 496           _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
 497         result |= CPU_AVX512F;
 498         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
 499           result |= CPU_AVX512CD;
 500         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
 501           result |= CPU_AVX512DQ;
 502         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
 503           result |= CPU_AVX512PF;
 504         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
 505           result |= CPU_AVX512ER;
 506         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
 507           result |= CPU_AVX512BW;
 508         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
 509           result |= CPU_AVX512VL;


 510       }
 511     }
 512     if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
 513       result |= CPU_BMI1;
 514     if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
 515       result |= CPU_TSC;
 516     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
 517       result |= CPU_TSCINV;
 518     if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
 519       result |= CPU_AES;
 520     if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
 521       result |= CPU_ERMS;
 522     if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
 523       result |= CPU_CLMUL;
 524     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
 525       result |= CPU_RTM;
 526     if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
 527        result |= CPU_ADX;
 528     if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
 529       result |= CPU_BMI2;


 766   static bool supports_clmul()    { return (_features & CPU_CLMUL) != 0; } // each single-bit query below tests one CPU_* mask against _features
 767   static bool supports_rtm()      { return (_features & CPU_RTM) != 0; }
 768   static bool supports_bmi1()     { return (_features & CPU_BMI1) != 0; }
 769   static bool supports_bmi2()     { return (_features & CPU_BMI2) != 0; }
 770   static bool supports_adx()      { return (_features & CPU_ADX) != 0; }
 771   static bool supports_evex()     { return (_features & CPU_AVX512F) != 0; } // "evex" is reported through the CPU_AVX512F bit
 772   static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
 773   static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
 774   static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
 775   static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
 776   static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
 777   static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
 778   static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); } // both the BW and the VL bits are set
 779   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } // AVX512F bit set, VL bit clear
 780   static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } // AVX512F bit set, BW bit clear
 781   static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } // AVX2 reported, AVX512F bit clear
 782   static bool supports_avxonly()    { return ((supports_avx2() || supports_avx()) && !supports_evex()); } // AVX or AVX2 reported, AVX512F bit clear
 783   static bool supports_sha()        { return (_features & CPU_SHA) != 0; }
 784   static bool supports_fma()        { return (_features & CPU_FMA) != 0 && supports_avx(); } // the FMA bit alone is not enough: supports_avx() must also hold
 785   static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }

 786 
 787   // Intel features
 788   static bool is_intel_family_core() { return is_intel() && // vendor check matters: CPU_FAMILY_INTEL_CORE and CPU_FAMILY_ZX_CORE_F6 are both 6
 789                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } // true only when the extended family equals CPU_FAMILY_INTEL_CORE
 790 
 791   static bool is_intel_tsc_synched_at_init()  {
 792     if (is_intel_family_core()) {
 793       uint32_t ext_model = extended_cpu_model();
 794       if (ext_model == CPU_MODEL_NEHALEM_EP     ||
 795           ext_model == CPU_MODEL_WESTMERE_EP    ||
 796           ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
 797           ext_model == CPU_MODEL_IVYBRIDGE_EP) {
 798         // <= 2-socket invariant tsc support. EX versions are usually used
 799         // in > 2-socket systems and likely don't synchronize tscs at
 800         // initialization.
 801         // Code that uses tsc values must be prepared for them to arbitrarily
 802         // jump forward or backward.
 803         return true;
 804       }
 805     }




 211                         : 2,
 212                    bmi2 : 1,
 213                    erms : 1,
 214                         : 1,
 215                     rtm : 1,
 216                         : 4,
 217                 avx512f : 1,
 218                avx512dq : 1,
 219                         : 1,
 220                     adx : 1,
 221                         : 6,
 222                avx512pf : 1,
 223                avx512er : 1,
 224                avx512cd : 1,
 225                     sha : 1,
 226                avx512bw : 1,
 227                avx512vl : 1;
 228     } bits;
 229   };
 230 
 231   union SefCpuid7Ecx {                  // typed view of the ecx word of cpuid function 7 (structured extended features); held as sef_cpuid7_ecx
 232     uint32_t value;                     // the full 32-bit word
 233     struct {                            // bit numbers below count declared widths from 0 and assume low-to-high bit-field allocation -- TODO confirm for all target compilers
 234       uint32_t prefetchwt1 : 1,         // bit 0
 235                avx512_vbmi : 1,         // bit 1
 236                       umip : 1,         // bit 2
 237                        pku : 1,         // bit 3
 238                      ospke : 1,         // bit 4
 239                            : 1,         // bit 5, unnamed
 240               avx512_vbmi2 : 1,         // bit 6
 241                            : 1,         // bit 7, unnamed
 242                       gfni : 1,         // bit 8
 243                       vaes : 1,         // bit 9
 244                 vpclmulqdq : 1,         // bit 10
 245                avx512_vnni : 1,         // bit 11
 246              avx512_bitalg : 1,         // bit 12
 247                            : 1,         // bit 13, unnamed
 248           avx512_vpopcntdq : 1,         // bit 14; feature detection tests this bit to set CPU_AVX512_VPOPCNTDQ
 249                            : 17;        // bits 15-31, unnamed; declared widths total 32, matching 'value'
 250     } bits;                             // field view sharing storage with 'value'
 251   };
 252 
 253   union SefCpuid7Edx {                  // typed view of the edx word of cpuid function 7 (structured extended features); held as sef_cpuid7_edx
 254     uint32_t value;                     // the full 32-bit word
 255     struct {                            // bit numbers below count declared widths from 0 and assume low-to-high bit-field allocation -- TODO confirm for all target compilers
 256       uint32_t             : 2,         // bits 0-1, unnamed
 257              avx512_4vnniw : 1,         // bit 2
 258              avx512_4fmaps : 1,         // bit 3
 259                            : 28;        // bits 4-31, unnamed; declared widths total 2 + 1 + 1 + 28 = 32
 260     } bits;                             // field view sharing storage with 'value'
 261   };
 262 
 263   union ExtCpuid1EEbx {                 // one 32-bit word, readable whole ('value') or by field ('bits')
 264     uint32_t value;                     // the full 32-bit word
 265     struct {
 266       uint32_t                  : 8,    // 8 unnamed bits, not decoded by this type
 267                threads_per_core : 8,    // 8-bit field; NOTE(review): exact encoding (count vs. count-1) is not visible here -- confirm against the vendor CPUID documentation
 268                                 : 16;   // 16 unnamed bits; declared widths total 8 + 8 + 16 = 32, matching 'value'
 269     } bits;                             // field view sharing storage with 'value'
 270   };
 271 
 272   union XemXcr0Eax {
 273     uint32_t value;
 274     struct {
 275       uint32_t x87     : 1,
 276                sse     : 1,
 277                ymm     : 1,
 278                bndregs : 1,
 279                bndcsr  : 1,
 280                opmask  : 1,
 281                zmm512  : 1,
 282                zmm32   : 1,


 316     CPU_AES      = (1 << 19),
 317     CPU_ERMS     = (1 << 20), // enhanced 'rep movsb/stosb' instructions
 318     CPU_CLMUL    = (1 << 21), // carryless multiply for CRC
 319     CPU_BMI1     = (1 << 22),
 320     CPU_BMI2     = (1 << 23),
 321     CPU_RTM      = (1 << 24), // Restricted Transactional Memory instructions
 322     CPU_ADX      = (1 << 25),
 323     CPU_AVX512F  = (1 << 26), // AVX 512bit foundation instructions
 324     CPU_AVX512DQ = (1 << 27),
 325     CPU_AVX512PF = (1 << 28),
 326     CPU_AVX512ER = (1 << 29),
 327     CPU_AVX512CD = (1 << 30)
 328     // Keeping sign bit 31 unassigned.
 329   };
 330 
 331 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; a macro because the enum above stops at bit 30 (enums are limited to 31 bits)
 332 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
 333 #define CPU_SHA ((uint64_t)UCONST64(0x400000000))      // bit 34; SHA instructions
 334 #define CPU_FMA ((uint64_t)UCONST64(0x800000000))      // bit 35; FMA instructions
 335 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000))       // bit 36; Vzeroupper instruction
 336 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // bit 37; Vector popcount
 337 
 338   enum Extended_Family {
 339     // AMD
 340     CPU_FAMILY_AMD_11H       = 0x11,
 341     // ZX
 342     CPU_FAMILY_ZX_CORE_F6    = 6,
 343     CPU_FAMILY_ZX_CORE_F7    = 7,
 344     // Intel
 345     CPU_FAMILY_INTEL_CORE    = 6,
 346     CPU_MODEL_NEHALEM        = 0x1e,
 347     CPU_MODEL_NEHALEM_EP     = 0x1a,
 348     CPU_MODEL_NEHALEM_EX     = 0x2e,
 349     CPU_MODEL_WESTMERE       = 0x25,
 350     CPU_MODEL_WESTMERE_EP    = 0x2c,
 351     CPU_MODEL_WESTMERE_EX    = 0x2f,
 352     CPU_MODEL_SANDYBRIDGE    = 0x2a,
 353     CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
 354     CPU_MODEL_IVYBRIDGE_EP   = 0x3a,
 355     CPU_MODEL_HASWELL_E3     = 0x3c,
 356     CPU_MODEL_HASWELL_E7     = 0x3f,


 369     uint32_t std_max_function;
 370     uint32_t std_vendor_name_0;
 371     uint32_t std_vendor_name_1;
 372     uint32_t std_vendor_name_2;
 373 
 374     // cpuid function 1
 375     StdCpuid1Eax std_cpuid1_eax;
 376     StdCpuid1Ebx std_cpuid1_ebx;
 377     StdCpuid1Ecx std_cpuid1_ecx;
 378     StdCpuid1Edx std_cpuid1_edx;
 379 
 380     // cpuid function 4 (deterministic cache parameters)
 381     DcpCpuid4Eax dcp_cpuid4_eax;
 382     DcpCpuid4Ebx dcp_cpuid4_ebx;
 383     uint32_t     dcp_cpuid4_ecx; // unused currently
 384     uint32_t     dcp_cpuid4_edx; // unused currently
 385 
 386     // cpuid function 7 (structured extended features)
 387     SefCpuid7Eax sef_cpuid7_eax;
 388     SefCpuid7Ebx sef_cpuid7_ebx;
 389     SefCpuid7Ecx sef_cpuid7_ecx;
 390     SefCpuid7Edx sef_cpuid7_edx;
 391 
 392     // cpuid function 0xB (processor topology)
 393     // ecx = 0
 394     uint32_t     tpl_cpuidB0_eax;
 395     TplCpuidBEbx tpl_cpuidB0_ebx;
 396     uint32_t     tpl_cpuidB0_ecx; // unused currently
 397     uint32_t     tpl_cpuidB0_edx; // unused currently
 398 
 399     // ecx = 1
 400     uint32_t     tpl_cpuidB1_eax;
 401     TplCpuidBEbx tpl_cpuidB1_ebx;
 402     uint32_t     tpl_cpuidB1_ecx; // unused currently
 403     uint32_t     tpl_cpuidB1_edx; // unused currently
 404 
 405     // ecx = 2
 406     uint32_t     tpl_cpuidB2_eax;
 407     TplCpuidBEbx tpl_cpuidB2_ebx;
 408     uint32_t     tpl_cpuidB2_ecx; // unused currently
 409     uint32_t     tpl_cpuidB2_edx; // unused currently
 410 


 523       result |= CPU_VZEROUPPER;
 524       if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
 525         result |= CPU_AVX2;
 526       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
 527           _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
 528           _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
 529           _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
 530         result |= CPU_AVX512F;
 531         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
 532           result |= CPU_AVX512CD;
 533         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
 534           result |= CPU_AVX512DQ;
 535         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
 536           result |= CPU_AVX512PF;
 537         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
 538           result |= CPU_AVX512ER;
 539         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
 540           result |= CPU_AVX512BW;
 541         if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
 542           result |= CPU_AVX512VL;
 543         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
 544           result |= CPU_AVX512_VPOPCNTDQ;
 545       }
 546     }
 547     if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
 548       result |= CPU_BMI1;
 549     if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
 550       result |= CPU_TSC;
 551     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
 552       result |= CPU_TSCINV;
 553     if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
 554       result |= CPU_AES;
 555     if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
 556       result |= CPU_ERMS;
 557     if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
 558       result |= CPU_CLMUL;
 559     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
 560       result |= CPU_RTM;
 561     if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
 562        result |= CPU_ADX;
 563     if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
 564       result |= CPU_BMI2;


 801   static bool supports_clmul()    { return (_features & CPU_CLMUL) != 0; } // each single-bit query below tests one CPU_* mask against _features
 802   static bool supports_rtm()      { return (_features & CPU_RTM) != 0; }
 803   static bool supports_bmi1()     { return (_features & CPU_BMI1) != 0; }
 804   static bool supports_bmi2()     { return (_features & CPU_BMI2) != 0; }
 805   static bool supports_adx()      { return (_features & CPU_ADX) != 0; }
 806   static bool supports_evex()     { return (_features & CPU_AVX512F) != 0; } // "evex" is reported through the CPU_AVX512F bit
 807   static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
 808   static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
 809   static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
 810   static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
 811   static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
 812   static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
 813   static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); } // both the BW and the VL bits are set
 814   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); } // AVX512F bit set, VL bit clear
 815   static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); } // AVX512F bit set, BW bit clear
 816   static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); } // AVX2 reported, AVX512F bit clear
 817   static bool supports_avxonly()    { return ((supports_avx2() || supports_avx()) && !supports_evex()); } // AVX or AVX2 reported, AVX512F bit clear
 818   static bool supports_sha()        { return (_features & CPU_SHA) != 0; }
 819   static bool supports_fma()        { return (_features & CPU_FMA) != 0 && supports_avx(); } // the FMA bit alone is not enough: supports_avx() must also hold
 820   static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
 821   static bool supports_vpopcntdq()  { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; } // this bit is only set inside the branch of feature detection that also sets CPU_AVX512F
 822 
 823   // Intel features
 824   static bool is_intel_family_core() { return is_intel() && // vendor check matters: CPU_FAMILY_INTEL_CORE and CPU_FAMILY_ZX_CORE_F6 are both 6
 825                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } // true only when the extended family equals CPU_FAMILY_INTEL_CORE
 826 
 827   static bool is_intel_tsc_synched_at_init()  {
 828     if (is_intel_family_core()) {
 829       uint32_t ext_model = extended_cpu_model();
 830       if (ext_model == CPU_MODEL_NEHALEM_EP     ||
 831           ext_model == CPU_MODEL_WESTMERE_EP    ||
 832           ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
 833           ext_model == CPU_MODEL_IVYBRIDGE_EP) {
 834         // <= 2-socket invariant tsc support. EX versions are usually used
 835         // in > 2-socket systems and likely don't synchronize tscs at
 836         // initialization.
 837         // Code that uses tsc values must be prepared for them to arbitrarily
 838         // jump forward or backward.
 839         return true;
 840       }
 841     }


< prev index next >