1 #ifdef USE_PRAGMA_IDENT_HDR
   2 #pragma ident "@(#)vm_version_x86_32.hpp        1.32 07/07/02 16:50:39 JVM"
   3 #endif
   4 /*
   5  * Copyright 1997-2006 Sun Microsystems, Inc.  All Rights Reserved.
   6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   7  *
   8  * This code is free software; you can redistribute it and/or modify it
   9  * under the terms of the GNU General Public License version 2 only, as
  10  * published by the Free Software Foundation.
  11  *
  12  * This code is distributed in the hope that it will be useful, but WITHOUT
  13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15  * version 2 for more details (a copy is included in the LICENSE file that
  16  * accompanied this code).
  17  *
  18  * You should have received a copy of the GNU General Public License version
  19  * 2 along with this work; if not, write to the Free Software Foundation,
  20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  21  *
  22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  23  * CA 95054 USA or visit www.sun.com if you need additional information or
  24  * have any questions.
  25  *  
  26  */
  27 
  28 class VM_Version: public Abstract_VM_Version {
  29 public:
  30   // cpuid result register layouts.  These are all unions of a uint32_t
  31   // (in case anyone wants access to the register as a whole) and a bitfield.
  32 
  33   union StdCpuid1Eax {
  34     uint32_t value;
  35     struct {
  36       uint32_t stepping   : 4,
  37                model      : 4,
  38                family     : 4,
  39                proc_type  : 2,
  40                           : 2,
  41                ext_model  : 4,
  42                ext_family : 8,
  43                           : 4;
  44     } bits;
  45   };
  46 
  47   union StdCpuid1Ebx { // example, unused
  48     uint32_t value;
  49     struct {
  50       uint32_t brand_id         : 8,
  51                clflush_size     : 8,
  52                threads_per_cpu  : 8,
  53                apic_id          : 8;
  54     } bits;
  55   };
  56 
  57   union StdCpuid1Ecx {
  58     uint32_t value;
  59     struct {
  60       uint32_t sse3     : 1,
  61                         : 2,
  62                monitor  : 1,
  63                         : 1,
  64                vmx      : 1,
  65                         : 1,
  66                est      : 1,
  67                         : 1,
  68                ssse3    : 1,
  69                cid      : 1,
  70                         : 2,
  71                cmpxchg16: 1,
  72                         : 4,
  73                dca      : 1,
  74                         : 4,
  75                popcnt   : 1,
  76                         : 8;
  77     } bits;
  78   };
  79 
  80   union StdCpuid1Edx {
  81     uint32_t value;
  82     struct {
  83       uint32_t          : 4,
  84                tsc      : 1,
  85                         : 3,
  86                cmpxchg8 : 1,
  87                         : 6,
  88                cmov     : 1,
  89                         : 7,
  90                mmx      : 1,
  91                fxsr     : 1,
  92                sse      : 1,
  93                sse2     : 1,
  94                         : 1,
  95                ht       : 1,
  96                         : 3;
  97     } bits;
  98   };
  99 
 100   union DcpCpuid4Eax {
 101     uint32_t value;
 102     struct {
 103       uint32_t cache_type    : 5,
 104                              : 21,
 105                cores_per_cpu : 6;
 106     } bits;
 107   };
 108 
 109   union DcpCpuid4Ebx {
 110     uint32_t value;
 111     struct {
 112       uint32_t L1_line_size  : 12,
 113                partitions    : 10,
 114                associativity : 10;
 115     } bits;
 116   };
 117 
 118   union ExtCpuid1Ecx {
 119     uint32_t value;
 120     struct {
 121       uint32_t LahfSahf     : 1,
 122                CmpLegacy    : 1,
 123                             : 4,
 124                abm          : 1,
 125                sse4a        : 1,
 126                misalignsse  : 1,
 127                prefetchw    : 1,
 128                             : 22;
 129     } bits;
 130   };
 131 
 132   union ExtCpuid1Edx {
 133     uint32_t value;
 134     struct {
 135       uint32_t           : 22,
 136                mmx_amd   : 1,
 137                mmx       : 1,
 138                fxsr      : 1,
 139                          : 4,
 140                long_mode : 1,
 141                tdnow2    : 1,
 142                tdnow     : 1;
 143     } bits;
 144   };
 145 
 146   union ExtCpuid5Ex {
 147     uint32_t value;
 148     struct {
 149       uint32_t L1_line_size : 8,
 150                L1_tag_lines : 8,
 151                L1_assoc     : 8,
 152                L1_size      : 8;
 153     } bits;
 154   };
 155 
 156   union ExtCpuid8Ecx {
 157     uint32_t value;
 158     struct {
 159       uint32_t cores_per_cpu : 8,
 160                              : 24;
 161     } bits;
 162   };
 163 
 164 protected:
 165    static int _cpu;
 166    static int _model;
 167    static int _stepping;
 168    static int _cpuFeatures;     // features returned by the "cpuid" instruction
 169                                 // 0 if this instruction is not available
 170    static const char* _features_str;
 171 
 172    enum {
 173      CPU_CX8  = (1 << 0), // next bits are from cpuid 1 (EDX)
 174      CPU_CMOV = (1 << 1),
 175      CPU_FXSR = (1 << 2),
 176      CPU_HT   = (1 << 3),
 177      CPU_MMX  = (1 << 4),
 178      CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
 179      CPU_SSE  = (1 << 6),
 180      CPU_SSE2 = (1 << 7),
 181      CPU_SSE3 = (1 << 8), // sse3  comes from cpuid 1 (ECX)
 182      CPU_SSSE3= (1 << 9),
 183      CPU_SSE4 = (1 <<10),
 184      CPU_SSE4A= (1 <<11)
 185    } cpuFeatureFlags;
 186 
 187   // cpuid information block.  All info derived from executing cpuid with
 188   // various function numbers is stored here.  Intel and AMD info is
 189   // merged in this block: accessor methods disentangle it.
 190   //
 191   // The info block is laid out in subblocks of 4 dwords corresponding to
 192   // rax, rbx, rcx and rdx, whether or not they contain anything useful.
 193   struct CpuidInfo {
 194     // cpuid function 0
 195     uint32_t std_max_function;
 196     uint32_t std_vendor_name_0;
 197     uint32_t std_vendor_name_1;
 198     uint32_t std_vendor_name_2;
 199 
 200     // cpuid function 1
 201     StdCpuid1Eax std_cpuid1_rax;
 202     StdCpuid1Ebx std_cpuid1_rbx;
 203     StdCpuid1Ecx std_cpuid1_rcx;
 204     StdCpuid1Edx std_cpuid1_rdx;
 205 
 206     // cpuid function 4 (deterministic cache parameters)
 207     DcpCpuid4Eax dcp_cpuid4_rax;
 208     DcpCpuid4Ebx dcp_cpuid4_rbx;
 209     uint32_t     dcp_cpuid4_rcx; // unused currently
 210     uint32_t     dcp_cpuid4_rdx; // unused currently
 211 
 212     // cpuid function 0x80000000 // example, unused
 213     uint32_t ext_max_function;
 214     uint32_t ext_vendor_name_0;
 215     uint32_t ext_vendor_name_1;
 216     uint32_t ext_vendor_name_2;
 217 
 218     // cpuid function 0x80000001
 219     uint32_t     ext_cpuid1_rax; // reserved
 220     uint32_t     ext_cpuid1_rbx; // reserved
 221     ExtCpuid1Ecx ext_cpuid1_rcx;
 222     ExtCpuid1Edx ext_cpuid1_rdx;
 223 
 224     // cpuid functions 0x80000002 thru 0x80000004: example, unused
 225     uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
 226     uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
 227     uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
 228 
 229     // cpuid function 0x80000005 //AMD L1, Intel reserved
 230     uint32_t     ext_cpuid5_rax; // unused currently
 231     uint32_t     ext_cpuid5_rbx; // reserved
 232     ExtCpuid5Ex  ext_cpuid5_rcx; // L1 data cache info (AMD)
 233     ExtCpuid5Ex  ext_cpuid5_rdx; // L1 instruction cache info (AMD)
 234 
 235     // cpuid function 0x80000008
 236     uint32_t     ext_cpuid8_rax; // unused currently
 237     uint32_t     ext_cpuid8_rbx; // reserved
 238     ExtCpuid8Ecx ext_cpuid8_rcx;
 239     uint32_t     ext_cpuid8_rdx; // reserved
 240   };
 241 
 242   // The actual cpuid info block
 243   static CpuidInfo _cpuid_info;
 244 
 245   // Extractors and predicates
 246   static bool is_extended_cpu_family() {
 247     const uint32_t Extended_Cpu_Family = 0xf;
 248     return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family;
 249   }
 250   static uint32_t extended_cpu_family() {
 251     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
 252     if (is_extended_cpu_family()) {
 253       result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
 254     }
 255     return result;
 256   }
 257   static uint32_t extended_cpu_model() {
 258     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
 259     if (is_extended_cpu_family()) {
 260       result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
 261     }
 262     return result;
 263   }
 264   static uint32_t cpu_stepping() {
 265     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping;
 266     return result;
 267   }
 268   static uint logical_processor_count() {
 269     uint result = threads_per_core();
 270     return result;
 271   }
 272   static uint32_t feature_flags() {
 273     uint32_t result = 0;
 274     if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0)
 275       result |= CPU_CX8;
 276     if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0)
 277       result |= CPU_CMOV;
 278     if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() && 
 279         _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0)
 280       result |= CPU_FXSR;
 281     // HT flag is set for multi-core processors also.
 282     if (threads_per_core() > 1)
 283       result |= CPU_HT;
 284     if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() && 
 285         _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0)
 286       result |= CPU_MMX;
 287     if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0)
 288       result |= CPU_3DNOW;
 289     if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0)
 290       result |= CPU_SSE;
 291     if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0)
 292       result |= CPU_SSE2;
 293     if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0)
 294       result |= CPU_SSE3;
 295     if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0)
 296       result |= CPU_SSSE3;
 297     if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
 298       result |= CPU_SSE4A;
 299     return result;
 300   }
 301 
 302   static void get_processor_features();
 303 
 304 public:
 305   // Offsets for cpuid asm stub
 306   static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
 307   static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); }
 308   static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); }
 309   static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); }
 310   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); }
 311   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); }
 312 
 313   // Initialization
 314   static void initialize();
 315 
 316   // Asserts
 317   static void assert_is_initialized() {
 318     assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized");
 319   }
 320 
 321   //
 322   // Processor family:
 323   //       3   -  386
 324   //       4   -  486
 325   //       5   -  Pentium
 326   //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
 327   //              Pentium M, Core Solo, Core Duo, Core2 Duo
 328   //    family 6 model:   9,        13,       14,        15
 329   //    0x0f   -  Pentium 4, Opteron
 330   //
 331   // Note: The cpu family should be used to select between
 332   //       instruction sequences which are valid on all Intel
 333   //       processors.  Use the feature test functions below to
 334   //       determine whether a particular instruction is supported.
 335   //
 336   static int  cpu_family()        { return _cpu;}
 337   static bool is_P6()             { return cpu_family() >= 6; }
 338 
 339   static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
 340   static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
 341 
 342   static uint cores_per_cpu()  { 
 343     uint result = 1;
 344     if (is_intel()) {
 345       result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1);
 346     } else if (is_amd()) {
 347       result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1);
 348     }
 349     return result; 
 350   }
 351 
 352   static uint threads_per_core()  { 
 353     uint result = 1;
 354     if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) {
 355       result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu / 
 356                cores_per_cpu();
 357     }
 358     return result; 
 359   }
 360 
 361   static intx L1_data_cache_line_size()  { 
 362     intx result = 0;
 363     if (is_intel()) {
 364       result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1);
 365     } else if (is_amd()) {
 366       result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size;
 367     }
 368     if (result < 32) // not defined ?
 369       result = 32;   // 32 bytes by default on x86
 370     return result; 
 371   }
 372 
 373   //
 374   // Feature identification
 375   //
 376   static bool supports_cpuid()    { return _cpuFeatures  != 0; }
 377   static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
 378   static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
 379   static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
 380   static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
 381   static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
 382   static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
 383   static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
 384   static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
 385   static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
 386   static bool supports_sse4()     { return (_cpuFeatures & CPU_SSE4) != 0; }
 387   //
 388   // AMD features
 389   //
 390   static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
 391   static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; }
 392   static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; }
 393   static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
 394 
 395   static bool supports_compare_and_exchange() { return true; }
 396 
 397   static const char* cpu_features()           { return _features_str; }
 398 
 399   static intx allocate_prefetch_distance() {
 400     // This method should be called before allocate_prefetch_style().
 401     //
 402     // Hardware prefetching (distance/size in bytes):
 403     // Pentium 3 -  64 /  32
 404     // Pentium 4 - 256 / 128
 405     // Athlon    -  64 /  32 ????
 406     // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
 407     // Core      - 128 /  64
 408     //
 409     // Software prefetching (distance in bytes / instruction with best score):
 410     // Pentium 3 - 128 / prefetchnta
 411     // Pentium 4 - 512 / prefetchnta
 412     // Athlon    - 128 / prefetchnta
 413     // Opteron   - 256 / prefetchnta
 414     // Core      - 256 / prefetchnta
 415     // It will be used only when AllocatePrefetchStyle > 0 
 416 
 417     intx count = AllocatePrefetchDistance;
 418     if (count < 0) {   // default ?
 419       if (is_amd()) {  // AMD
 420         if (supports_sse2())
 421           count = 256; // Opteron
 422         else
 423           count = 128; // Athlon
 424       } else {         // Intel
 425         if (supports_sse2())
 426           if (cpu_family() == 6) {
 427             count = 256; // Pentium M, Core, Core2
 428           } else {
 429             count = 512; // Pentium 4
 430           }
 431         else
 432           count = 128; // Pentium 3 (and all other old CPUs)
 433       }
 434     }
 435     return count;
 436   }
 437   static intx allocate_prefetch_style() {
 438     assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
 439     // Return 0 if AllocatePrefetchDistance was not defined or
 440     // prefetch instruction is not supported.
 441     return (AllocatePrefetchDistance > 0 && 
 442             (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0;
 443   }
 444 };