1 #ifdef USE_PRAGMA_IDENT_HDR 2 #pragma ident "@(#)vm_version_x86_32.hpp 1.32 07/07/02 16:50:39 JVM" 3 #endif 4 /* 5 * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. 6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7 * 8 * This code is free software; you can redistribute it and/or modify it 9 * under the terms of the GNU General Public License version 2 only, as 10 * published by the Free Software Foundation. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 23 * CA 95054 USA or visit www.sun.com if you need additional information or 24 * have any questions. 25 * 26 */ 27 28 class VM_Version: public Abstract_VM_Version { 29 public: 30 // cpuid result register layouts. These are all unions of a uint32_t 31 // (in case anyone wants access to the register as a whole) and a bitfield. 32 33 union StdCpuid1Eax { 34 uint32_t value; 35 struct { 36 uint32_t stepping : 4, 37 model : 4, 38 family : 4, 39 proc_type : 2, 40 : 2, 41 ext_model : 4, 42 ext_family : 8, 43 : 4; 44 } bits; 45 }; 46 47 union StdCpuid1Ebx { // example, unused 48 uint32_t value; 49 struct { 50 uint32_t brand_id : 8, 51 clflush_size : 8, 52 threads_per_cpu : 8, 53 apic_id : 8; 54 } bits; 55 }; 56 57 union StdCpuid1Ecx { 58 uint32_t value; 59 struct { 60 uint32_t sse3 : 1, 61 : 2, 62 monitor : 1, 63 : 1, 64 vmx : 1, 65 : 1, 66 est : 1, 67 : 1, 68 ssse3 : 1, 69 cid : 1, 70 : 2, 71 cmpxchg16: 1, 72 : 4, 73 dca : 1, 74 : 4, 75 popcnt : 1, 76 : 8; 77 } bits; 78 }; 79 80 union StdCpuid1Edx { 81 uint32_t value; 82 struct { 83 uint32_t : 4, 84 tsc : 1, 85 : 3, 86 cmpxchg8 : 1, 87 : 6, 88 cmov : 1, 89 : 7, 90 mmx : 1, 91 fxsr : 1, 92 sse : 1, 93 sse2 : 1, 94 : 1, 95 ht : 1, 96 : 3; 97 } bits; 98 }; 99 100 union DcpCpuid4Eax { 101 uint32_t value; 102 struct { 103 uint32_t cache_type : 5, 104 : 21, 105 cores_per_cpu : 6; 106 } bits; 107 }; 108 109 union DcpCpuid4Ebx { 110 uint32_t value; 111 struct { 112 uint32_t L1_line_size : 12, 113 partitions : 10, 114 associativity : 10; 115 } bits; 116 }; 117 118 union ExtCpuid1Ecx { 119 uint32_t value; 120 struct { 121 uint32_t LahfSahf : 1, 122 CmpLegacy : 1, 123 : 4, 124 abm : 1, 125 sse4a : 1, 126 misalignsse : 1, 127 prefetchw : 1, 128 : 22; 129 } bits; 130 }; 131 132 union ExtCpuid1Edx { 133 uint32_t value; 134 struct { 135 uint32_t : 22, 136 mmx_amd : 1, 137 mmx : 1, 138 fxsr : 1, 139 : 4, 140 long_mode : 1, 141 tdnow2 : 1, 142 tdnow : 1; 143 } bits; 144 }; 145 146 union ExtCpuid5Ex { 147 uint32_t value; 148 struct { 149 uint32_t L1_line_size : 8, 150 L1_tag_lines : 8, 151 L1_assoc : 8, 152 L1_size : 8; 153 } bits; 154 }; 155 156 union ExtCpuid8Ecx { 157 uint32_t value; 158 struct { 159 uint32_t cores_per_cpu : 8, 160 : 24; 161 } bits; 162 }; 163 164 protected: 165 static int _cpu; 166 static int _model; 167 static int _stepping; 168 static int _cpuFeatures; // features returned by the "cpuid" instruction 169 // 0 if this instruction is not available 170 static const char* _features_str; 171 172 enum { 173 CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) 174 CPU_CMOV = (1 << 1), 175 CPU_FXSR = (1 << 2), 176 CPU_HT = (1 << 3), 177 CPU_MMX = (1 << 4), 178 CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) 179 CPU_SSE = (1 << 6), 180 CPU_SSE2 = (1 << 7), 181 CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX) 182 CPU_SSSE3= (1 << 9), 183 CPU_SSE4 = (1 <<10), 184 CPU_SSE4A= (1 <<11) 185 } cpuFeatureFlags; 186 187 // cpuid information block. All info derived from executing cpuid with 188 // various function numbers is stored here. Intel and AMD info is 189 // merged in this block: accessor methods disentangle it. 190 // 191 // The info block is laid out in subblocks of 4 dwords corresponding to 192 // rax, rbx, rcx and rdx, whether or not they contain anything useful. 193 struct CpuidInfo { 194 // cpuid function 0 195 uint32_t std_max_function; 196 uint32_t std_vendor_name_0; 197 uint32_t std_vendor_name_1; 198 uint32_t std_vendor_name_2; 199 200 // cpuid function 1 201 StdCpuid1Eax std_cpuid1_rax; 202 StdCpuid1Ebx std_cpuid1_rbx; 203 StdCpuid1Ecx std_cpuid1_rcx; 204 StdCpuid1Edx std_cpuid1_rdx; 205 206 // cpuid function 4 (deterministic cache parameters) 207 DcpCpuid4Eax dcp_cpuid4_rax; 208 DcpCpuid4Ebx dcp_cpuid4_rbx; 209 uint32_t dcp_cpuid4_rcx; // unused currently 210 uint32_t dcp_cpuid4_rdx; // unused currently 211 212 // cpuid function 0x80000000 // example, unused 213 uint32_t ext_max_function; 214 uint32_t ext_vendor_name_0; 215 uint32_t ext_vendor_name_1; 216 uint32_t ext_vendor_name_2; 217 218 // cpuid function 0x80000001 219 uint32_t ext_cpuid1_rax; // reserved 220 uint32_t ext_cpuid1_rbx; // reserved 221 ExtCpuid1Ecx ext_cpuid1_rcx; 222 ExtCpuid1Edx ext_cpuid1_rdx; 223 224 // cpuid functions 0x80000002 thru 0x80000004: example, unused 225 uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; 226 uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; 227 uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; 228 229 // cpuid function 0x80000005 //AMD L1, Intel reserved 230 uint32_t ext_cpuid5_rax; // unused currently 231 uint32_t ext_cpuid5_rbx; // reserved 232 ExtCpuid5Ex ext_cpuid5_rcx; // L1 data cache info (AMD) 233 ExtCpuid5Ex ext_cpuid5_rdx; // L1 instruction cache info (AMD) 234 235 // cpuid function 0x80000008 236 uint32_t ext_cpuid8_rax; // unused currently 237 uint32_t ext_cpuid8_rbx; // reserved 238 ExtCpuid8Ecx ext_cpuid8_rcx; 239 uint32_t ext_cpuid8_rdx; // reserved 240 }; 241 242 // The actual cpuid info block 243 static CpuidInfo _cpuid_info; 244 245 // Extractors and predicates 246 static bool is_extended_cpu_family() { 247 const uint32_t Extended_Cpu_Family = 0xf; 248 return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family; 249 } 250 static uint32_t extended_cpu_family() { 251 uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family; 252 if (is_extended_cpu_family()) { 253 result += _cpuid_info.std_cpuid1_rax.bits.ext_family; 254 } 255 return result; 256 } 257 static uint32_t extended_cpu_model() { 258 uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model; 259 if (is_extended_cpu_family()) { 260 result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4; 261 } 262 return result; 263 } 264 static uint32_t cpu_stepping() { 265 uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping; 266 return result; 267 } 268 static uint logical_processor_count() { 269 uint result = threads_per_core(); 270 return result; 271 } 272 static uint32_t feature_flags() { 273 uint32_t result = 0; 274 if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0) 275 result |= CPU_CX8; 276 if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0) 277 result |= CPU_CMOV; 278 if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() && 279 _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0) 280 result |= CPU_FXSR; 281 // HT flag is set for multi-core processors also. 282 if (threads_per_core() > 1) 283 result |= CPU_HT; 284 if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() && 285 _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0) 286 result |= CPU_MMX; 287 if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0) 288 result |= CPU_3DNOW; 289 if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0) 290 result |= CPU_SSE; 291 if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0) 292 result |= CPU_SSE2; 293 if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0) 294 result |= CPU_SSE3; 295 if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0) 296 result |= CPU_SSSE3; 297 if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0) 298 result |= CPU_SSE4A; 299 return result; 300 } 301 302 static void get_processor_features(); 303 304 public: 305 // Offsets for cpuid asm stub 306 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 307 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); } 308 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); } 309 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); } 310 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); } 311 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); } 312 313 // Initialization 314 static void initialize(); 315 316 // Asserts 317 static void assert_is_initialized() { 318 assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized"); 319 } 320 321 // 322 // Processor family: 323 // 3 - 386 324 // 4 - 486 325 // 5 - Pentium 326 // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, 327 // Pentium M, Core Solo, Core Duo, Core2 Duo 328 // family 6 model: 9, 13, 14, 15 329 // 0x0f - Pentium 4, Opteron 330 // 331 // Note: The cpu family should be used to select between 332 // instruction sequences which are valid on all Intel 333 // processors. Use the feature test functions below to 334 // determine whether a particular instruction is supported. 335 // 336 static int cpu_family() { return _cpu;} 337 static bool is_P6() { return cpu_family() >= 6; } 338 339 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' 340 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' 341 342 static uint cores_per_cpu() { 343 uint result = 1; 344 if (is_intel()) { 345 result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1); 346 } else if (is_amd()) { 347 result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1); 348 } 349 return result; 350 } 351 352 static uint threads_per_core() { 353 uint result = 1; 354 if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) { 355 result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu / 356 cores_per_cpu(); 357 } 358 return result; 359 } 360 361 static intx L1_data_cache_line_size() { 362 intx result = 0; 363 if (is_intel()) { 364 result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1); 365 } else if (is_amd()) { 366 result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size; 367 } 368 if (result < 32) // not defined ? 369 result = 32; // 32 bytes by default on x86 370 return result; 371 } 372 373 // 374 // Feature identification 375 // 376 static bool supports_cpuid() { return _cpuFeatures != 0; } 377 static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } 378 static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } 379 static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } 380 static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } 381 static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } 382 static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } 383 static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } 384 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } 385 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 386 static bool supports_sse4() { return (_cpuFeatures & CPU_SSE4) != 0; } 387 // 388 // AMD features 389 // 390 static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 391 static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; } 392 static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; } 393 static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 394 395 static bool supports_compare_and_exchange() { return true; } 396 397 static const char* cpu_features() { return _features_str; } 398 399 static intx allocate_prefetch_distance() { 400 // This method should be called before allocate_prefetch_style(). 401 // 402 // Hardware prefetching (distance/size in bytes): 403 // Pentium 3 - 64 / 32 404 // Pentium 4 - 256 / 128 405 // Athlon - 64 / 32 ???? 406 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 407 // Core - 128 / 64 408 // 409 // Software prefetching (distance in bytes / instruction with best score): 410 // Pentium 3 - 128 / prefetchnta 411 // Pentium 4 - 512 / prefetchnta 412 // Athlon - 128 / prefetchnta 413 // Opteron - 256 / prefetchnta 414 // Core - 256 / prefetchnta 415 // It will be used only when AllocatePrefetchStyle > 0 416 417 intx count = AllocatePrefetchDistance; 418 if (count < 0) { // default ? 419 if (is_amd()) { // AMD 420 if (supports_sse2()) 421 count = 256; // Opteron 422 else 423 count = 128; // Athlon 424 } else { // Intel 425 if (supports_sse2()) 426 if (cpu_family() == 6) { 427 count = 256; // Pentium M, Core, Core2 428 } else { 429 count = 512; // Pentium 4 430 } 431 else 432 count = 128; // Pentium 3 (and all other old CPUs) 433 } 434 } 435 return count; 436 } 437 static intx allocate_prefetch_style() { 438 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 439 // Return 0 if AllocatePrefetchDistance was not defined or 440 // prefetch instruction is not supported. 441 return (AllocatePrefetchDistance > 0 && 442 (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0; 443 } 444 };