124 struct {
125 uint32_t L1_line_size : 12,
126 partitions : 10,
127 associativity : 10;
128 } bits;
129 };
130
// Overlay for one 32-bit cpuid result word. 'value' holds the raw word and
// 'bits' gives named access to its fields. Judging by the type name this is
// EBX of the topology leaf (0xB) -- confirm against the code that fills it.
union TplCpuidBEbx {
  uint32_t value;
  struct {
    uint32_t logical_cpus : 16;  // first 16 bits of the word
    uint32_t              : 16;  // remaining 16 bits, unnamed
  } bits;
};
138
// Overlay for ECX as returned by extended cpuid function 0x80000001.
// 'value' holds the raw register; 'bits' names the individual feature flags.
//
// Bit positions follow the AMD and Intel CPUID documentation:
//   bit 0 LahfSahf, bit 1 CmpLegacy, bits 2-4 not decoded here,
//   bit 5 lzcnt (ABM), bit 6 sse4a, bit 7 misalignsse, bit 8 prefetchw.
// The previous layout padded with 4 bits after CmpLegacy, which shifted
// lzcnt, sse4a, misalignsse and prefetchw one bit too high (6..9).
//
// Assumes the compiler allocates bit-fields starting at the least
// significant bit, as the x86 ABIs do.
union ExtCpuid1Ecx {
  uint32_t value;
  struct {
    uint32_t LahfSahf     : 1,   // bit 0
             CmpLegacy    : 1,   // bit 1
                          : 3,   // bits 2-4
             lzcnt        : 1,   // bit 5
             sse4a        : 1,   // bit 6
             misalignsse  : 1,   // bit 7
             prefetchw    : 1,   // bit 8
                          : 23;  // bits 9-31
  } bits;
};
152
153 union ExtCpuid1Edx {
154 uint32_t value;
155 struct {
156 uint32_t : 22,
157 mmx_amd : 1,
158 mmx : 1,
159 fxsr : 1,
160 : 4,
161 long_mode : 1,
162 tdnow2 : 1,
163 tdnow : 1;
164 } bits;
234 CPU_FXSR = (1 << 2),
235 CPU_HT = (1 << 3),
236 CPU_MMX = (1 << 4),
237 CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
238 // may not necessarily support other 3dnow instructions
239 CPU_SSE = (1 << 6),
240 CPU_SSE2 = (1 << 7),
241 CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
242 CPU_SSSE3 = (1 << 9),
243 CPU_SSE4A = (1 << 10),
244 CPU_SSE4_1 = (1 << 11),
245 CPU_SSE4_2 = (1 << 12),
246 CPU_POPCNT = (1 << 13),
247 CPU_LZCNT = (1 << 14),
248 CPU_TSC = (1 << 15),
249 CPU_TSCINV = (1 << 16),
250 CPU_AVX = (1 << 17),
251 CPU_AVX2 = (1 << 18),
252 CPU_AES = (1 << 19),
253 CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
254 CPU_CLMUL = (1 << 21) // carryless multiply for CRC
255 } cpuFeatureFlags;
256
// Family and model identifiers for specific Intel and AMD processors.
// Every enumerator has an explicit value, so declaration order is irrelevant.
enum {
  // Intel: family number, then model numbers
  CPU_FAMILY_INTEL_CORE    = 0x06,
  CPU_MODEL_NEHALEM_EP     = 0x1a,
  CPU_MODEL_NEHALEM        = 0x1e,
  CPU_MODEL_NEHALEM_EX     = 0x2e,
  CPU_MODEL_WESTMERE       = 0x25,
  CPU_MODEL_WESTMERE_EP    = 0x2c,
  CPU_MODEL_WESTMERE_EX    = 0x2f,
  CPU_MODEL_SANDYBRIDGE    = 0x2a,
  CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
  // NOTE(review): Intel's model tables appear to list 0x3a as client
  // Ivy Bridge and 0x3e as Ivy Bridge-EP -- confirm before relying on this.
  CPU_MODEL_IVYBRIDGE_EP   = 0x3a,

  // AMD
  CPU_FAMILY_AMD_11H       = 0x11
} cpuExtendedFamily;
272
273 // cpuid information block. All info derived from executing cpuid with
274 // various function numbers is stored here. Intel and AMD info is
406 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
407 result |= CPU_SSE2;
408 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
409 result |= CPU_SSE3;
410 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
411 result |= CPU_SSSE3;
412 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
413 result |= CPU_SSE4_1;
414 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
415 result |= CPU_SSE4_2;
416 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
417 result |= CPU_POPCNT;
418 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
419 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
420 _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
421 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
422 result |= CPU_AVX;
423 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
424 result |= CPU_AVX2;
425 }
426 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
427 result |= CPU_TSC;
428 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
429 result |= CPU_TSCINV;
430 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
431 result |= CPU_AES;
432 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
433 result |= CPU_ERMS;
434 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
435 result |= CPU_CLMUL;
436
437 // AMD features.
438 if (is_amd()) {
439 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
440 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
441 result |= CPU_3DNOW_PREFETCH;
442 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
443 result |= CPU_LZCNT;
444 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
445 result |= CPU_SSE4A;
446 }
447
448 return result;
449 }
450
451 static void get_processor_features();
452
453 public:
454 // Offsets for cpuid asm stub
455 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
456 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
457 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
458 static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
459 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
460 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
461 static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
462 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
463 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
464 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
465 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
466 static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
543 //
544 static bool supports_cpuid() { return _cpuFeatures != 0; }
545 static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
546 static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; }
547 static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; }
548 static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; }
549 static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; }
550 static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; }
551 static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
552 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
553 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
554 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
555 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
556 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
557 static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
558 static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
559 static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
560 static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
561 static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
562 static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; }
563
564 // Intel features
565 static bool is_intel_family_core() { return is_intel() &&
566 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
567
568 static bool is_intel_tsc_synched_at_init() {
569 if (is_intel_family_core()) {
570 uint32_t ext_model = extended_cpu_model();
571 if (ext_model == CPU_MODEL_NEHALEM_EP ||
572 ext_model == CPU_MODEL_WESTMERE_EP ||
573 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
574 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
575 // <= 2-socket invariant tsc support. EX versions are usually used
576 // in > 2-socket systems and likely don't synchronize tscs at
577 // initialization.
578 // Code that uses tsc values must be prepared for them to arbitrarily
579 // jump forward or backward.
580 return true;
581 }
582 }
583 return false;
|
124 struct {
125 uint32_t L1_line_size : 12,
126 partitions : 10,
127 associativity : 10;
128 } bits;
129 };
130
// Overlay for one 32-bit cpuid result word. 'value' holds the raw word and
// 'bits' gives named access to its fields. Judging by the type name this is
// EBX of the topology leaf (0xB) -- confirm against the code that fills it.
union TplCpuidBEbx {
  uint32_t value;
  struct {
    uint32_t logical_cpus : 16;  // first 16 bits of the word
    uint32_t              : 16;  // remaining 16 bits, unnamed
  } bits;
};
138
// Overlay for ECX as returned by extended cpuid function 0x80000001.
// 'value' holds the raw register; 'bits' names the individual feature flags.
//
// Bit positions follow the AMD and Intel CPUID documentation:
//   bit 0 LahfSahf, bit 1 CmpLegacy, bits 2-4 not decoded here,
//   bit 5 lzcnt (ABM on AMD, LZCNT on Intel), bit 6 sse4a,
//   bit 7 misalignsse, bit 8 prefetchw.
// The previous layout placed lzcnt_intel at bit 5 and the AMD fields right
// after it, which shifted lzcnt, sse4a, misalignsse and prefetchw one bit
// too high (6..9).
//
// Both vendors report LZCNT in bit 5, so 'lzcnt_intel' and 'lzcnt' must
// alias the same bit. 'bits' is therefore two overlaid views of the word,
// which keeps bits.lzcnt_intel and bits.lzcnt both usable.
//
// NOTE(review): this relies on anonymous struct members (standard in C11,
// a common compiler extension in C++) -- confirm all supported toolchains
// accept it. Also assumes bit-fields are allocated starting at the least
// significant bit, as the x86 ABIs do.
union ExtCpuid1Ecx {
  uint32_t value;
  union {
    // Primary view: every decoded flag at its documented position.
    struct {
      uint32_t LahfSahf     : 1,   // bit 0
               CmpLegacy    : 1,   // bit 1
                            : 3,   // bits 2-4
               lzcnt        : 1,   // bit 5
               sse4a        : 1,   // bit 6
               misalignsse  : 1,   // bit 7
               prefetchw    : 1,   // bit 8
                            : 23;  // bits 9-31
    };
    // Alias view: Intel-specific name for the same LZCNT bit.
    struct {
      uint32_t              : 5,   // bits 0-4
               lzcnt_intel  : 1,   // bit 5
                            : 26;  // bits 6-31
    };
  } bits;
};
153
154 union ExtCpuid1Edx {
155 uint32_t value;
156 struct {
157 uint32_t : 22,
158 mmx_amd : 1,
159 mmx : 1,
160 fxsr : 1,
161 : 4,
162 long_mode : 1,
163 tdnow2 : 1,
164 tdnow : 1;
165 } bits;
235 CPU_FXSR = (1 << 2),
236 CPU_HT = (1 << 3),
237 CPU_MMX = (1 << 4),
238 CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
239 // may not necessarily support other 3dnow instructions
240 CPU_SSE = (1 << 6),
241 CPU_SSE2 = (1 << 7),
242 CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
243 CPU_SSSE3 = (1 << 9),
244 CPU_SSE4A = (1 << 10),
245 CPU_SSE4_1 = (1 << 11),
246 CPU_SSE4_2 = (1 << 12),
247 CPU_POPCNT = (1 << 13),
248 CPU_LZCNT = (1 << 14),
249 CPU_TSC = (1 << 15),
250 CPU_TSCINV = (1 << 16),
251 CPU_AVX = (1 << 17),
252 CPU_AVX2 = (1 << 18),
253 CPU_AES = (1 << 19),
254 CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
255 CPU_CLMUL = (1 << 21), // carryless multiply for CRC
256 CPU_BMI1 = (1 << 22),
257 CPU_BMI2 = (1 << 23)
258 } cpuFeatureFlags;
259
// Family and model identifiers for specific Intel and AMD processors.
// Every enumerator has an explicit value, so declaration order is irrelevant.
enum {
  // Intel: family number, then model numbers
  CPU_FAMILY_INTEL_CORE    = 0x06,
  CPU_MODEL_NEHALEM_EP     = 0x1a,
  CPU_MODEL_NEHALEM        = 0x1e,
  CPU_MODEL_NEHALEM_EX     = 0x2e,
  CPU_MODEL_WESTMERE       = 0x25,
  CPU_MODEL_WESTMERE_EP    = 0x2c,
  CPU_MODEL_WESTMERE_EX    = 0x2f,
  CPU_MODEL_SANDYBRIDGE    = 0x2a,
  CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
  // NOTE(review): Intel's model tables appear to list 0x3a as client
  // Ivy Bridge and 0x3e as Ivy Bridge-EP -- confirm before relying on this.
  CPU_MODEL_IVYBRIDGE_EP   = 0x3a,

  // AMD
  CPU_FAMILY_AMD_11H       = 0x11
} cpuExtendedFamily;
275
276 // cpuid information block. All info derived from executing cpuid with
277 // various function numbers is stored here. Intel and AMD info is
409 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
410 result |= CPU_SSE2;
411 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
412 result |= CPU_SSE3;
413 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
414 result |= CPU_SSSE3;
415 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
416 result |= CPU_SSE4_1;
417 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
418 result |= CPU_SSE4_2;
419 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
420 result |= CPU_POPCNT;
421 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
422 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
423 _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
424 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
425 result |= CPU_AVX;
426 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
427 result |= CPU_AVX2;
428 }
429 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
430 result |= CPU_BMI1;
431 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
432 result |= CPU_TSC;
433 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
434 result |= CPU_TSCINV;
435 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
436 result |= CPU_AES;
437 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
438 result |= CPU_ERMS;
439 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
440 result |= CPU_CLMUL;
441
442 // AMD features.
443 if (is_amd()) {
444 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
445 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
446 result |= CPU_3DNOW_PREFETCH;
447 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
448 result |= CPU_LZCNT;
449 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
450 result |= CPU_SSE4A;
451 }
452 // Intel features.
453 if(is_intel()) {
454 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
455 result |= CPU_BMI2;
456 if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
457 result |= CPU_LZCNT;
458 }
459
460 return result;
461 }
462
463 static void get_processor_features();
464
465 public:
466 // Offsets for cpuid asm stub
467 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
468 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
469 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
470 static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
471 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
472 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
473 static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
474 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
475 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
476 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
477 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
478 static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
555 //
556 static bool supports_cpuid() { return _cpuFeatures != 0; }
557 static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
558 static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; }
559 static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; }
560 static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; }
561 static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; }
562 static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; }
563 static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; }
564 static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; }
565 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
566 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
567 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
568 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
569 static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
570 static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
571 static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; }
572 static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; }
573 static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; }
574 static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; }
575 static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; }
576 static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; }
577 // Intel features
578 static bool is_intel_family_core() { return is_intel() &&
579 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
580
581 static bool is_intel_tsc_synched_at_init() {
582 if (is_intel_family_core()) {
583 uint32_t ext_model = extended_cpu_model();
584 if (ext_model == CPU_MODEL_NEHALEM_EP ||
585 ext_model == CPU_MODEL_WESTMERE_EP ||
586 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
587 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
588 // <= 2-socket invariant tsc support. EX versions are usually used
589 // in > 2-socket systems and likely don't synchronize tscs at
590 // initialization.
591 // Code that uses tsc values must be prepared for them to arbitrarily
592 // jump forward or backward.
593 return true;
594 }
595 }
596 return false;
|