211 : 2,
212 bmi2 : 1,
213 erms : 1,
214 : 1,
215 rtm : 1,
216 : 4,
217 avx512f : 1,
218 avx512dq : 1,
219 : 1,
220 adx : 1,
221 : 6,
222 avx512pf : 1,
223 avx512er : 1,
224 avx512cd : 1,
225 sha : 1,
226 avx512bw : 1,
227 avx512vl : 1;
228 } bits;
229 };
230
// Register overlay: `value` is the raw 32-bit word, `bits` names its fields.
// The field widths add up to 32 (8 + 8 + 16).
// NOTE(review): the name suggests EBX of extended CPUID leaf 0x8000001E; the
// code that fills this union is not visible here -- confirm.
// NOTE(review): bit-field allocation order is compiler-defined; the offsets
// below assume allocation starts at the least-significant bit.
231 union ExtCpuid1EEbx {
232 uint32_t value;
233 struct {
234 uint32_t : 8, // offsets 0-7: unnamed, skipped
235 threads_per_core : 8, // offsets 8-15; NOTE(review): count vs. count-1 encoding not shown here
236 : 16; // offsets 16-31: unnamed padding
237 } bits;
238 };
239
240 union XemXcr0Eax {
241 uint32_t value;
242 struct {
243 uint32_t x87 : 1,
244 sse : 1,
245 ymm : 1,
246 bndregs : 1,
247 bndcsr : 1,
248 opmask : 1,
249 zmm512 : 1,
250 zmm32 : 1,
284 CPU_AES = (1 << 19),
285 CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
286 CPU_CLMUL = (1 << 21), // carryless multiply for CRC
287 CPU_BMI1 = (1 << 22),
288 CPU_BMI2 = (1 << 23),
289 CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions
290 CPU_ADX = (1 << 25),
291 CPU_AVX512F = (1 << 26), // AVX 512bit foundation instructions
292 CPU_AVX512DQ = (1 << 27),
293 CPU_AVX512PF = (1 << 28),
294 CPU_AVX512ER = (1 << 29),
295 CPU_AVX512CD = (1 << 30)
296 // Keeping sign bit 31 unassigned.
297 };
298
// Feature masks that do not fit in the feature enum: the enum stops at
// (1 << 30) and leaves sign bit 31 unassigned, so these continue from bit 32
// as 64-bit macros. Each value is a single, distinct bit.
// NOTE(review): UCONST64 is defined elsewhere; presumably it builds an
// unsigned 64-bit literal -- confirm.
299 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; enums are limited to 31 bit
300 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
301 #define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // bit 34; SHA instructions
302 #define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // bit 35; FMA instructions
303 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // bit 36; Vzeroupper instruction
304
305 enum Extended_Family {
306 // AMD
307 CPU_FAMILY_AMD_11H = 0x11,
308 // ZX
309 CPU_FAMILY_ZX_CORE_F6 = 6,
310 CPU_FAMILY_ZX_CORE_F7 = 7,
311 // Intel
312 CPU_FAMILY_INTEL_CORE = 6,
313 CPU_MODEL_NEHALEM = 0x1e,
314 CPU_MODEL_NEHALEM_EP = 0x1a,
315 CPU_MODEL_NEHALEM_EX = 0x2e,
316 CPU_MODEL_WESTMERE = 0x25,
317 CPU_MODEL_WESTMERE_EP = 0x2c,
318 CPU_MODEL_WESTMERE_EX = 0x2f,
319 CPU_MODEL_SANDYBRIDGE = 0x2a,
320 CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
321 CPU_MODEL_IVYBRIDGE_EP = 0x3a,
322 CPU_MODEL_HASWELL_E3 = 0x3c,
323 CPU_MODEL_HASWELL_E7 = 0x3f,
336 uint32_t std_max_function;
337 uint32_t std_vendor_name_0;
338 uint32_t std_vendor_name_1;
339 uint32_t std_vendor_name_2;
340
341 // cpuid function 1
342 StdCpuid1Eax std_cpuid1_eax;
343 StdCpuid1Ebx std_cpuid1_ebx;
344 StdCpuid1Ecx std_cpuid1_ecx;
345 StdCpuid1Edx std_cpuid1_edx;
346
347 // cpuid function 4 (deterministic cache parameters)
348 DcpCpuid4Eax dcp_cpuid4_eax;
349 DcpCpuid4Ebx dcp_cpuid4_ebx;
350 uint32_t dcp_cpuid4_ecx; // unused currently
351 uint32_t dcp_cpuid4_edx; // unused currently
352
353 // cpuid function 7 (structured extended features)
354 SefCpuid7Eax sef_cpuid7_eax;
355 SefCpuid7Ebx sef_cpuid7_ebx;
356 uint32_t sef_cpuid7_ecx; // unused currently
357 uint32_t sef_cpuid7_edx; // unused currently
358
359 // cpuid function 0xB (processor topology)
360 // ecx = 0
361 uint32_t tpl_cpuidB0_eax;
362 TplCpuidBEbx tpl_cpuidB0_ebx;
363 uint32_t tpl_cpuidB0_ecx; // unused currently
364 uint32_t tpl_cpuidB0_edx; // unused currently
365
366 // ecx = 1
367 uint32_t tpl_cpuidB1_eax;
368 TplCpuidBEbx tpl_cpuidB1_ebx;
369 uint32_t tpl_cpuidB1_ecx; // unused currently
370 uint32_t tpl_cpuidB1_edx; // unused currently
371
372 // ecx = 2
373 uint32_t tpl_cpuidB2_eax;
374 TplCpuidBEbx tpl_cpuidB2_ebx;
375 uint32_t tpl_cpuidB2_ecx; // unused currently
376 uint32_t tpl_cpuidB2_edx; // unused currently
377
490 result |= CPU_VZEROUPPER;
491 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
492 result |= CPU_AVX2;
493 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
494 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
495 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
496 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
497 result |= CPU_AVX512F;
498 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
499 result |= CPU_AVX512CD;
500 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
501 result |= CPU_AVX512DQ;
502 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
503 result |= CPU_AVX512PF;
504 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
505 result |= CPU_AVX512ER;
506 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
507 result |= CPU_AVX512BW;
508 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
509 result |= CPU_AVX512VL;
510 }
511 }
512 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
513 result |= CPU_BMI1;
514 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
515 result |= CPU_TSC;
516 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
517 result |= CPU_TSCINV;
518 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
519 result |= CPU_AES;
520 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
521 result |= CPU_ERMS;
522 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
523 result |= CPU_CLMUL;
524 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
525 result |= CPU_RTM;
526 if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
527 result |= CPU_ADX;
528 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
529 result |= CPU_BMI2;
// Single-flag queries: each returns true when the named CPU_* bit is set in
// _features and false otherwise. None of them modifies any state.
766 static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; } // CPU_CLMUL is (1 << 21)
767 static bool supports_rtm() { return (_features & CPU_RTM) != 0; } // CPU_RTM is (1 << 24)
768 static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; } // CPU_BMI1 is (1 << 22)
769 static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; } // CPU_BMI2 is (1 << 23)
770 static bool supports_adx() { return (_features & CPU_ADX) != 0; } // CPU_ADX is (1 << 25)
// AVX-512 single-flag queries: each returns true when the named CPU_AVX512*
// bit is set in _features.
// Note the naming: supports_evex() is the CPU_AVX512F (foundation) test; the
// combined predicates that follow use it as the "AVX-512 present" check.
771 static bool supports_evex() { return (_features & CPU_AVX512F) != 0; }
772 static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
773 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
774 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
775 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
// CPU_AVX512BW and CPU_AVX512VL are 64-bit macro masks (bits 32 and 33), not enumerators.
776 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
777 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
// Combined predicates built from the single-flag queries.
// supports_avx() and supports_avx2() are defined outside this excerpt.
// True when both the BW and the VL flags are set.
778 static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
// True when AVX512F is set but VL is not.
779 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
// True when AVX512F is set but BW is not.
780 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// True when supports_avx2() holds and AVX512F is not set.
781 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
// True when either supports_avx2() or supports_avx() holds and AVX512F is not set.
782 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Queries for the flags kept as 64-bit macro masks (bits 34-36).
// True when the CPU_SHA bit is set in _features.
783 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
// Stricter than a plain flag test: the CPU_FMA bit must be set AND
// supports_avx() (defined outside this excerpt) must also return true.
784 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
// True when the CPU_VZEROUPPER bit is set in _features.
785 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
786
787 // Intel features
// Returns true only when is_intel() holds and extended_cpu_family() equals
// CPU_FAMILY_INTEL_CORE (declared as 6 in enum Extended_Family).
// Both helpers are defined outside this excerpt; because of the &&,
// extended_cpu_family() is not called when is_intel() is false.
788 static bool is_intel_family_core() { return is_intel() &&
789 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
790
791 static bool is_intel_tsc_synched_at_init() {
792 if (is_intel_family_core()) {
793 uint32_t ext_model = extended_cpu_model();
794 if (ext_model == CPU_MODEL_NEHALEM_EP ||
795 ext_model == CPU_MODEL_WESTMERE_EP ||
796 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
797 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
798 // <= 2-socket invariant tsc support. EX versions are usually used
799 // in > 2-socket systems and likely don't synchronize tscs at
800 // initialization.
801 // Code that uses tsc values must be prepared for them to arbitrarily
802 // jump forward or backward.
803 return true;
804 }
805 }
|
211 : 2,
212 bmi2 : 1,
213 erms : 1,
214 : 1,
215 rtm : 1,
216 : 4,
217 avx512f : 1,
218 avx512dq : 1,
219 : 1,
220 adx : 1,
221 : 6,
222 avx512pf : 1,
223 avx512er : 1,
224 avx512cd : 1,
225 sha : 1,
226 avx512bw : 1,
227 avx512vl : 1;
228 } bits;
229 };
230
// Register overlay: `value` is the raw 32-bit word, `bits` names its fields.
// A member of this type (sef_cpuid7_ecx) is declared under the
// "cpuid function 7 (structured extended features)" comment, and its
// avx512_vpopcntdq field is read when CPU_AVX512_VPOPCNTDQ is computed.
// The field widths add up to 32 (15 single bits + 17 padding bits).
// NOTE(review): bit-field allocation order is compiler-defined; the offsets
// below assume allocation starts at the least-significant bit.
231 union SefCpuid7Ecx {
232 uint32_t value;
233 struct {
234 uint32_t prefetchwt1 : 1, // offset 0
235 avx512_vbmi : 1, // offset 1
236 umip : 1, // offset 2
237 pku : 1, // offset 3
238 ospke : 1, // offset 4
239 : 1, // offset 5: unnamed, skipped
240 avx512_vbmi2 : 1, // offset 6
241 : 1, // offset 7: unnamed, skipped
242 gfni : 1, // offset 8
243 vaes : 1, // offset 9
244 vpclmulqdq : 1, // offset 10
245 avx512_vnni : 1, // offset 11
246 avx512_bitalg : 1, // offset 12
247 : 1, // offset 13: unnamed, skipped
248 avx512_vpopcntdq : 1, // offset 14
249 : 17; // offsets 15-31: unnamed padding
250 } bits;
251 };
252
// Register overlay: `value` is the raw 32-bit word, `bits` names its fields.
// A member of this type (sef_cpuid7_edx) is declared under the
// "cpuid function 7 (structured extended features)" comment.
// The field widths add up to 32 (2 + 1 + 1 + 28).
// NOTE(review): bit-field allocation order is compiler-defined; the offsets
// below assume allocation starts at the least-significant bit.
253 union SefCpuid7Edx {
254 uint32_t value;
255 struct {
256 uint32_t : 2, // offsets 0-1: unnamed, skipped
257 avx512_4vnniw : 1, // offset 2
258 avx512_4fmaps : 1, // offset 3
259 : 28; // offsets 4-31: unnamed padding
260 } bits;
261 };
262
// Register overlay: `value` is the raw 32-bit word, `bits` names its fields.
// The field widths add up to 32 (8 + 8 + 16).
// NOTE(review): the name suggests EBX of extended CPUID leaf 0x8000001E; the
// code that fills this union is not visible here -- confirm.
// NOTE(review): bit-field allocation order is compiler-defined; the offsets
// below assume allocation starts at the least-significant bit.
263 union ExtCpuid1EEbx {
264 uint32_t value;
265 struct {
266 uint32_t : 8, // offsets 0-7: unnamed, skipped
267 threads_per_core : 8, // offsets 8-15; NOTE(review): count vs. count-1 encoding not shown here
268 : 16; // offsets 16-31: unnamed padding
269 } bits;
270 };
271
272 union XemXcr0Eax {
273 uint32_t value;
274 struct {
275 uint32_t x87 : 1,
276 sse : 1,
277 ymm : 1,
278 bndregs : 1,
279 bndcsr : 1,
280 opmask : 1,
281 zmm512 : 1,
282 zmm32 : 1,
316 CPU_AES = (1 << 19),
317 CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions
318 CPU_CLMUL = (1 << 21), // carryless multiply for CRC
319 CPU_BMI1 = (1 << 22),
320 CPU_BMI2 = (1 << 23),
321 CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions
322 CPU_ADX = (1 << 25),
323 CPU_AVX512F = (1 << 26), // AVX 512bit foundation instructions
324 CPU_AVX512DQ = (1 << 27),
325 CPU_AVX512PF = (1 << 28),
326 CPU_AVX512ER = (1 << 29),
327 CPU_AVX512CD = (1 << 30)
328 // Keeping sign bit 31 unassigned.
329 };
330
// Feature masks that do not fit in the feature enum: the enum stops at
// (1 << 30) and leaves sign bit 31 unassigned, so these continue from bit 32
// as 64-bit macros. Each value is a single, distinct bit.
// NOTE(review): UCONST64 is defined elsewhere; presumably it builds an
// unsigned 64-bit literal -- confirm.
331 #define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // bit 32; enums are limited to 31 bit
332 #define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // bit 33; EVEX instructions with smaller vector length
333 #define CPU_SHA ((uint64_t)UCONST64(0x400000000)) // bit 34; SHA instructions
334 #define CPU_FMA ((uint64_t)UCONST64(0x800000000)) // bit 35; FMA instructions
335 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // bit 36; Vzeroupper instruction
336 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // bit 37; Vector popcount
337
338 enum Extended_Family {
339 // AMD
340 CPU_FAMILY_AMD_11H = 0x11,
341 // ZX
342 CPU_FAMILY_ZX_CORE_F6 = 6,
343 CPU_FAMILY_ZX_CORE_F7 = 7,
344 // Intel
345 CPU_FAMILY_INTEL_CORE = 6,
346 CPU_MODEL_NEHALEM = 0x1e,
347 CPU_MODEL_NEHALEM_EP = 0x1a,
348 CPU_MODEL_NEHALEM_EX = 0x2e,
349 CPU_MODEL_WESTMERE = 0x25,
350 CPU_MODEL_WESTMERE_EP = 0x2c,
351 CPU_MODEL_WESTMERE_EX = 0x2f,
352 CPU_MODEL_SANDYBRIDGE = 0x2a,
353 CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
354 CPU_MODEL_IVYBRIDGE_EP = 0x3a,
355 CPU_MODEL_HASWELL_E3 = 0x3c,
356 CPU_MODEL_HASWELL_E7 = 0x3f,
369 uint32_t std_max_function;
370 uint32_t std_vendor_name_0;
371 uint32_t std_vendor_name_1;
372 uint32_t std_vendor_name_2;
373
374 // cpuid function 1
375 StdCpuid1Eax std_cpuid1_eax;
376 StdCpuid1Ebx std_cpuid1_ebx;
377 StdCpuid1Ecx std_cpuid1_ecx;
378 StdCpuid1Edx std_cpuid1_edx;
379
380 // cpuid function 4 (deterministic cache parameters)
381 DcpCpuid4Eax dcp_cpuid4_eax;
382 DcpCpuid4Ebx dcp_cpuid4_ebx;
383 uint32_t dcp_cpuid4_ecx; // unused currently
384 uint32_t dcp_cpuid4_edx; // unused currently
385
386 // cpuid function 7 (structured extended features)
387 SefCpuid7Eax sef_cpuid7_eax;
388 SefCpuid7Ebx sef_cpuid7_ebx;
389 SefCpuid7Ecx sef_cpuid7_ecx;
390 SefCpuid7Edx sef_cpuid7_edx;
391
392 // cpuid function 0xB (processor topology)
393 // ecx = 0
394 uint32_t tpl_cpuidB0_eax;
395 TplCpuidBEbx tpl_cpuidB0_ebx;
396 uint32_t tpl_cpuidB0_ecx; // unused currently
397 uint32_t tpl_cpuidB0_edx; // unused currently
398
399 // ecx = 1
400 uint32_t tpl_cpuidB1_eax;
401 TplCpuidBEbx tpl_cpuidB1_ebx;
402 uint32_t tpl_cpuidB1_ecx; // unused currently
403 uint32_t tpl_cpuidB1_edx; // unused currently
404
405 // ecx = 2
406 uint32_t tpl_cpuidB2_eax;
407 TplCpuidBEbx tpl_cpuidB2_ebx;
408 uint32_t tpl_cpuidB2_ecx; // unused currently
409 uint32_t tpl_cpuidB2_edx; // unused currently
410
523 result |= CPU_VZEROUPPER;
524 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
525 result |= CPU_AVX2;
526 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
527 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
528 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
529 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
530 result |= CPU_AVX512F;
531 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
532 result |= CPU_AVX512CD;
533 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
534 result |= CPU_AVX512DQ;
535 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
536 result |= CPU_AVX512PF;
537 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
538 result |= CPU_AVX512ER;
539 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
540 result |= CPU_AVX512BW;
541 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
542 result |= CPU_AVX512VL;
543 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
544 result |= CPU_AVX512_VPOPCNTDQ;
545 }
546 }
547 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
548 result |= CPU_BMI1;
549 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
550 result |= CPU_TSC;
551 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
552 result |= CPU_TSCINV;
553 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
554 result |= CPU_AES;
555 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
556 result |= CPU_ERMS;
557 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
558 result |= CPU_CLMUL;
559 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
560 result |= CPU_RTM;
561 if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
562 result |= CPU_ADX;
563 if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
564 result |= CPU_BMI2;
// Single-flag queries: each returns true when the named CPU_* bit is set in
// _features and false otherwise. None of them modifies any state.
801 static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; } // CPU_CLMUL is (1 << 21)
802 static bool supports_rtm() { return (_features & CPU_RTM) != 0; } // CPU_RTM is (1 << 24)
803 static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; } // CPU_BMI1 is (1 << 22)
804 static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; } // CPU_BMI2 is (1 << 23)
805 static bool supports_adx() { return (_features & CPU_ADX) != 0; } // CPU_ADX is (1 << 25)
// AVX-512 single-flag queries: each returns true when the named CPU_AVX512*
// bit is set in _features.
// Note the naming: supports_evex() is the CPU_AVX512F (foundation) test; the
// combined predicates that follow use it as the "AVX-512 present" check.
806 static bool supports_evex() { return (_features & CPU_AVX512F) != 0; }
807 static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
808 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
809 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
810 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
// CPU_AVX512BW and CPU_AVX512VL are 64-bit macro masks (bits 32 and 33), not enumerators.
811 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
812 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
// Combined predicates built from the single-flag queries.
// supports_avx() and supports_avx2() are defined outside this excerpt.
// True when both the BW and the VL flags are set.
813 static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
// True when AVX512F is set but VL is not.
814 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
// True when AVX512F is set but BW is not.
815 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// True when supports_avx2() holds and AVX512F is not set.
816 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
// True when either supports_avx2() or supports_avx() holds and AVX512F is not set.
817 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Queries for the flags kept as 64-bit macro masks (bits 34-37).
// True when the CPU_SHA bit is set in _features.
818 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
// Stricter than a plain flag test: the CPU_FMA bit must be set AND
// supports_avx() (defined outside this excerpt) must also return true.
819 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
// True when the CPU_VZEROUPPER bit is set in _features.
820 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
// True when the CPU_AVX512_VPOPCNTDQ bit is set in _features. The visible
// flag computation only sets that bit inside the branch that also sets CPU_AVX512F.
821 static bool supports_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
822
823 // Intel features
// Returns true only when is_intel() holds and extended_cpu_family() equals
// CPU_FAMILY_INTEL_CORE (declared as 6 in enum Extended_Family).
// Both helpers are defined outside this excerpt; because of the &&,
// extended_cpu_family() is not called when is_intel() is false.
824 static bool is_intel_family_core() { return is_intel() &&
825 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
826
827 static bool is_intel_tsc_synched_at_init() {
828 if (is_intel_family_core()) {
829 uint32_t ext_model = extended_cpu_model();
830 if (ext_model == CPU_MODEL_NEHALEM_EP ||
831 ext_model == CPU_MODEL_WESTMERE_EP ||
832 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
833 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
834 // <= 2-socket invariant tsc support. EX versions are usually used
835 // in > 2-socket systems and likely don't synchronize tscs at
836 // initialization.
837 // Code that uses tsc values must be prepared for them to arbitrarily
838 // jump forward or backward.
839 return true;
840 }
841 }
|