18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "runtime/java.hpp"
30 #include "runtime/os.hpp"
31 #include "runtime/stubCodeGenerator.hpp"
32 #include "vm_version_x86.hpp"
33
34
// Cached processor identity, filled in once during VM startup.
int VM_Version::_cpu;                  // CPU family (e.g. 4 = 486; see get_processor_features)
int VM_Version::_model;
int VM_Version::_stepping;
int VM_Version::_cpuFeatures;          // bit mask of CPU_* feature flags
const char* VM_Version::_features_str = "";          // printable feature list built in get_processor_features()
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };  // raw CPUID/XGETBV results written by the stub

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

// Code-cache blob that holds the generated get_cpu_info stub.
static BufferBlob* stub_blob;
static const int stub_size = 600;      // upper bound on the stub's code size, in bytes

// C-callable entry point of the generated stub; fills the CpuidInfo it is passed.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = NULL;
54
55
// Generates a one-shot startup stub that executes CPUID/XGETBV and records
// the raw results into a VM_Version::CpuidInfo record. The stub also
// deliberately provokes a SEGV while the YMM registers hold a known pattern,
// so that OSs which fail to restore the upper 128 bits of YMM registers
// after signal handling can be detected afterwards.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits the stub into the current code buffer and returns its entry address.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx); // zero for XCR0 register
    __ xgetbv();       // XCR0 is returned in edx:eax
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    __ andl(rax, 0x6); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // Some OSs have a bug when upper 128bits of YMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through NULL)
    // and check upper YMM bits after it.
    //
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    intx saved_useavx = UseAVX;
    intx saved_usesse = UseSSE;
    UseAVX = 1;
    UseSSE = 2;

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128h(xmm0, xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif

    __ xorl(rsi, rsi); // rsi is NULL from here on
    VM_Version::set_cpuinfo_segv_addr( __ pc() );
    // Generate SEGV (load through the NULL pointer in rsi); the signal
    // handler resumes execution at _cpuinfo_cont_addr below.
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr( __ pc() );
    // Returns here after signal. Save xmm0 to check it later.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

    // Restore the feature state that was temporarily overridden above.
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx); // subleaf 0
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
381
// Derives the VM's view of the processor from the raw CPUID data gathered by
// the stub: seeds _cpu/_model/_stepping with defaults, prunes the CPU_*
// feature mask so it never claims more than the command-line flags allow
// (UseSSE, UseAVX, UseAES, ...), builds the printable feature string, clamps
// those flags to what the hardware actually supports, and validates dependent
// flags (UseAES, UseCLMUL, RTM options, MaxVectorSize, ...).
void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _cpuFeatures = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  // If the OS does not preserve SSE state across context switches, none of
  // the SSE-family features may be used, whatever CPUID reported.
  if (!os::supports_sse())
    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);

  // Mask out features above the requested UseSSE level so supports_*()
  // queries below reflect what the VM will actually use.
  if (UseSSE < 4) {
    _cpuFeatures &= ~CPU_SSE4_1;
    _cpuFeatures &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _cpuFeatures &= ~CPU_SSE3;
    _cpuFeatures &= ~CPU_SSSE3;
    _cpuFeatures &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _cpuFeatures &= ~CPU_SSE2;

  if (UseSSE < 1)
    _cpuFeatures &= ~CPU_SSE;

  if (UseAVX < 2)
    _cpuFeatures &= ~CPU_AVX2;

  if (UseAVX < 1)
    _cpuFeatures &= ~CPU_AVX;

  // Only honor an explicit -XX:-UseAES; a default "false" does not hide AES.
  if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
    _cpuFeatures &= ~CPU_AES;

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _cpuFeatures &= ~CPU_HT;
  }

  // NOTE(review): jio_snprintf truncates on overflow; re-check buf's size
  // whenever a feature name is added to this list.
  char buf[256];
  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
               (supports_cmpxchg8() ? ", cx8" : ""),
               (supports_fxsr() ? ", fxsr" : ""),
               (supports_mmx() ? ", mmx" : ""),
               (supports_sse() ? ", sse" : ""),
               (supports_sse2() ? ", sse2" : ""),
               (supports_sse3() ? ", sse3" : ""),
               (supports_ssse3()? ", ssse3": ""),
               (supports_sse4_1() ? ", sse4.1" : ""),
               (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_popcnt() ? ", popcnt" : ""),
               (supports_avx() ? ", avx" : ""),
               (supports_avx2() ? ", avx2" : ""),
               (supports_aes() ? ", aes" : ""),
               (supports_clmul() ? ", clmul" : ""),
               (supports_erms() ? ", erms" : ""),
               (supports_rtm() ? ", rtm" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
               (supports_lzcnt() ? ", lzcnt": ""),
               (supports_sse4a() ? ", sse4a": ""),
               (supports_ht() ? ", ht": ""),
               (supports_tsc() ? ", tsc": ""),
               (supports_tscinv_bit() ? ", tscinvbit": ""),
               (supports_tscinv() ? ", tscinv": ""),
               (supports_bmi1() ? ", bmi1" : ""),
               (supports_bmi2() ? ", bmi2" : ""),
               (supports_adx() ? ", adx" : ""));
  _features_str = os::strdup(buf);

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  if (UseSSE > 4) UseSSE=4;
  if (UseSSE < 0) UseSSE=0;
  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
    UseSSE = MIN2((intx)3,UseSSE);
  if (!supports_sse3()) // Drop to 2 if no SSE3 support
    UseSSE = MIN2((intx)2,UseSSE);
  if (!supports_sse2()) // Drop to 1 if no SSE2 support
    UseSSE = MIN2((intx)1,UseSSE);
  if (!supports_sse ()) // Drop to 0 if no SSE support
    UseSSE = 0;

  // Same clamping for UseAVX: 0..2, limited by hardware support.
  if (UseAVX > 2) UseAVX=2;
  if (UseAVX < 0) UseAVX=0;
  if (!supports_avx2()) // Drop to 1 if no AVX2 support
    UseAVX = MIN2((intx)1,UseAVX);
  if (!supports_avx ()) // Drop to 0 if no AVX support
    UseAVX = 0;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      UseAES = true;
    }
  } else if (UseAES) {
    // Explicitly requested but unsupported: warn and turn it off.
    if (!FLAG_IS_DEFAULT(UseAES))
      warning("AES instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseAES, false);
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  // Adjust RTM (Restricted Transactional Memory) flags
  if (!supports_rtm() && UseRTMLocking) {
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    // VM_Version_init() is executed after UseBiasedLocking is used
    // in Thread::allocate().
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (is_intel_family_core()) {
      // These Intel steppings have known RTM errata; RTM stays experimental.
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL && _stepping < 4)) {
        if (!UnlockExperimentalVMOptions) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
    if (!is_power_of_2(RTMTotalCountIncrRate)) {
      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
    }
    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
    }
  } else { // !UseRTMLocking
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
  // Sanitize MaxVectorSize: power of two, at most 32 bytes (YMM), and only
  // as wide as the enabled SSE/AVX level can actually deliver.
  if (MaxVectorSize > 0) {
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2");
      FLAG_SET_DEFAULT(MaxVectorSize, 32);
    }
    if (MaxVectorSize > 32) {
      FLAG_SET_DEFAULT(MaxVectorSize, 32);
    }
    if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
      // 32 bytes vectors (in YMM) are only supported with AVX+
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
    if (UseSSE < 2) {
      // Vectors (in XMM) are only supported with SSE2+
      FLAG_SET_DEFAULT(MaxVectorSize, 0);
    }
#ifdef ASSERT
    // Debug aid: dump the YMM state captured after the stub's test SEGV.
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >=0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
|
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "runtime/java.hpp"
30 #include "runtime/os.hpp"
31 #include "runtime/stubCodeGenerator.hpp"
32 #include "vm_version_x86.hpp"
33
34
// Cached processor identity, filled in once during VM startup.
int VM_Version::_cpu;                  // CPU family (e.g. 4 = 486; see get_processor_features)
int VM_Version::_model;
int VM_Version::_stepping;
uint64_t VM_Version::_cpuFeatures;     // bit mask of CPU_* feature flags (64-bit: includes AVX-512 bits)
const char* VM_Version::_features_str = "";          // printable feature list built in get_processor_features()
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };  // raw CPUID/XGETBV results written by the stub

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

// Code-cache blob that holds the generated get_cpu_info stub.
static BufferBlob* stub_blob;
static const int stub_size = 1000;     // upper bound on the stub's code size, in bytes

// C-callable entry point of the generated stub; fills the CpuidInfo it is passed.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = NULL;
54
55
// Generates a one-shot startup stub that executes CPUID/XGETBV and records
// the raw results into a VM_Version::CpuidInfo record. The stub also
// deliberately provokes a SEGV while YMM (or, on EVEX-capable hardware, ZMM)
// registers hold a known pattern, so that OSs which fail to restore the
// upper vector-register bits after signal handling can be detected.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits the stub into the current code buffer and returns its entry address.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx); // zero for XCR0 register
    __ xgetbv();       // XCR0 is returned in edx:eax
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx); // subleaf 0
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through NULL)
    // and check upper YMM/ZMM bits after it.
    //
    intx saved_useavx = UseAVX;
    intx saved_usesse = UseSSE;
    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(rax, 0x10000);
    __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x10000);
    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
    // check _cpuid_info.xem_xcr0_eax.bits.opmask
    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
    __ movl(rax, 0xE0);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0xE0);
    __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

    // EVEX setup: run in lowest evex mode
    VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 3;
    UseSSE = 2;
    // load value into all 64 bytes of zmm7 register
    __ movl(rcx, VM_Version::ymm_test_value());
    __ movdl(xmm0, rcx);
    __ movl(rcx, 0xffff); // all-ones opmask for the broadcast below
#ifdef _LP64
    __ kmovql(k1, rcx);
#else
    __ kmovdl(k1, rcx);
#endif
    __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
    __ evmovdqu(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
    __ evmovdqu(xmm8, xmm0, Assembler::AVX_512bit);
    __ evmovdqu(xmm31, xmm0, Assembler::AVX_512bit);
#endif
    VM_Version::clean_cpuFeatures();
    __ jmp(save_restore_except);

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128h(xmm0, xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi); // rsi is NULL from here on
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV (load through the NULL pointer in rsi); the signal
    // handler resumes execution at _cpuinfo_cont_addr below.
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(rax, 0x10000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x10000);
    __ jccb(Assembler::notEqual, legacy_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.opmask
    // check _cpuid_info.xem_xcr0_eax.bits.zmm512
    // check _cpuid_info.xem_xcr0_eax.bits.zmm32
    __ movl(rax, 0xE0);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0xE0);
    __ jccb(Assembler::notEqual, legacy_save_restore);

    // EVEX check: run in lowest evex mode
    VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 3;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
    __ evmovdqu(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
    __ evmovdqu(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
    __ evmovdqu(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
    __ evmovdqu(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;
    __ jmp(wrapup);

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
469
// Derives the VM's view of the processor from the raw CPUID data gathered by
// the stub: seeds _cpu/_model/_stepping with defaults, negotiates UseAVX
// (0..3, where 3 = AVX-512/EVEX) and UseSSE against hardware capabilities,
// prunes the CPU_* feature mask accordingly, builds the printable feature
// string, and validates dependent flags (UseAES, UseCLMUL, RTM options,
// MaxVectorSize, ...).
void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _cpuFeatures = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  // If the OS does not preserve SSE state across context switches, none of
  // the SSE-family features may be used, whatever CPUID reported.
  if (!os::supports_sse())
    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);

  // Mask out features above the requested UseSSE level so supports_*()
  // queries below reflect what the VM will actually use.
  if (UseSSE < 4) {
    _cpuFeatures &= ~CPU_SSE4_1;
    _cpuFeatures &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _cpuFeatures &= ~CPU_SSE3;
    _cpuFeatures &= ~CPU_SSSE3;
    _cpuFeatures &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _cpuFeatures &= ~CPU_SSE2;

  if (UseSSE < 1)
    _cpuFeatures &= ~CPU_SSE;

  // first try initial setting and detect what we can support
  if (UseAVX > 0) {
    if (UseAVX > 2 && supports_evex()) {
      UseAVX = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      UseAVX = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      UseAVX = 1;
    } else {
      UseAVX = 0;
    }
  } else if (UseAVX < 0) {
    UseAVX = 0;
  }

  // Below EVEX level, drop all AVX-512 feature bits.
  if (UseAVX < 3) {
    _cpuFeatures &= ~CPU_AVX512F;
    _cpuFeatures &= ~CPU_AVX512DQ;
    _cpuFeatures &= ~CPU_AVX512CD;
    _cpuFeatures &= ~CPU_AVX512BW;
    _cpuFeatures &= ~CPU_AVX512VL;
  }

  if (UseAVX < 2)
    _cpuFeatures &= ~CPU_AVX2;

  if (UseAVX < 1)
    _cpuFeatures &= ~CPU_AVX;

  // Only honor an explicit -XX:-UseAES; a default "false" does not hide AES.
  if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
    _cpuFeatures &= ~CPU_AES;

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _cpuFeatures &= ~CPU_HT;
  }

  // NOTE(review): jio_snprintf truncates on overflow; re-check buf's size
  // whenever a feature name is added to this list.
  char buf[256];
  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
               cores_per_cpu(), threads_per_core(),
               cpu_family(), _model, _stepping,
               (supports_cmov() ? ", cmov" : ""),
               (supports_cmpxchg8() ? ", cx8" : ""),
               (supports_fxsr() ? ", fxsr" : ""),
               (supports_mmx() ? ", mmx" : ""),
               (supports_sse() ? ", sse" : ""),
               (supports_sse2() ? ", sse2" : ""),
               (supports_sse3() ? ", sse3" : ""),
               (supports_ssse3()? ", ssse3": ""),
               (supports_sse4_1() ? ", sse4.1" : ""),
               (supports_sse4_2() ? ", sse4.2" : ""),
               (supports_popcnt() ? ", popcnt" : ""),
               (supports_avx() ? ", avx" : ""),
               (supports_avx2() ? ", avx2" : ""),
               (supports_aes() ? ", aes" : ""),
               (supports_clmul() ? ", clmul" : ""),
               (supports_erms() ? ", erms" : ""),
               (supports_rtm() ? ", rtm" : ""),
               (supports_mmx_ext() ? ", mmxext" : ""),
               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
               (supports_lzcnt() ? ", lzcnt": ""),
               (supports_sse4a() ? ", sse4a": ""),
               (supports_ht() ? ", ht": ""),
               (supports_tsc() ? ", tsc": ""),
               (supports_tscinv_bit() ? ", tscinvbit": ""),
               (supports_tscinv() ? ", tscinv": ""),
               (supports_bmi1() ? ", bmi1" : ""),
               (supports_bmi2() ? ", bmi2" : ""),
               (supports_adx() ? ", adx" : ""),
               (supports_evex() ? ", evex" : ""));
  _features_str = os::strdup(buf);

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  if (UseSSE > 4) UseSSE=4;
  if (UseSSE < 0) UseSSE=0;
  if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
    UseSSE = MIN2((intx)3,UseSSE);
  if (!supports_sse3()) // Drop to 2 if no SSE3 support
    UseSSE = MIN2((intx)2,UseSSE);
  if (!supports_sse2()) // Drop to 1 if no SSE2 support
    UseSSE = MIN2((intx)1,UseSSE);
  if (!supports_sse ()) // Drop to 0 if no SSE support
    UseSSE = 0;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      UseAES = true;
    }
  } else if (UseAES) {
    // Explicitly requested but unsupported: warn and turn it off.
    if (!FLAG_IS_DEFAULT(UseAES))
      warning("AES instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseAES, false);
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  // Adjust RTM (Restricted Transactional Memory) flags
  if (!supports_rtm() && UseRTMLocking) {
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    // VM_Version_init() is executed after UseBiasedLocking is used
    // in Thread::allocate().
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
    if (!is_power_of_2(RTMTotalCountIncrRate)) {
      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
    }
    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
    }
  } else { // !UseRTMLocking
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
    // setting during arguments processing. See use_biased_locking().
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
  // Sanitize MaxVectorSize: power of two, at most 64 bytes (ZMM), and only
  // as wide as the enabled SSE/AVX level can actually deliver.
  if (MaxVectorSize > 0) {
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2");
      FLAG_SET_DEFAULT(MaxVectorSize, 64);
    }
    if (MaxVectorSize > 64) {
      FLAG_SET_DEFAULT(MaxVectorSize, 64);
    }
    if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
      // 32 bytes vectors (in YMM) are only supported with AVX+
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
    if (UseSSE < 2) {
      // Vectors (in XMM) are only supported with SSE2+
      FLAG_SET_DEFAULT(MaxVectorSize, 0);
    }
#ifdef ASSERT
    // Debug aid: dump the YMM state captured after the stub's test SEGV.
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >=0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
|