18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "runtime/java.hpp"
30 #include "runtime/os.hpp"
31 #include "runtime/stubCodeGenerator.hpp"
32 #include "vm_version_x86.hpp"
33
34
// Definitions of VM_Version's static data members.
35 int VM_Version::_cpu;
36 int VM_Version::_model;
37 int VM_Version::_stepping;
38 int VM_Version::_cpuFeatures;
39 const char* VM_Version::_features_str = "";
40 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
41
42 // Address of instruction which causes SEGV
43 address VM_Version::_cpuinfo_segv_addr = 0;
44 // Address of instruction after the one which causes SEGV
45 address VM_Version::_cpuinfo_cont_addr = 0;
46
// File-local bookkeeping for the generated CPU-info stub.
// NOTE(review): neither stub_blob nor stub_size is used in the visible part of
// this file; presumably they hold the stub's code buffer and its size in bytes
// -- confirm against the initialization code.
47 static BufferBlob* stub_blob;
48 static const int stub_size = 600;
49
// C-linkage function-pointer type used to call the generated stub.
50 extern "C" {
51 typedef void (*get_cpu_info_stub_t)(void*);
52 }
// Entry point of the generated stub, called by get_cpu_info_wrapper().
// Starts out NULL; the assignment is not visible in this part of the file.
53 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
54
55
// Stub generator for the "get_cpu_info_stub" routine.
// generate_get_cpu_info() emits machine code through _masm (via the local
// `__` macro) and returns the address at which the emitted code starts.
// The emitted stub takes one argument, a VM_Version::CpuidInfo*, keeps it in
// rbp, and stores CPUID / XGETBV results at offsets from that pointer.
56 class VM_Version_StubGenerator: public StubCodeGenerator {
57 public:
58
59 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
60
// Emits the stub and returns `start`, the pc captured before the first
// emitted instruction.
61 address generate_get_cpu_info() {
62 // Flags to test CPU type.
63 const uint32_t HS_EFL_AC = 0x40000;
64 const uint32_t HS_EFL_ID = 0x200000;
65 // Values for when we don't have a CPUID instruction.
66 const int CPU_FAMILY_SHIFT = 8;
67 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
68 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
69
70 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
71 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done;
72
73 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
74 # define __ _masm->
75
76 address start = __ pc();
77
78 //
79 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
80 //
81 // LP64: rcx and rdx are first and second argument registers on windows
82
// Prologue: rbp, rbx, rsi and the flags are pushed here and popped in reverse
// order at `done`.
83 __ push(rbp);
84 #ifdef _LP64
85 __ mov(rbp, c_rarg0); // cpuid_info address
86 #else
87 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
88 #endif
89 __ push(rbx);
90 __ push(rsi);
91 __ pushf(); // preserve rbx, and flags
224 __ movl(Address(rsi, 8), rcx);
225 __ movl(Address(rsi,12), rdx);
226
227 //
228 // Check if OS has enabled XGETBV instruction to access XCR0
229 // (OSXSAVE feature flag) and CPU supports AVX
230 //
231 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
232 __ cmpl(rcx, 0x18000000);
233 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
234
235 //
236 // XCR0, XFEATURE_ENABLED_MASK register
237 //
238 __ xorl(rcx, rcx); // zero for XCR0 register
239 __ xgetbv();
// Store the XGETBV result (rax, rdx) at xem_xcr0_offset.
240 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
241 __ movl(Address(rsi, 0), rax);
242 __ movl(Address(rsi, 4), rdx);
243
244 __ andl(rax, 0x6); // xcr0 bits sse | ymm
245 __ cmpl(rax, 0x6);
246 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
247
248 //
249 // Some OSs have a bug when upper 128bits of YMM
250 // registers are not restored after a signal processing.
251 // Generate SEGV here (reference through NULL)
252 // and check upper YMM bits after it.
253 //
// The statements below that are not prefixed with `__` are ordinary C++ and
// run while the stub is being generated, not when the stub executes; the
// emitted jumps above do not skip them. UseAVX/UseSSE are saved here and
// restored once the AVX instructions have been emitted.
254 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
255 intx saved_useavx = UseAVX;
256 intx saved_usesse = UseSSE;
257 UseAVX = 1;
258 UseSSE = 2;
259
260 // load value into all 32 bytes of ymm7 register
// (the value is built in xmm0/ymm0 first and then copied to ymm7, and on
// LP64 also to ymm8 and ymm15)
261 __ movl(rcx, VM_Version::ymm_test_value());
262
263 __ movdl(xmm0, rcx);
264 __ pshufd(xmm0, xmm0, 0x00);
265 __ vinsertf128h(xmm0, xmm0, xmm0);
266 __ vmovdqu(xmm7, xmm0);
267 #ifdef _LP64
268 __ vmovdqu(xmm8, xmm0);
269 __ vmovdqu(xmm15, xmm0);
270 #endif
271
// rsi = 0, so the load below reads through a NULL address. The pc of that
// load and the pc right after it are recorded in VM_Version.
272 __ xorl(rsi, rsi);
273 VM_Version::set_cpuinfo_segv_addr( __ pc() );
274 // Generate SEGV
275 __ movl(rax, Address(rsi, 0));
276
277 VM_Version::set_cpuinfo_cont_addr( __ pc() );
278 // Returns here after signal. Save xmm0 to check it later.
// 32-byte stores at ymm_save_offset: ymm0 at +0, ymm7 at +32, and on LP64
// ymm8 at +64 and ymm15 at +96.
279 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
280 __ vmovdqu(Address(rsi, 0), xmm0);
281 __ vmovdqu(Address(rsi, 32), xmm7);
282 #ifdef _LP64
283 __ vmovdqu(Address(rsi, 64), xmm8);
284 __ vmovdqu(Address(rsi, 96), xmm15);
285 #endif
286
// Generation-time cleanup: undo the temporary feature/flag overrides.
287 VM_Version::clean_cpuFeatures();
288 UseAVX = saved_useavx;
289 UseSSE = saved_usesse;
290
291 //
292 // cpuid(0x7) Structured Extended Features
293 //
294 __ bind(sef_cpuid);
295 __ movl(rax, 7);
296 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
297 __ jccb(Assembler::greater, ext_cpuid);
298
299 __ xorl(rcx, rcx);
300 __ cpuid();
// Only rax and rbx of the cpuid(7) result are stored.
301 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
302 __ movl(Address(rsi, 0), rax);
303 __ movl(Address(rsi, 4), rbx);
304
305 //
306 // Extended cpuid(0x80000000)
307 //
308 __ bind(ext_cpuid);
309 __ movl(rax, 0x80000000);
310 __ cpuid();
347 __ cpuid();
348 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
349 __ movl(Address(rsi, 0), rax);
350 __ movl(Address(rsi, 4), rbx);
351 __ movl(Address(rsi, 8), rcx);
352 __ movl(Address(rsi,12), rdx);
353
354 //
355 // Extended cpuid(0x80000001)
356 //
357 __ bind(ext_cpuid1);
358 __ movl(rax, 0x80000001);
359 __ cpuid();
360 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
361 __ movl(Address(rsi, 0), rax);
362 __ movl(Address(rsi, 4), rbx);
363 __ movl(Address(rsi, 8), rcx);
364 __ movl(Address(rsi,12), rdx);
365
366 //
367 // return
368 //
// Epilogue: pops mirror the pushes in the prologue.
369 __ bind(done);
370 __ popf();
371 __ pop(rsi);
372 __ pop(rbx);
373 __ pop(rbp);
374 __ ret(0);
375
376 # undef __
377
378 return start;
379 };
380 };
381
382
// Calls the generated stub through get_cpu_info_stub, passing the address of
// the static _cpuid_info record.
// NOTE(review): get_cpu_info_stub is initialized to NULL in this file, so it
// must have been assigned before this is called -- the assignment is not
// visible here.
383 void VM_Version::get_cpu_info_wrapper() {
384 get_cpu_info_stub(&_cpuid_info);
385 }
386
// Fallback definition: if CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED has not been
// defined already, it expands to a plain call f().
387 #ifndef CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED
388 #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) f()
389 #endif
453 if (!os::supports_sse())
454 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
455
456 if (UseSSE < 4) {
457 _cpuFeatures &= ~CPU_SSE4_1;
458 _cpuFeatures &= ~CPU_SSE4_2;
459 }
460
461 if (UseSSE < 3) {
462 _cpuFeatures &= ~CPU_SSE3;
463 _cpuFeatures &= ~CPU_SSSE3;
464 _cpuFeatures &= ~CPU_SSE4A;
465 }
466
467 if (UseSSE < 2)
468 _cpuFeatures &= ~CPU_SSE2;
469
470 if (UseSSE < 1)
471 _cpuFeatures &= ~CPU_SSE;
472
473 if (UseAVX < 2)
474 _cpuFeatures &= ~CPU_AVX2;
475
476 if (UseAVX < 1)
477 _cpuFeatures &= ~CPU_AVX;
478
479 if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
480 _cpuFeatures &= ~CPU_AES;
481
482 if (logical_processors_per_package() == 1) {
483 // HT processor could be installed on a system which doesn't support HT.
484 _cpuFeatures &= ~CPU_HT;
485 }
486
487 char buf[256];
488 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
489 cores_per_cpu(), threads_per_core(),
490 cpu_family(), _model, _stepping,
491 (supports_cmov() ? ", cmov" : ""),
492 (supports_cmpxchg8() ? ", cx8" : ""),
493 (supports_fxsr() ? ", fxsr" : ""),
494 (supports_mmx() ? ", mmx" : ""),
495 (supports_sse() ? ", sse" : ""),
496 (supports_sse2() ? ", sse2" : ""),
497 (supports_sse3() ? ", sse3" : ""),
498 (supports_ssse3()? ", ssse3": ""),
499 (supports_sse4_1() ? ", sse4.1" : ""),
500 (supports_sse4_2() ? ", sse4.2" : ""),
501 (supports_popcnt() ? ", popcnt" : ""),
502 (supports_avx() ? ", avx" : ""),
503 (supports_avx2() ? ", avx2" : ""),
504 (supports_aes() ? ", aes" : ""),
505 (supports_clmul() ? ", clmul" : ""),
506 (supports_erms() ? ", erms" : ""),
507 (supports_rtm() ? ", rtm" : ""),
508 (supports_mmx_ext() ? ", mmxext" : ""),
509 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
510 (supports_lzcnt() ? ", lzcnt": ""),
511 (supports_sse4a() ? ", sse4a": ""),
512 (supports_ht() ? ", ht": ""),
513 (supports_tsc() ? ", tsc": ""),
514 (supports_tscinv_bit() ? ", tscinvbit": ""),
515 (supports_tscinv() ? ", tscinv": ""),
516 (supports_bmi1() ? ", bmi1" : ""),
517 (supports_bmi2() ? ", bmi2" : ""),
518 (supports_adx() ? ", adx" : ""));
519 _features_str = os::strdup(buf);
520
521 // UseSSE is set to the smaller of what hardware supports and what
522 // the command line requires. I.e., you cannot set UseSSE to 2 on
523 // older Pentiums which do not support it.
524 if (UseSSE > 4) UseSSE=4;
525 if (UseSSE < 0) UseSSE=0;
526 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
527 UseSSE = MIN2((intx)3,UseSSE);
528 if (!supports_sse3()) // Drop to 2 if no SSE3 support
529 UseSSE = MIN2((intx)2,UseSSE);
530 if (!supports_sse2()) // Drop to 1 if no SSE2 support
531 UseSSE = MIN2((intx)1,UseSSE);
532 if (!supports_sse ()) // Drop to 0 if no SSE support
533 UseSSE = 0;
534
535 if (UseAVX > 2) UseAVX=2;
536 if (UseAVX < 0) UseAVX=0;
537 if (!supports_avx2()) // Drop to 1 if no AVX2 support
538 UseAVX = MIN2((intx)1,UseAVX);
539 if (!supports_avx ()) // Drop to 0 if no AVX support
540 UseAVX = 0;
541
542 // Use AES instructions if available.
543 if (supports_aes()) {
544 if (FLAG_IS_DEFAULT(UseAES)) {
545 UseAES = true;
546 }
547 } else if (UseAES) {
548 if (!FLAG_IS_DEFAULT(UseAES))
549 warning("AES instructions are not available on this CPU");
550 FLAG_SET_DEFAULT(UseAES, false);
551 }
552
553 // Use CLMUL instructions if available.
554 if (supports_clmul()) {
555 if (FLAG_IS_DEFAULT(UseCLMUL)) {
556 UseCLMUL = true;
557 }
558 } else if (UseCLMUL) {
559 if (!FLAG_IS_DEFAULT(UseCLMUL))
560 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
561 FLAG_SET_DEFAULT(UseCLMUL, false);
592 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
593 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
594 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
595 }
596
597 // Adjust RTM (Restricted Transactional Memory) flags
598 if (!supports_rtm() && UseRTMLocking) {
599 // Can't continue because UseRTMLocking affects UseBiasedLocking flag
600 // setting during arguments processing. See use_biased_locking().
601 // VM_Version_init() is executed after UseBiasedLocking is used
602 // in Thread::allocate().
603 vm_exit_during_initialization("RTM instructions are not available on this CPU");
604 }
605
606 #if INCLUDE_RTM_OPT
607 if (UseRTMLocking) {
608 if (is_intel_family_core()) {
609 if ((_model == CPU_MODEL_HASWELL_E3) ||
610 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
611 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) {
612 if (!UnlockExperimentalVMOptions) {
613 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
614 } else {
615 warning("UseRTMLocking is only available as experimental option on this platform.");
616 }
617 }
618 }
619 if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
620 // RTM locking should be used only for applications with
621 // high lock contention. For now we do not use it by default.
622 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
623 }
624 if (!is_power_of_2(RTMTotalCountIncrRate)) {
625 warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
626 FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
627 }
628 if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
629 warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
630 FLAG_SET_DEFAULT(RTMAbortRatio, 50);
631 }
632 } else { // !UseRTMLocking
645 }
646 #else
647 if (UseRTMLocking) {
648 // Only C2 does RTM locking optimization.
649 // Can't continue because UseRTMLocking affects UseBiasedLocking flag
650 // setting during arguments processing. See use_biased_locking().
651 vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
652 }
653 #endif
654
655 #ifdef COMPILER2
656 if (UseFPUForSpilling) {
657 if (UseSSE < 2) {
658 // Only supported with SSE2+
659 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
660 }
661 }
662 if (MaxVectorSize > 0) {
663 if (!is_power_of_2(MaxVectorSize)) {
664 warning("MaxVectorSize must be a power of 2");
665 FLAG_SET_DEFAULT(MaxVectorSize, 32);
666 }
667 if (MaxVectorSize > 32) {
668 FLAG_SET_DEFAULT(MaxVectorSize, 32);
669 }
670 if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
671 // 32 bytes vectors (in YMM) are only supported with AVX+
672 FLAG_SET_DEFAULT(MaxVectorSize, 16);
673 }
674 if (UseSSE < 2) {
675 // Vectors (in XMM) are only supported with SSE2+
676 FLAG_SET_DEFAULT(MaxVectorSize, 0);
677 }
678 #ifdef ASSERT
679 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
680 tty->print_cr("State of YMM registers after signal handle:");
681 int nreg = 2 LP64_ONLY(+2);
682 const char* ymm_name[4] = {"0", "7", "8", "15"};
683 for (int i = 0; i < nreg; i++) {
684 tty->print("YMM%s:", ymm_name[i]);
685 for (int j = 7; j >=0; j--) {
686 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
687 }
688 tty->cr();
|
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "runtime/java.hpp"
30 #include "runtime/os.hpp"
31 #include "runtime/stubCodeGenerator.hpp"
32 #include "vm_version_x86.hpp"
33
34
// Definitions of VM_Version's static data members.
35 int VM_Version::_cpu;
36 int VM_Version::_model;
37 int VM_Version::_stepping;
38 uint64_t VM_Version::_cpuFeatures;
39 const char* VM_Version::_features_str = "";
40 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
41
42 // Address of instruction which causes SEGV
43 address VM_Version::_cpuinfo_segv_addr = 0;
44 // Address of instruction after the one which causes SEGV
45 address VM_Version::_cpuinfo_cont_addr = 0;
46
// File-local bookkeeping for the generated CPU-info stub.
// NOTE(review): neither stub_blob nor stub_size is used in the visible part of
// this file; presumably they hold the stub's code buffer and its size in bytes
// -- confirm against the initialization code.
47 static BufferBlob* stub_blob;
48 static const int stub_size = 1000;
49
// C-linkage function-pointer type used to call the generated stub.
50 extern "C" {
51 typedef void (*get_cpu_info_stub_t)(void*);
52 }
// Entry point of the generated stub, called by get_cpu_info_wrapper().
// Starts out NULL; the assignment is not visible in this part of the file.
53 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
54
55
// Stub generator for the "get_cpu_info_stub" routine.
// generate_get_cpu_info() emits machine code through _masm (via the local
// `__` macro) and returns the address at which the emitted code starts.
// The emitted stub takes one argument, a VM_Version::CpuidInfo*, keeps it in
// rbp, and stores CPUID / XGETBV results and saved vector registers at offsets
// from that pointer.
56 class VM_Version_StubGenerator: public StubCodeGenerator {
57 public:
58
59 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
60
// Emits the stub and returns `start`, the pc captured before the first
// emitted instruction.
61 address generate_get_cpu_info() {
62 // Flags to test CPU type.
63 const uint32_t HS_EFL_AC = 0x40000;
64 const uint32_t HS_EFL_ID = 0x200000;
65 // Values for when we don't have a CPUID instruction.
66 const int CPU_FAMILY_SHIFT = 8;
67 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
68 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
69
70 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
71 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
72 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
73
74 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
75 # define __ _masm->
76
77 address start = __ pc();
78
79 //
80 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
81 //
82 // LP64: rcx and rdx are first and second argument registers on windows
83
// Prologue: rbp, rbx, rsi and the flags are pushed here and popped in reverse
// order at `wrapup`.
84 __ push(rbp);
85 #ifdef _LP64
86 __ mov(rbp, c_rarg0); // cpuid_info address
87 #else
88 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
89 #endif
90 __ push(rbx);
91 __ push(rsi);
92 __ pushf(); // preserve rbx, and flags
225 __ movl(Address(rsi, 8), rcx);
226 __ movl(Address(rsi,12), rdx);
227
228 //
229 // Check if OS has enabled XGETBV instruction to access XCR0
230 // (OSXSAVE feature flag) and CPU supports AVX
231 //
232 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
233 __ cmpl(rcx, 0x18000000);
234 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
235
236 //
237 // XCR0, XFEATURE_ENABLED_MASK register
238 //
239 __ xorl(rcx, rcx); // zero for XCR0 register
240 __ xgetbv();
// Store the XGETBV result (rax, rdx) at xem_xcr0_offset; it is re-read from
// memory by the checks further down.
241 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
242 __ movl(Address(rsi, 0), rax);
243 __ movl(Address(rsi, 4), rdx);
244
245 //
246 // cpuid(0x7) Structured Extended Features
247 //
248 __ bind(sef_cpuid);
249 __ movl(rax, 7);
250 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
251 __ jccb(Assembler::greater, ext_cpuid);
252
253 __ xorl(rcx, rcx);
254 __ cpuid();
// Only rax and rbx of the cpuid(7) result are stored.
255 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
256 __ movl(Address(rsi, 0), rax);
257 __ movl(Address(rsi, 4), rbx);
258
259 //
260 // Extended cpuid(0x80000000)
261 //
262 __ bind(ext_cpuid);
263 __ movl(rax, 0x80000000);
264 __ cpuid();
301 __ cpuid();
302 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
303 __ movl(Address(rsi, 0), rax);
304 __ movl(Address(rsi, 4), rbx);
305 __ movl(Address(rsi, 8), rcx);
306 __ movl(Address(rsi,12), rdx);
307
308 //
309 // Extended cpuid(0x80000001)
310 //
311 __ bind(ext_cpuid1);
312 __ movl(rax, 0x80000001);
313 __ cpuid();
314 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
315 __ movl(Address(rsi, 0), rax);
316 __ movl(Address(rsi, 4), rbx);
317 __ movl(Address(rsi, 8), rcx);
318 __ movl(Address(rsi,12), rdx);
319
320 //
321 // Check if OS has enabled XGETBV instruction to access XCR0
322 // (OSXSAVE feature flag) and CPU supports AVX
323 //
// This check works on the values saved in the CpuidInfo record rather than on
// live registers.
324 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
325 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
326 __ andl(rcx, Address(rsi, 8)); // AND with the dword at offset 8 of the std_cpuid1 block
327 __ cmpl(rcx, 0x18000000);
328 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
329
330 __ movl(rax, 0x6);
331 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
332 __ cmpl(rax, 0x6);
333 __ jccb(Assembler::equal, start_simd_check); // both bits set: run the SIMD check; otherwise fall through to done
334
335 // we need to bridge farther than imm8, so we use this island as a thunk
336 __ bind(done);
337 __ jmp(wrapup);
338
339 __ bind(start_simd_check);
340 //
341 // Some OSs have a bug when upper 128/256bits of YMM/ZMM
342 // registers are not restored after a signal processing.
343 // Generate SEGV here (reference through NULL)
344 // and check upper YMM/ZMM bits after it.
345 //
// From here on, statements that are not prefixed with `__` are ordinary C++
// and run while the stub is being generated, not when the stub executes; the
// emitted jumps do not skip them. UseAVX/UseSSE are saved here, overridden
// before each group of vector instructions is emitted, and restored below.
346 intx saved_useavx = UseAVX;
347 intx saved_usesse = UseSSE;
348 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
349 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
350 __ movl(rax, 0x10000);
351 __ andl(rax, Address(rsi, 4)); // dword at offset 4 of the sef_cpuid7 block (where rbx was stored)
352 __ cmpl(rax, 0x10000);
353 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
354 // check _cpuid_info.xem_xcr0_eax.bits.opmask
355 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
356 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
357 __ movl(rax, 0xE0);
358 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
359 __ cmpl(rax, 0xE0);
360 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
361
362 // EVEX setup: run in lowest evex mode
363 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
364 UseAVX = 3;
365 UseSSE = 2;
366 // load value into all 64 bytes of zmm7 register
// (the value is broadcast into zmm0 first and then copied to zmm7, and on
// LP64 also to zmm8 and zmm31)
367 __ movl(rcx, VM_Version::ymm_test_value());
368 __ movdl(xmm0, rcx);
369 __ movl(rcx, 0xffff);
370 #ifdef _LP64
371 __ kmovql(k1, rcx);
372 #else
373 __ kmovdl(k1, rcx);
374 #endif
375 __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
376 __ evmovdqu(xmm7, xmm0, Assembler::AVX_512bit);
377 #ifdef _LP64
378 __ evmovdqu(xmm8, xmm0, Assembler::AVX_512bit);
379 __ evmovdqu(xmm31, xmm0, Assembler::AVX_512bit);
380 #endif
381 VM_Version::clean_cpuFeatures();
382 __ jmp(save_restore_except);
383
384 __ bind(legacy_setup);
385 // AVX setup
386 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
387 UseAVX = 1;
388 UseSSE = 2;
389 // load value into all 32 bytes of ymm7 register
// (the value is built in xmm0/ymm0 first and then copied to ymm7, and on
// LP64 also to ymm8 and ymm15)
390 __ movl(rcx, VM_Version::ymm_test_value());
391
392 __ movdl(xmm0, rcx);
393 __ pshufd(xmm0, xmm0, 0x00);
394 __ vinsertf128h(xmm0, xmm0, xmm0);
395 __ vmovdqu(xmm7, xmm0);
396 #ifdef _LP64
397 __ vmovdqu(xmm8, xmm0);
398 __ vmovdqu(xmm15, xmm0);
399 #endif
400 VM_Version::clean_cpuFeatures();
401
// Both setup paths meet here. rsi = 0, so the load below reads through a NULL
// address. The pc of that load and the pc right after it are recorded in
// VM_Version.
402 __ bind(save_restore_except);
403 __ xorl(rsi, rsi);
404 VM_Version::set_cpuinfo_segv_addr(__ pc());
405 // Generate SEGV
406 __ movl(rax, Address(rsi, 0));
407
408 VM_Version::set_cpuinfo_cont_addr(__ pc());
409 // Returns here after signal. Save xmm0 to check it later.
410
// The same two checks as before the fault are repeated to pick the matching
// save sequence (EVEX or legacy AVX).
411 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
412 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
413 __ movl(rax, 0x10000);
414 __ andl(rax, Address(rsi, 4));
415 __ cmpl(rax, 0x10000);
416 __ jccb(Assembler::notEqual, legacy_save_restore);
417 // check _cpuid_info.xem_xcr0_eax.bits.opmask
418 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
419 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
420 __ movl(rax, 0xE0);
421 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
422 __ cmpl(rax, 0xE0);
423 __ jccb(Assembler::notEqual, legacy_save_restore);
424
425 // EVEX check: run in lowest evex mode
426 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
427 UseAVX = 3;
428 UseSSE = 2;
// 64-byte stores at zmm_save_offset: zmm0 at +0, zmm7 at +64, and on LP64
// zmm8 at +128 and zmm31 at +192.
429 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
430 __ evmovdqu(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
431 __ evmovdqu(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
432 #ifdef _LP64
433 __ evmovdqu(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
434 __ evmovdqu(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
435 #endif
436 VM_Version::clean_cpuFeatures();
437 UseAVX = saved_useavx;
438 UseSSE = saved_usesse;
439 __ jmp(wrapup);
440
441 __ bind(legacy_save_restore);
442 // AVX check
443 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
444 UseAVX = 1;
445 UseSSE = 2;
// 32-byte stores at ymm_save_offset: ymm0 at +0, ymm7 at +32, and on LP64
// ymm8 at +64 and ymm15 at +96.
446 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
447 __ vmovdqu(Address(rsi, 0), xmm0);
448 __ vmovdqu(Address(rsi, 32), xmm7);
449 #ifdef _LP64
450 __ vmovdqu(Address(rsi, 64), xmm8);
451 __ vmovdqu(Address(rsi, 96), xmm15);
452 #endif
// Generation-time cleanup: undo the temporary feature/flag overrides.
453 VM_Version::clean_cpuFeatures();
454 UseAVX = saved_useavx;
455 UseSSE = saved_usesse;
456
// Epilogue: pops mirror the pushes in the prologue.
457 __ bind(wrapup);
458 __ popf();
459 __ pop(rsi);
460 __ pop(rbx);
461 __ pop(rbp);
462 __ ret(0);
463
464 # undef __
465
466 return start;
467 };
468 };
469
470
// Calls the generated stub through get_cpu_info_stub, passing the address of
// the static _cpuid_info record.
// NOTE(review): get_cpu_info_stub is initialized to NULL in this file, so it
// must have been assigned before this is called -- the assignment is not
// visible here.
471 void VM_Version::get_cpu_info_wrapper() {
472 get_cpu_info_stub(&_cpuid_info);
473 }
474
// Fallback definition: if CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED has not been
// defined already, it expands to a plain call f().
475 #ifndef CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED
476 #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) f()
477 #endif
541 if (!os::supports_sse())
542 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
543
544 if (UseSSE < 4) {
545 _cpuFeatures &= ~CPU_SSE4_1;
546 _cpuFeatures &= ~CPU_SSE4_2;
547 }
548
549 if (UseSSE < 3) {
550 _cpuFeatures &= ~CPU_SSE3;
551 _cpuFeatures &= ~CPU_SSSE3;
552 _cpuFeatures &= ~CPU_SSE4A;
553 }
554
555 if (UseSSE < 2)
556 _cpuFeatures &= ~CPU_SSE2;
557
558 if (UseSSE < 1)
559 _cpuFeatures &= ~CPU_SSE;
560
561 // first try initial setting and detect what we can support
562 if (UseAVX > 0) {
563 if (UseAVX > 2 && supports_evex()) {
564 UseAVX = 3;
565 } else if (UseAVX > 1 && supports_avx2()) {
566 UseAVX = 2;
567 } else if (UseAVX > 0 && supports_avx()) {
568 UseAVX = 1;
569 } else {
570 UseAVX = 0;
571 }
572 } else if (UseAVX < 0) {
573 UseAVX = 0;
574 }
575
576 if (UseAVX < 3) {
577 _cpuFeatures &= ~CPU_AVX512F;
578 _cpuFeatures &= ~CPU_AVX512DQ;
579 _cpuFeatures &= ~CPU_AVX512CD;
580 _cpuFeatures &= ~CPU_AVX512BW;
581 _cpuFeatures &= ~CPU_AVX512VL;
582 }
583
584 if (UseAVX < 2)
585 _cpuFeatures &= ~CPU_AVX2;
586
587 if (UseAVX < 1)
588 _cpuFeatures &= ~CPU_AVX;
589
590 if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
591 _cpuFeatures &= ~CPU_AES;
592
593 if (logical_processors_per_package() == 1) {
594 // HT processor could be installed on a system which doesn't support HT.
595 _cpuFeatures &= ~CPU_HT;
596 }
597
598 char buf[256];
599 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
600 cores_per_cpu(), threads_per_core(),
601 cpu_family(), _model, _stepping,
602 (supports_cmov() ? ", cmov" : ""),
603 (supports_cmpxchg8() ? ", cx8" : ""),
604 (supports_fxsr() ? ", fxsr" : ""),
605 (supports_mmx() ? ", mmx" : ""),
606 (supports_sse() ? ", sse" : ""),
607 (supports_sse2() ? ", sse2" : ""),
608 (supports_sse3() ? ", sse3" : ""),
609 (supports_ssse3()? ", ssse3": ""),
610 (supports_sse4_1() ? ", sse4.1" : ""),
611 (supports_sse4_2() ? ", sse4.2" : ""),
612 (supports_popcnt() ? ", popcnt" : ""),
613 (supports_avx() ? ", avx" : ""),
614 (supports_avx2() ? ", avx2" : ""),
615 (supports_aes() ? ", aes" : ""),
616 (supports_clmul() ? ", clmul" : ""),
617 (supports_erms() ? ", erms" : ""),
618 (supports_rtm() ? ", rtm" : ""),
619 (supports_mmx_ext() ? ", mmxext" : ""),
620 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
621 (supports_lzcnt() ? ", lzcnt": ""),
622 (supports_sse4a() ? ", sse4a": ""),
623 (supports_ht() ? ", ht": ""),
624 (supports_tsc() ? ", tsc": ""),
625 (supports_tscinv_bit() ? ", tscinvbit": ""),
626 (supports_tscinv() ? ", tscinv": ""),
627 (supports_bmi1() ? ", bmi1" : ""),
628 (supports_bmi2() ? ", bmi2" : ""),
629 (supports_adx() ? ", adx" : ""),
630 (supports_evex() ? ", evex" : ""));
631 _features_str = os::strdup(buf);
632
633 // UseSSE is set to the smaller of what hardware supports and what
634 // the command line requires. I.e., you cannot set UseSSE to 2 on
635 // older Pentiums which do not support it.
636 if (UseSSE > 4) UseSSE=4;
637 if (UseSSE < 0) UseSSE=0;
638 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
639 UseSSE = MIN2((intx)3,UseSSE);
640 if (!supports_sse3()) // Drop to 2 if no SSE3 support
641 UseSSE = MIN2((intx)2,UseSSE);
642 if (!supports_sse2()) // Drop to 1 if no SSE2 support
643 UseSSE = MIN2((intx)1,UseSSE);
644 if (!supports_sse ()) // Drop to 0 if no SSE support
645 UseSSE = 0;
646
647 // Use AES instructions if available.
648 if (supports_aes()) {
649 if (FLAG_IS_DEFAULT(UseAES)) {
650 UseAES = true;
651 }
652 } else if (UseAES) {
653 if (!FLAG_IS_DEFAULT(UseAES))
654 warning("AES instructions are not available on this CPU");
655 FLAG_SET_DEFAULT(UseAES, false);
656 }
657
658 // Use CLMUL instructions if available.
659 if (supports_clmul()) {
660 if (FLAG_IS_DEFAULT(UseCLMUL)) {
661 UseCLMUL = true;
662 }
663 } else if (UseCLMUL) {
664 if (!FLAG_IS_DEFAULT(UseCLMUL))
665 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
666 FLAG_SET_DEFAULT(UseCLMUL, false);
697 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
698 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
699 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
700 }
701
702 // Adjust RTM (Restricted Transactional Memory) flags
703 if (!supports_rtm() && UseRTMLocking) {
704 // Can't continue because UseRTMLocking affects UseBiasedLocking flag
705 // setting during arguments processing. See use_biased_locking().
706 // VM_Version_init() is executed after UseBiasedLocking is used
707 // in Thread::allocate().
708 vm_exit_during_initialization("RTM instructions are not available on this CPU");
709 }
710
711 #if INCLUDE_RTM_OPT
712 if (UseRTMLocking) {
713 if (is_intel_family_core()) {
714 if ((_model == CPU_MODEL_HASWELL_E3) ||
715 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
716 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) {
717 // currently a collision between SKL and HSW_E3
718 if (!UnlockExperimentalVMOptions && UseAVX < 3) {
719 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
720 } else {
721 warning("UseRTMLocking is only available as experimental option on this platform.");
722 }
723 }
724 }
725 if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
726 // RTM locking should be used only for applications with
727 // high lock contention. For now we do not use it by default.
728 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
729 }
730 if (!is_power_of_2(RTMTotalCountIncrRate)) {
731 warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
732 FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
733 }
734 if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
735 warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
736 FLAG_SET_DEFAULT(RTMAbortRatio, 50);
737 }
738 } else { // !UseRTMLocking
751 }
752 #else
753 if (UseRTMLocking) {
754 // Only C2 does RTM locking optimization.
755 // Can't continue because UseRTMLocking affects UseBiasedLocking flag
756 // setting during arguments processing. See use_biased_locking().
757 vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
758 }
759 #endif
760
761 #ifdef COMPILER2
762 if (UseFPUForSpilling) {
763 if (UseSSE < 2) {
764 // Only supported with SSE2+
765 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
766 }
767 }
768 if (MaxVectorSize > 0) {
769 if (!is_power_of_2(MaxVectorSize)) {
770 warning("MaxVectorSize must be a power of 2");
771 FLAG_SET_DEFAULT(MaxVectorSize, 64);
772 }
773 if (MaxVectorSize > 64) {
774 FLAG_SET_DEFAULT(MaxVectorSize, 64);
775 }
776 if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
777 // 32 bytes vectors (in YMM) are only supported with AVX+
778 FLAG_SET_DEFAULT(MaxVectorSize, 16);
779 }
780 if (UseSSE < 2) {
781 // Vectors (in XMM) are only supported with SSE2+
782 FLAG_SET_DEFAULT(MaxVectorSize, 0);
783 }
784 #ifdef ASSERT
785 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
786 tty->print_cr("State of YMM registers after signal handle:");
787 int nreg = 2 LP64_ONLY(+2);
788 const char* ymm_name[4] = {"0", "7", "8", "15"};
789 for (int i = 0; i < nreg; i++) {
790 tty->print("YMM%s:", ymm_name[i]);
791 for (int j = 7; j >=0; j--) {
792 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
793 }
794 tty->cr();
|