rev 10354 : imported patch vextrinscleanup2
1 /*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "asm/macroAssembler.hpp"
27 #include "asm/macroAssembler.inline.hpp"
28 #include "memory/resourceArea.hpp"
29 #include "runtime/java.hpp"
30 #include "runtime/os.hpp"
31 #include "runtime/stubCodeGenerator.hpp"
32 #include "vm_version_x86.hpp"
33
34
35 int VM_Version::_cpu;
36 int VM_Version::_model;
37 int VM_Version::_stepping;
38 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
39
40 // Address of instruction which causes SEGV
41 address VM_Version::_cpuinfo_segv_addr = 0;
42 // Address of instruction after the one which causes SEGV
43 address VM_Version::_cpuinfo_cont_addr = 0;
44
45 static BufferBlob* stub_blob;
46 static const int stub_size = 1000;
47
48 extern "C" {
49 typedef void (*get_cpu_info_stub_t)(void*);
50 }
51 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
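// Note: the generated stub is called through this pointer as an ordinary C
// function, passing the address of _cpuid_info (see get_processor_features()).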
52
53
54 class VM_Version_StubGenerator: public StubCodeGenerator {
55 public:
56
57 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
58
59 address generate_get_cpu_info() {
60 // Flags to test CPU type.
61 const uint32_t HS_EFL_AC = 0x40000;
62 const uint32_t HS_EFL_ID = 0x200000;
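    // EFLAGS.AC is bit 18 (0x40000): it cannot be toggled on a 386.
    // EFLAGS.ID is bit 21 (0x200000): it cannot be toggled on a CPU without CPUID.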
63 // Values for when we don't have a CPUID instruction.
64 const int CPU_FAMILY_SHIFT = 8;
65 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
66 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
67
68 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
69 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
70 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
71
72 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
73 # define __ _masm->
74
75 address start = __ pc();
76
77 //
78 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
79 //
80 // LP64: rcx and rdx are first and second argument registers on windows
81
82 __ push(rbp);
83 #ifdef _LP64
84 __ mov(rbp, c_rarg0); // cpuid_info address
85 #else
86 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
87 #endif
88 __ push(rbx);
89 __ push(rsi);
90 __ pushf(); // preserve rbx, and flags
91 __ pop(rax);
92 __ push(rax);
93 __ mov(rcx, rax);
94 //
95 // if we are unable to change the AC flag, we have a 386
96 //
97 __ xorl(rax, HS_EFL_AC);
98 __ push(rax);
99 __ popf();
100 __ pushf();
101 __ pop(rax);
102 __ cmpptr(rax, rcx);
103 __ jccb(Assembler::notEqual, detect_486);
104
105 __ movl(rax, CPU_FAMILY_386);
106 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
107 __ jmp(done);
108
109 //
110 // If we are unable to change the ID flag, we have a 486 which does
111 // not support the "cpuid" instruction.
112 //
113 __ bind(detect_486);
114 __ mov(rax, rcx);
115 __ xorl(rax, HS_EFL_ID);
116 __ push(rax);
117 __ popf();
118 __ pushf();
119 __ pop(rax);
120 __ cmpptr(rcx, rax);
121 __ jccb(Assembler::notEqual, detect_586);
122
123 __ bind(cpu486);
124 __ movl(rax, CPU_FAMILY_486);
125 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
126 __ jmp(done);
127
128 //
129 // At this point, we have a chip which supports the "cpuid" instruction
130 //
131 __ bind(detect_586);
132 __ xorl(rax, rax);
133 __ cpuid();
134 __ orl(rax, rax);
135 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
136 // value of at least 1, we give up and
137 // assume a 486
138 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
139 __ movl(Address(rsi, 0), rax);
140 __ movl(Address(rsi, 4), rbx);
141 __ movl(Address(rsi, 8), rcx);
142 __ movl(Address(rsi,12), rdx);
143
144 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
145 __ jccb(Assembler::belowEqual, std_cpuid4);
146
147 //
148 // cpuid(0xB) Processor Topology
149 //
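    // Sub-leaf (ECX) 0 is the SMT/thread level, 1 the core level, 2 the package level.
    // A level is invalid when both EAX[4:0] (the APIC id shift count) and
    // EBX[15:0] (logical processors at this level) are zero.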
150 __ movl(rax, 0xb);
151 __ xorl(rcx, rcx); // Threads level
152 __ cpuid();
153
154 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
155 __ movl(Address(rsi, 0), rax);
156 __ movl(Address(rsi, 4), rbx);
157 __ movl(Address(rsi, 8), rcx);
158 __ movl(Address(rsi,12), rdx);
159
160 __ movl(rax, 0xb);
161 __ movl(rcx, 1); // Cores level
162 __ cpuid();
163 __ push(rax);
164 __ andl(rax, 0x1f); // Determine if valid topology level
165     __ orl(rax, rbx);      // eax[4:0] | ebx[15:0] == 0 indicates invalid level
166 __ andl(rax, 0xffff);
167 __ pop(rax);
168 __ jccb(Assembler::equal, std_cpuid4);
169
170 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
171 __ movl(Address(rsi, 0), rax);
172 __ movl(Address(rsi, 4), rbx);
173 __ movl(Address(rsi, 8), rcx);
174 __ movl(Address(rsi,12), rdx);
175
176 __ movl(rax, 0xb);
177 __ movl(rcx, 2); // Packages level
178 __ cpuid();
179 __ push(rax);
180 __ andl(rax, 0x1f); // Determine if valid topology level
181     __ orl(rax, rbx);      // eax[4:0] | ebx[15:0] == 0 indicates invalid level
182 __ andl(rax, 0xffff);
183 __ pop(rax);
184 __ jccb(Assembler::equal, std_cpuid4);
185
186 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
187 __ movl(Address(rsi, 0), rax);
188 __ movl(Address(rsi, 4), rbx);
189 __ movl(Address(rsi, 8), rcx);
190 __ movl(Address(rsi,12), rdx);
191
192 //
193 // cpuid(0x4) Deterministic cache params
194 //
195 __ bind(std_cpuid4);
196 __ movl(rax, 4);
197 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
198 __ jccb(Assembler::greater, std_cpuid1);
199
200 __ xorl(rcx, rcx); // L1 cache
201 __ cpuid();
202 __ push(rax);
203 __ andl(rax, 0x1f); // Determine if valid cache parameters used
204 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
205 __ pop(rax);
206 __ jccb(Assembler::equal, std_cpuid1);
207
208 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
209 __ movl(Address(rsi, 0), rax);
210 __ movl(Address(rsi, 4), rbx);
211 __ movl(Address(rsi, 8), rcx);
212 __ movl(Address(rsi,12), rdx);
213
214 //
215 // Standard cpuid(0x1)
216 //
217 __ bind(std_cpuid1);
218 __ movl(rax, 1);
219 __ cpuid();
220 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
221 __ movl(Address(rsi, 0), rax);
222 __ movl(Address(rsi, 4), rbx);
223 __ movl(Address(rsi, 8), rcx);
224 __ movl(Address(rsi,12), rdx);
225
226 //
227 // Check if OS has enabled XGETBV instruction to access XCR0
228 // (OSXSAVE feature flag) and CPU supports AVX
229 //
230 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
231 __ cmpl(rcx, 0x18000000);
232 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
233
234 //
235 // XCR0, XFEATURE_ENABLED_MASK register
236 //
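    // Note: xgetbv with ecx == 0 returns XCR0 in edx:eax. Bit 1 is the SSE (XMM)
    // state and bit 2 the AVX (YMM) state; the OS must set both before AVX
    // instructions may be used.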
237 __ xorl(rcx, rcx); // zero for XCR0 register
238 __ xgetbv();
239 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
240 __ movl(Address(rsi, 0), rax);
241 __ movl(Address(rsi, 4), rdx);
242
243 //
244 // cpuid(0x7) Structured Extended Features
245 //
246 __ bind(sef_cpuid);
247 __ movl(rax, 7);
248 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
249 __ jccb(Assembler::greater, ext_cpuid);
250
251 __ xorl(rcx, rcx);
252 __ cpuid();
253 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
254 __ movl(Address(rsi, 0), rax);
255 __ movl(Address(rsi, 4), rbx);
256
257 //
258 // Extended cpuid(0x80000000)
259 //
260 __ bind(ext_cpuid);
261 __ movl(rax, 0x80000000);
262 __ cpuid();
263 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
264 __ jcc(Assembler::belowEqual, done);
265 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
266 __ jccb(Assembler::belowEqual, ext_cpuid1);
267 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
268 __ jccb(Assembler::belowEqual, ext_cpuid5);
269 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
270 __ jccb(Assembler::belowEqual, ext_cpuid7);
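    // The comparisons above dispatch on the maximum extended leaf in rax so that
    // only supported leaves are queried: the code jumps to the block for the
    // highest supported leaf and then falls through the lower ones (ext_cpuid7, 5, 1).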
271 //
272 // Extended cpuid(0x80000008)
273 //
274 __ movl(rax, 0x80000008);
275 __ cpuid();
276 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
277 __ movl(Address(rsi, 0), rax);
278 __ movl(Address(rsi, 4), rbx);
279 __ movl(Address(rsi, 8), rcx);
280 __ movl(Address(rsi,12), rdx);
281
282 //
283 // Extended cpuid(0x80000007)
284 //
285 __ bind(ext_cpuid7);
286 __ movl(rax, 0x80000007);
287 __ cpuid();
288 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
289 __ movl(Address(rsi, 0), rax);
290 __ movl(Address(rsi, 4), rbx);
291 __ movl(Address(rsi, 8), rcx);
292 __ movl(Address(rsi,12), rdx);
293
294 //
295 // Extended cpuid(0x80000005)
296 //
297 __ bind(ext_cpuid5);
298 __ movl(rax, 0x80000005);
299 __ cpuid();
300 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
301 __ movl(Address(rsi, 0), rax);
302 __ movl(Address(rsi, 4), rbx);
303 __ movl(Address(rsi, 8), rcx);
304 __ movl(Address(rsi,12), rdx);
305
306 //
307 // Extended cpuid(0x80000001)
308 //
309 __ bind(ext_cpuid1);
310 __ movl(rax, 0x80000001);
311 __ cpuid();
312 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
313 __ movl(Address(rsi, 0), rax);
314 __ movl(Address(rsi, 4), rbx);
315 __ movl(Address(rsi, 8), rcx);
316 __ movl(Address(rsi,12), rdx);
317
318 //
319 // Check if OS has enabled XGETBV instruction to access XCR0
320 // (OSXSAVE feature flag) and CPU supports AVX
321 //
322 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
323 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
324 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
325 __ cmpl(rcx, 0x18000000);
326 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
327
328 __ movl(rax, 0x6);
329 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
330 __ cmpl(rax, 0x6);
331     __ jccb(Assembler::equal, start_simd_check); // jump if the OS has enabled AVX state (xcr0 sse and ymm bits set)
332
333 // we need to bridge farther than imm8, so we use this island as a thunk
334 __ bind(done);
335 __ jmp(wrapup);
336
337 __ bind(start_simd_check);
338 //
339     // Some OSes have a bug where the upper 128/256 bits of the YMM/ZMM
340     // registers are not restored after signal processing.
341     // Generate a SEGV here (by reading through NULL)
342     // and check the upper YMM/ZMM bits after the signal handler returns.
343 //
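    // The check works as follows: a known pattern (ymm_test_value()) is loaded into
    // the vector registers, a fault is taken deliberately, and after the signal
    // handler returns the registers are stored so they can be compared against the
    // pattern later (see os_supports_avx_vectors()).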
344 intx saved_useavx = UseAVX;
345 intx saved_usesse = UseSSE;
346 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
347 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
348 __ movl(rax, 0x10000);
349     __ andl(rax, Address(rsi, 4)); // cpuid7 ebx bit 16: avx512f
350 __ cmpl(rax, 0x10000);
351 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
352 // check _cpuid_info.xem_xcr0_eax.bits.opmask
353 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
354 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
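    // XCR0 bit 5 = opmask (k) registers, bit 6 = upper 256 bits of zmm0-15,
    // bit 7 = zmm16-31; mask 0xE0 covers all three.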
355 __ movl(rax, 0xE0);
356     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
357 __ cmpl(rax, 0xE0);
358 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
359
360 // EVEX setup: run in lowest evex mode
361 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
362 UseAVX = 3;
363 UseSSE = 2;
364 // load value into all 64 bytes of zmm7 register
365 __ movl(rcx, VM_Version::ymm_test_value());
366 __ movdl(xmm0, rcx);
367 __ movl(rcx, 0xffff);
368 __ kmovwl(k1, rcx);
369 __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
370 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
371 #ifdef _LP64
372 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
373 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
374 #endif
375 VM_Version::clean_cpuFeatures();
376 __ jmp(save_restore_except);
377
378 __ bind(legacy_setup);
379 // AVX setup
380 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
381 UseAVX = 1;
382 UseSSE = 2;
383 // load value into all 32 bytes of ymm7 register
384 __ movl(rcx, VM_Version::ymm_test_value());
385
386 __ movdl(xmm0, rcx);
387 __ pshufd(xmm0, xmm0, 0x00);
388 __ vinsertf128h(xmm0, xmm0, xmm0);
389 __ vmovdqu(xmm7, xmm0);
390 #ifdef _LP64
391 __ vmovdqu(xmm8, xmm0);
392 __ vmovdqu(xmm15, xmm0);
393 #endif
394 VM_Version::clean_cpuFeatures();
395
396 __ bind(save_restore_except);
397 __ xorl(rsi, rsi);
398 VM_Version::set_cpuinfo_segv_addr(__ pc());
399 // Generate SEGV
400 __ movl(rax, Address(rsi, 0));
401
402 VM_Version::set_cpuinfo_cont_addr(__ pc());
403     // Returns here after the signal. Save the vector registers so they can be checked later.
404
405 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
406 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
407 __ movl(rax, 0x10000);
408 __ andl(rax, Address(rsi, 4));
409 __ cmpl(rax, 0x10000);
410 __ jccb(Assembler::notEqual, legacy_save_restore);
411 // check _cpuid_info.xem_xcr0_eax.bits.opmask
412 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
413 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
414 __ movl(rax, 0xE0);
415     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
416 __ cmpl(rax, 0xE0);
417 __ jccb(Assembler::notEqual, legacy_save_restore);
418
419 // EVEX check: run in lowest evex mode
420 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
421 UseAVX = 3;
422 UseSSE = 2;
423 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
424 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
425 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
426 #ifdef _LP64
427 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
428 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
429 #endif
430 VM_Version::clean_cpuFeatures();
431 UseAVX = saved_useavx;
432 UseSSE = saved_usesse;
433 __ jmp(wrapup);
434
435 __ bind(legacy_save_restore);
436 // AVX check
437 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
438 UseAVX = 1;
439 UseSSE = 2;
440 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
441 __ vmovdqu(Address(rsi, 0), xmm0);
442 __ vmovdqu(Address(rsi, 32), xmm7);
443 #ifdef _LP64
444 __ vmovdqu(Address(rsi, 64), xmm8);
445 __ vmovdqu(Address(rsi, 96), xmm15);
446 #endif
447 VM_Version::clean_cpuFeatures();
448 UseAVX = saved_useavx;
449 UseSSE = saved_usesse;
450
451 __ bind(wrapup);
452 __ popf();
453 __ pop(rsi);
454 __ pop(rbx);
455 __ pop(rbp);
456 __ ret(0);
457
458 # undef __
459
460 return start;
461 };
462 };
463
464 void VM_Version::get_processor_features() {
465
466 _cpu = 4; // 486 by default
467 _model = 0;
468 _stepping = 0;
469 _features = 0;
470 _logical_processors_per_package = 1;
471 // i486 internal cache is both I&D and has a 16-byte line size
472 _L1_data_cache_line_size = 16;
473
474 // Get raw processor info
475
476 get_cpu_info_stub(&_cpuid_info);
477
478 assert_is_initialized();
479 _cpu = extended_cpu_family();
480 _model = extended_cpu_model();
481 _stepping = cpu_stepping();
482
483 if (cpu_family() > 4) { // it supports CPUID
484 _features = feature_flags();
485 // Logical processors are only available on P4s and above,
486 // and only if hyperthreading is available.
487 _logical_processors_per_package = logical_processor_count();
488 _L1_data_cache_line_size = L1_line_size();
489 }
490
491 _supports_cx8 = supports_cmpxchg8();
492 // xchg and xadd instructions
493 _supports_atomic_getset4 = true;
494 _supports_atomic_getadd4 = true;
495 LP64_ONLY(_supports_atomic_getset8 = true);
496 LP64_ONLY(_supports_atomic_getadd8 = true);
497
498 #ifdef _LP64
499 // OS should support SSE for x64 and hardware should support at least SSE2.
500 if (!VM_Version::supports_sse2()) {
501 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
502 }
503 // in 64 bit the use of SSE2 is the minimum
504 if (UseSSE < 2) UseSSE = 2;
505 #endif
506
507 #ifdef AMD64
508   // flush_icache_stub has to be generated first.
509   // That is why the ICache line size is hard-coded in the ICache class,
510   // see icache_x86.hpp. It is also the reason why we can't use the
511   // clflush instruction in the 32-bit VM, since it could be running
512   // on a CPU which does not support it.
513   //
514   // The only thing we can do is verify that the hard-coded
515   // ICache::line_size has the correct value.
516 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
517 // clflush_size is size in quadwords (8 bytes).
518 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
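  // CPUID.01H:EBX[15:8] reports the clflush line size in 8-byte quadwords,
  // so the value 8 corresponds to a 64-byte flush line.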
519 #endif
520
521 // If the OS doesn't support SSE, we can't use this feature even if the HW does
522 if (!os::supports_sse())
523 _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
524
525 if (UseSSE < 4) {
526 _features &= ~CPU_SSE4_1;
527 _features &= ~CPU_SSE4_2;
528 }
529
530 if (UseSSE < 3) {
531 _features &= ~CPU_SSE3;
532 _features &= ~CPU_SSSE3;
533 _features &= ~CPU_SSE4A;
534 }
535
536 if (UseSSE < 2)
537 _features &= ~CPU_SSE2;
538
539 if (UseSSE < 1)
540 _features &= ~CPU_SSE;
541
542 // first try initial setting and detect what we can support
543 if (UseAVX > 0) {
544 if (UseAVX > 2 && supports_evex()) {
545 UseAVX = 3;
546 } else if (UseAVX > 1 && supports_avx2()) {
547 UseAVX = 2;
548 } else if (UseAVX > 0 && supports_avx()) {
549 UseAVX = 1;
550 } else {
551 UseAVX = 0;
552 }
553 } else if (UseAVX < 0) {
554 UseAVX = 0;
555 }
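  // For example, a request of -XX:UseAVX=3 on a CPU with AVX2 but no EVEX support
  // ends up as UseAVX=2 after the cascade above.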
556
557 if (UseAVX < 3) {
558 _features &= ~CPU_AVX512F;
559 _features &= ~CPU_AVX512DQ;
560 _features &= ~CPU_AVX512CD;
561 _features &= ~CPU_AVX512BW;
562 _features &= ~CPU_AVX512VL;
563 }
564
565 if (UseAVX < 2)
566 _features &= ~CPU_AVX2;
567
568 if (UseAVX < 1)
569 _features &= ~CPU_AVX;
570
571 if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
572 _features &= ~CPU_AES;
573
574 if (logical_processors_per_package() == 1) {
575     // An HT-capable processor could be installed on a system which doesn't support HT.
576 _features &= ~CPU_HT;
577 }
578
579 char buf[256];
580 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
581 cores_per_cpu(), threads_per_core(),
582 cpu_family(), _model, _stepping,
583 (supports_cmov() ? ", cmov" : ""),
584 (supports_cmpxchg8() ? ", cx8" : ""),
585 (supports_fxsr() ? ", fxsr" : ""),
586 (supports_mmx() ? ", mmx" : ""),
587 (supports_sse() ? ", sse" : ""),
588 (supports_sse2() ? ", sse2" : ""),
589 (supports_sse3() ? ", sse3" : ""),
590 (supports_ssse3()? ", ssse3": ""),
591 (supports_sse4_1() ? ", sse4.1" : ""),
592 (supports_sse4_2() ? ", sse4.2" : ""),
593 (supports_popcnt() ? ", popcnt" : ""),
594 (supports_avx() ? ", avx" : ""),
595 (supports_avx2() ? ", avx2" : ""),
596 (supports_aes() ? ", aes" : ""),
597 (supports_clmul() ? ", clmul" : ""),
598 (supports_erms() ? ", erms" : ""),
599 (supports_rtm() ? ", rtm" : ""),
600 (supports_mmx_ext() ? ", mmxext" : ""),
601 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
602 (supports_lzcnt() ? ", lzcnt": ""),
603 (supports_sse4a() ? ", sse4a": ""),
604 (supports_ht() ? ", ht": ""),
605 (supports_tsc() ? ", tsc": ""),
606 (supports_tscinv_bit() ? ", tscinvbit": ""),
607 (supports_tscinv() ? ", tscinv": ""),
608 (supports_bmi1() ? ", bmi1" : ""),
609 (supports_bmi2() ? ", bmi2" : ""),
610 (supports_adx() ? ", adx" : ""),
611 (supports_evex() ? ", evex" : ""));
612 _features_string = os::strdup(buf);
613
614 // UseSSE is set to the smaller of what hardware supports and what
615 // the command line requires. I.e., you cannot set UseSSE to 2 on
616 // older Pentiums which do not support it.
617 if (UseSSE > 4) UseSSE=4;
618 if (UseSSE < 0) UseSSE=0;
619 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
620 UseSSE = MIN2((intx)3,UseSSE);
621 if (!supports_sse3()) // Drop to 2 if no SSE3 support
622 UseSSE = MIN2((intx)2,UseSSE);
623 if (!supports_sse2()) // Drop to 1 if no SSE2 support
624 UseSSE = MIN2((intx)1,UseSSE);
625 if (!supports_sse ()) // Drop to 0 if no SSE support
626 UseSSE = 0;
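  // For example, -XX:UseSSE=4 on a CPU with SSE2 but no SSE3/SSE4.1 support is
  // clamped to UseSSE=2 by the cascade above.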
627
628 // Use AES instructions if available.
629 if (supports_aes()) {
630 if (FLAG_IS_DEFAULT(UseAES)) {
631 FLAG_SET_DEFAULT(UseAES, true);
632 }
633 if (!UseAES) {
634 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
635 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
636 }
637 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
638 } else {
639 if (UseSSE > 2) {
640 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
641 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
642 }
643 } else {
644 // The AES intrinsic stubs require AES instruction support (of course)
645         // but also require SSE3 mode or higher for the instructions they use.
646 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
647 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
648 }
649 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
650 }
651
652 // --AES-CTR begins--
653 if (!UseAESIntrinsics) {
654 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
655 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
656 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
657 }
658 } else {
659         if (supports_sse4_1() && UseSSE >= 4) {
660 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
661 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
662 }
663 } else {
664 // The AES-CTR intrinsic stubs require AES instruction support (of course)
665           // but also require SSE4.1 mode or higher for the instructions they use.
666 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
667 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
668 }
669 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
670 }
671 }
672 // --AES-CTR ends--
673 }
674 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
675 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
676 warning("AES instructions are not available on this CPU");
677 FLAG_SET_DEFAULT(UseAES, false);
678 }
679 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
680 warning("AES intrinsics are not available on this CPU");
681 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
682 }
683 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
684 warning("AES-CTR intrinsics are not available on this CPU");
685 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
686 }
687 }
688
689 // Use CLMUL instructions if available.
690 if (supports_clmul()) {
691 if (FLAG_IS_DEFAULT(UseCLMUL)) {
692 UseCLMUL = true;
693 }
694 } else if (UseCLMUL) {
695 if (!FLAG_IS_DEFAULT(UseCLMUL))
696 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
697 FLAG_SET_DEFAULT(UseCLMUL, false);
698 }
699
700 if (UseCLMUL && (UseSSE > 2)) {
701 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
702 UseCRC32Intrinsics = true;
703 }
704 } else if (UseCRC32Intrinsics) {
705 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
706 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
707 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
708 }
709
710 if (supports_sse4_2()) {
711 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
712 UseCRC32CIntrinsics = true;
713 }
714 }
715 else if (UseCRC32CIntrinsics) {
716 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
717 warning("CRC32C intrinsics are not available on this CPU");
718 }
719 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
720 }
721
722 // GHASH/GCM intrinsics
723 if (UseCLMUL && (UseSSE > 2)) {
724 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
725 UseGHASHIntrinsics = true;
726 }
727 } else if (UseGHASHIntrinsics) {
728 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
729 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
730 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
731 }
732
733 if (UseSHA) {
734 warning("SHA instructions are not available on this CPU");
735 FLAG_SET_DEFAULT(UseSHA, false);
736 }
737
738 if (UseSHA1Intrinsics) {
739 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
740 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
741 }
742
743 if (UseSHA256Intrinsics) {
744 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
745 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
746 }
747
748 if (UseSHA512Intrinsics) {
749 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
750 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
751 }
752
753 if (UseAdler32Intrinsics) {
754 warning("Adler32Intrinsics not available on this CPU.");
755 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
756 }
757
758 // Adjust RTM (Restricted Transactional Memory) flags
759 if (!supports_rtm() && UseRTMLocking) {
760 // Can't continue because UseRTMLocking affects UseBiasedLocking flag
761 // setting during arguments processing. See use_biased_locking().
762 // VM_Version_init() is executed after UseBiasedLocking is used
763 // in Thread::allocate().
764 vm_exit_during_initialization("RTM instructions are not available on this CPU");
765 }
766
767 #if INCLUDE_RTM_OPT
768 if (UseRTMLocking) {
769 if (is_intel_family_core()) {
770 if ((_model == CPU_MODEL_HASWELL_E3) ||
771 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
772 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) {
773 // currently a collision between SKL and HSW_E3
774 if (!UnlockExperimentalVMOptions && UseAVX < 3) {
775 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
776 } else {
777 warning("UseRTMLocking is only available as experimental option on this platform.");
778 }
779 }
780 }
781 if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
782 // RTM locking should be used only for applications with
783 // high lock contention. For now we do not use it by default.
784 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
785 }
786 if (!is_power_of_2(RTMTotalCountIncrRate)) {
787 warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
788 FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
789 }
790 if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
791 warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
792 FLAG_SET_DEFAULT(RTMAbortRatio, 50);
793 }
794 } else { // !UseRTMLocking
795 if (UseRTMForStackLocks) {
796 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
797 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
798 }
799 FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
800 }
801 if (UseRTMDeopt) {
802 FLAG_SET_DEFAULT(UseRTMDeopt, false);
803 }
804 if (PrintPreciseRTMLockingStatistics) {
805 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
806 }
807 }
808 #else
809 if (UseRTMLocking) {
810 // Only C2 does RTM locking optimization.
811 // Can't continue because UseRTMLocking affects UseBiasedLocking flag
812 // setting during arguments processing. See use_biased_locking().
813 vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
814 }
815 #endif
816
817 #ifdef COMPILER2
818 if (UseFPUForSpilling) {
819 if (UseSSE < 2) {
820 // Only supported with SSE2+
821 FLAG_SET_DEFAULT(UseFPUForSpilling, false);
822 }
823 }
824 #endif
825 #if defined(COMPILER2) || INCLUDE_JVMCI
826 if (MaxVectorSize > 0) {
827 if (!is_power_of_2(MaxVectorSize)) {
828 warning("MaxVectorSize must be a power of 2");
829 FLAG_SET_DEFAULT(MaxVectorSize, 64);
830 }
831 if (MaxVectorSize > 64) {
832 FLAG_SET_DEFAULT(MaxVectorSize, 64);
833 }
834 if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) {
835 // 32 bytes vectors (in YMM) are only supported with AVX+
836 FLAG_SET_DEFAULT(MaxVectorSize, 16);
837 }
838 if (UseSSE < 2) {
839 // Vectors (in XMM) are only supported with SSE2+
840 FLAG_SET_DEFAULT(MaxVectorSize, 0);
841 }
842 #if defined(COMPILER2) && defined(ASSERT)
843 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
844 tty->print_cr("State of YMM registers after signal handle:");
845 int nreg = 2 LP64_ONLY(+2);
846 const char* ymm_name[4] = {"0", "7", "8", "15"};
847 for (int i = 0; i < nreg; i++) {
848 tty->print("YMM%s:", ymm_name[i]);
849 for (int j = 7; j >=0; j--) {
850 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
851 }
852 tty->cr();
853 }
854 }
855 #endif // COMPILER2 && ASSERT
856 }
857 #endif // COMPILER2 || INCLUDE_JVMCI
858
859 #ifdef COMPILER2
860 #ifdef _LP64
861 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
862 UseMultiplyToLenIntrinsic = true;
863 }
864 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
865 UseSquareToLenIntrinsic = true;
866 }
867 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
868 UseMulAddIntrinsic = true;
869 }
870 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
871 UseMontgomeryMultiplyIntrinsic = true;
872 }
873 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
874 UseMontgomerySquareIntrinsic = true;
875 }
876 #else
877 if (UseMultiplyToLenIntrinsic) {
878 if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
879 warning("multiplyToLen intrinsic is not available in 32-bit VM");
880 }
881 FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
882 }
883 if (UseMontgomeryMultiplyIntrinsic) {
884 if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
885 warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
886 }
887 FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
888 }
889 if (UseMontgomerySquareIntrinsic) {
890 if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
891 warning("montgomerySquare intrinsic is not available in 32-bit VM");
892 }
893 FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
894 }
895 if (UseSquareToLenIntrinsic) {
896 if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
897 warning("squareToLen intrinsic is not available in 32-bit VM");
898 }
899 FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
900 }
901 if (UseMulAddIntrinsic) {
902 if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
903 warning("mulAdd intrinsic is not available in 32-bit VM");
904 }
905 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
906 }
907 #endif
908 #endif // COMPILER2
909
910   // On new cpus, instructions which update the whole XMM register should be used
911   // to prevent partial register stalls due to dependencies on the high half.
912 //
913 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
914 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
915 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
916 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
917
918 if( is_amd() ) { // AMD cpus specific settings
919 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
920 // Use it on new AMD cpus starting from Opteron.
921 UseAddressNop = true;
922 }
923 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
924 // Use it on new AMD cpus starting from Opteron.
925 UseNewLongLShift = true;
926 }
927 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
928 if (supports_sse4a()) {
929 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
930 } else {
931 UseXmmLoadAndClearUpper = false;
932 }
933 }
934 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
935 if( supports_sse4a() ) {
936 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
937 } else {
938 UseXmmRegToRegMoveAll = false;
939 }
940 }
941 if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
942 if( supports_sse4a() ) {
943 UseXmmI2F = true;
944 } else {
945 UseXmmI2F = false;
946 }
947 }
948 if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
949 if( supports_sse4a() ) {
950 UseXmmI2D = true;
951 } else {
952 UseXmmI2D = false;
953 }
954 }
955 if (supports_sse4_2() && UseSSE >= 4) {
956 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
957 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
958 }
959 } else {
960       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
961 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
962 }
963 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
964 }
965
966 // some defaults for AMD family 15h
967 if ( cpu_family() == 0x15 ) {
968 // On family 15h processors default is no sw prefetch
969 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
970 AllocatePrefetchStyle = 0;
971 }
972 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
973 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
974 AllocatePrefetchInstr = 3;
975 }
976 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
977 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
978 UseXMMForArrayCopy = true;
979 }
980 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
981 UseUnalignedLoadStores = true;
982 }
983 }
984
985 #ifdef COMPILER2
986 if (MaxVectorSize > 16) {
987 // Limit vectors size to 16 bytes on current AMD cpus.
988 FLAG_SET_DEFAULT(MaxVectorSize, 16);
989 }
990 #endif // COMPILER2
991 }
992
993 if( is_intel() ) { // Intel cpus specific settings
994 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
995 UseStoreImmI16 = false; // don't use it on Intel cpus
996 }
997 if( cpu_family() == 6 || cpu_family() == 15 ) {
998 if( FLAG_IS_DEFAULT(UseAddressNop) ) {
999 // Use it on all Intel cpus starting from PentiumPro
1000 UseAddressNop = true;
1001 }
1002 }
1003 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
1004 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1005 }
1006 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
1007 if( supports_sse3() ) {
1008 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1009 } else {
1010 UseXmmRegToRegMoveAll = false;
1011 }
1012 }
1013 if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
1014 #ifdef COMPILER2
1015 if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
1016           // For new Intel cpus do the following optimization:
1017           // don't align the beginning of a loop if there are enough instructions
1018           // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1019           // in the current fetch line (OptoLoopAlignment) or the padding
1020           // is big (> MaxLoopPad).
1021 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1022 // generated NOP instructions. 11 is the largest size of one
1023 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1024 MaxLoopPad = 11;
1025 }
1026 #endif // COMPILER2
1027 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1028 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1029 }
1030 if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus
1031 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1032 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1033 }
1034 }
1035 if (supports_sse4_2() && UseSSE >= 4) {
1036 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1037 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1038 }
1039 } else {
1040         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1041 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1042 }
1043 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1044 }
1045 }
1046 if ((cpu_family() == 0x06) &&
1047 ((extended_cpu_model() == 0x36) || // Centerton
1048 (extended_cpu_model() == 0x37) || // Silvermont
1049 (extended_cpu_model() == 0x4D))) {
1050 #ifdef COMPILER2
1051 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1052 OptoScheduling = true;
1053 }
1054 #endif
1055 if (supports_sse4_2()) { // Silvermont
1056 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1057 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1058 }
1059 }
1060 }
1061 if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1062 AllocatePrefetchInstr = 3;
1063 }
1064 }
1065
1066 #ifdef _LP64
1067 if (UseSSE42Intrinsics) {
1068 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1069 UseVectorizedMismatchIntrinsic = true;
1070 }
1071 } else if (UseVectorizedMismatchIntrinsic) {
1072 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1073 warning("vectorizedMismatch intrinsics are not available on this CPU");
1074 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1075 }
1076 #else
1077 if (UseVectorizedMismatchIntrinsic) {
1078 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1079 warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1080 }
1081 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1082 }
1083 #endif // _LP64
1084
1085   // Use the count leading zeros (lzcnt) instruction if available.
1086 if (supports_lzcnt()) {
1087 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1088 UseCountLeadingZerosInstruction = true;
1089 }
1090 } else if (UseCountLeadingZerosInstruction) {
1091 warning("lzcnt instruction is not available on this CPU");
1092 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1093 }
1094
1095 // Use count trailing zeros instruction if available
1096 if (supports_bmi1()) {
1097 // tzcnt does not require VEX prefix
1098 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1099 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1100 // Don't use tzcnt if BMI1 is switched off on command line.
1101 UseCountTrailingZerosInstruction = false;
1102 } else {
1103 UseCountTrailingZerosInstruction = true;
1104 }
1105 }
1106 } else if (UseCountTrailingZerosInstruction) {
1107 warning("tzcnt instruction is not available on this CPU");
1108 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1109 }
1110
1111 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1112 // VEX prefix is generated only when AVX > 0.
1113 if (supports_bmi1() && supports_avx()) {
1114 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1115 UseBMI1Instructions = true;
1116 }
1117 } else if (UseBMI1Instructions) {
1118 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1119 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1120 }
1121
1122 if (supports_bmi2() && supports_avx()) {
1123 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1124 UseBMI2Instructions = true;
1125 }
1126 } else if (UseBMI2Instructions) {
1127 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1128 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1129 }
1130
1131 // Use population count instruction if available.
1132 if (supports_popcnt()) {
1133 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1134 UsePopCountInstruction = true;
1135 }
1136 } else if (UsePopCountInstruction) {
1137 warning("POPCNT instruction is not available on this CPU");
1138 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1139 }
1140
1141 // Use fast-string operations if available.
1142 if (supports_erms()) {
1143 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1144 UseFastStosb = true;
1145 }
1146 } else if (UseFastStosb) {
1147 warning("fast-string operations are not available on this CPU");
1148 FLAG_SET_DEFAULT(UseFastStosb, false);
1149 }
1150
1151 #ifdef COMPILER2
1152 if (FLAG_IS_DEFAULT(AlignVector)) {
1153 // Modern processors allow misaligned memory operations for vectors.
1154 AlignVector = !UseUnalignedLoadStores;
1155 }
1156 #endif // COMPILER2
1157
1158 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0;
1159 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;
1160
1161 // Allocation prefetch settings
1162 intx cache_line_size = prefetch_data_size();
1163 if( cache_line_size > AllocatePrefetchStepSize )
1164 AllocatePrefetchStepSize = cache_line_size;
1165
1166 AllocatePrefetchDistance = allocate_prefetch_distance();
1167 AllocatePrefetchStyle = allocate_prefetch_style();
1168
1169 if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1170 if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
1171 #ifdef _LP64
1172 AllocatePrefetchDistance = 384;
1173 #else
1174 AllocatePrefetchDistance = 320;
1175 #endif
1176 }
1177 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1178 AllocatePrefetchDistance = 192;
1179 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {
1180 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1181 }
1182 }
1183 #ifdef COMPILER2
1184 if (supports_sse4_2()) {
1185 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1186 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1187 }
1188 }
1189 #endif
1190 }
1191
1192 #ifdef _LP64
1193 // Prefetch settings
1194 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
1195 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
1196 PrefetchFieldsAhead = prefetch_fields_ahead();
1197 #endif
1198
1199 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1200 (cache_line_size > ContendedPaddingWidth))
1201 ContendedPaddingWidth = cache_line_size;
1202
1203 // This machine allows unaligned memory accesses
1204 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1205 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1206 }
1207
1208 #ifndef PRODUCT
1209 if (PrintMiscellaneous && Verbose) {
1210 tty->print_cr("Logical CPUs per core: %u",
1211 logical_processors_per_package());
1212 tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1213 tty->print("UseSSE=%d", (int) UseSSE);
1214 if (UseAVX > 0) {
1215 tty->print(" UseAVX=%d", (int) UseAVX);
1216 }
1217 if (UseAES) {
1218 tty->print(" UseAES=1");
1219 }
1220 #ifdef COMPILER2
1221 if (MaxVectorSize > 0) {
1222 tty->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1223 }
1224 #endif
1225 tty->cr();
1226 tty->print("Allocation");
1227     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1228 tty->print_cr(": no prefetching");
1229 } else {
1230 tty->print(" prefetching: ");
1231 if (UseSSE == 0 && supports_3dnow_prefetch()) {
1232 tty->print("PREFETCHW");
1233 } else if (UseSSE >= 1) {
1234 if (AllocatePrefetchInstr == 0) {
1235 tty->print("PREFETCHNTA");
1236 } else if (AllocatePrefetchInstr == 1) {
1237 tty->print("PREFETCHT0");
1238 } else if (AllocatePrefetchInstr == 2) {
1239 tty->print("PREFETCHT2");
1240 } else if (AllocatePrefetchInstr == 3) {
1241 tty->print("PREFETCHW");
1242 }
1243 }
1244 if (AllocatePrefetchLines > 1) {
1245 tty->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1246 } else {
1247 tty->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1248 }
1249 }
1250
1251 if (PrefetchCopyIntervalInBytes > 0) {
1252 tty->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1253 }
1254 if (PrefetchScanIntervalInBytes > 0) {
1255 tty->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1256 }
1257 if (PrefetchFieldsAhead > 0) {
1258 tty->print_cr("PrefetchFieldsAhead %d", (int) PrefetchFieldsAhead);
1259 }
1260 if (ContendedPaddingWidth > 0) {
1261 tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1262 }
1263 }
1264 #endif // !PRODUCT
1265 }
1266
1267 bool VM_Version::use_biased_locking() {
1268 #if INCLUDE_RTM_OPT
1269 // RTM locking is most useful when there is high lock contention and
1270 // low data contention. With high lock contention the lock is usually
1271 // inflated and biased locking is not suitable for that case.
1272 // RTM locking code requires that biased locking is off.
1273 // Note: we can't switch off UseBiasedLocking in get_processor_features()
1274 // because it is used by Thread::allocate() which is called before
1275 // VM_Version::initialize().
1276 if (UseRTMLocking && UseBiasedLocking) {
1277 if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
1278 FLAG_SET_DEFAULT(UseBiasedLocking, false);
1279 } else {
1280 warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
1281 UseBiasedLocking = false;
1282 }
1283 }
1284 #endif
1285 return UseBiasedLocking;
1286 }
1287
1288 void VM_Version::initialize() {
1289 ResourceMark rm;
1290   // Generating this stub must be the FIRST use of the assembler.
1291
1292 stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size);
1293 if (stub_blob == NULL) {
1294 vm_exit_during_initialization("Unable to allocate get_cpu_info_stub");
1295 }
1296 CodeBuffer c(stub_blob);
1297 VM_Version_StubGenerator g(&c);
1298 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
1299 g.generate_get_cpu_info());
1300
1301 get_processor_features();
1302 }
--- EOF ---