1 /* 2 * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 20 * CA 95054 USA or visit www.sun.com if you need additional information or 21 * have any questions. 22 * 23 */ 24 25 # include "incls/_precompiled.incl" 26 # include "incls/_vm_version_x86.cpp.incl" 27 28 29 int VM_Version::_cpu; 30 int VM_Version::_model; 31 int VM_Version::_stepping; 32 int VM_Version::_cpuFeatures; 33 const char* VM_Version::_features_str = ""; 34 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 35 36 static BufferBlob* stub_blob; 37 static const int stub_size = 300; 38 39 extern "C" { 40 typedef void (*getPsrInfo_stub_t)(void*); 41 } 42 static getPsrInfo_stub_t getPsrInfo_stub = NULL; 43 44 45 class VM_Version_StubGenerator: public StubCodeGenerator { 46 public: 47 48 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 49 50 address generate_getPsrInfo() { 51 // Flags to test CPU type. 52 const uint32_t EFL_AC = 0x40000; 53 const uint32_t EFL_ID = 0x200000; 54 // Values for when we don't have a CPUID instruction. 55 const int CPU_FAMILY_SHIFT = 8; 56 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 57 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 58 59 Label detect_486, cpu486, detect_586, std_cpuid1; 60 Label ext_cpuid1, ext_cpuid5, done; 61 62 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 63 # define __ _masm-> 64 65 address start = __ pc(); 66 67 // 68 // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); 69 // 70 // LP64: rcx and rdx are first and second argument registers on windows 71 72 __ push(rbp); 73 #ifdef _LP64 74 __ mov(rbp, c_rarg0); // cpuid_info address 75 #else 76 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 77 #endif 78 __ push(rbx); 79 __ push(rsi); 80 __ pushf(); // preserve rbx, and flags 81 __ pop(rax); 82 __ push(rax); 83 __ mov(rcx, rax); 84 // 85 // if we are unable to change the AC flag, we have a 386 86 // 87 __ xorl(rax, EFL_AC); 88 __ push(rax); 89 __ popf(); 90 __ pushf(); 91 __ pop(rax); 92 __ cmpptr(rax, rcx); 93 __ jccb(Assembler::notEqual, detect_486); 94 95 __ movl(rax, CPU_FAMILY_386); 96 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 97 __ jmp(done); 98 99 // 100 // If we are unable to change the ID flag, we have a 486 which does 101 // not support the "cpuid" instruction. 102 // 103 __ bind(detect_486); 104 __ mov(rax, rcx); 105 __ xorl(rax, EFL_ID); 106 __ push(rax); 107 __ popf(); 108 __ pushf(); 109 __ pop(rax); 110 __ cmpptr(rcx, rax); 111 __ jccb(Assembler::notEqual, detect_586); 112 113 __ bind(cpu486); 114 __ movl(rax, CPU_FAMILY_486); 115 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 116 __ jmp(done); 117 118 // 119 // At this point, we have a chip which supports the "cpuid" instruction 120 // 121 __ bind(detect_586); 122 __ xorl(rax, rax); 123 __ cpuid(); 124 __ orl(rax, rax); 125 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 126 // value of at least 1, we give up and 127 // assume a 486 128 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 129 __ movl(Address(rsi, 0), rax); 130 __ movl(Address(rsi, 4), rbx); 131 __ movl(Address(rsi, 8), rcx); 132 __ movl(Address(rsi,12), rdx); 133 134 __ cmpl(rax, 3); // Is cpuid(0x4) supported? 135 __ jccb(Assembler::belowEqual, std_cpuid1); 136 137 // 138 // cpuid(0x4) Deterministic cache params 139 // 140 __ movl(rax, 4); 141 __ xorl(rcx, rcx); // L1 cache 142 __ cpuid(); 143 __ push(rax); 144 __ andl(rax, 0x1f); // Determine if valid cache parameters used 145 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 146 __ pop(rax); 147 __ jccb(Assembler::equal, std_cpuid1); 148 149 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 150 __ movl(Address(rsi, 0), rax); 151 __ movl(Address(rsi, 4), rbx); 152 __ movl(Address(rsi, 8), rcx); 153 __ movl(Address(rsi,12), rdx); 154 155 // 156 // Standard cpuid(0x1) 157 // 158 __ bind(std_cpuid1); 159 __ movl(rax, 1); 160 __ cpuid(); 161 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 162 __ movl(Address(rsi, 0), rax); 163 __ movl(Address(rsi, 4), rbx); 164 __ movl(Address(rsi, 8), rcx); 165 __ movl(Address(rsi,12), rdx); 166 167 __ movl(rax, 0x80000000); 168 __ cpuid(); 169 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 170 __ jcc(Assembler::belowEqual, done); 171 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 172 __ jccb(Assembler::belowEqual, ext_cpuid1); 173 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 174 __ jccb(Assembler::belowEqual, ext_cpuid5); 175 // 176 // Extended cpuid(0x80000008) 177 // 178 __ movl(rax, 0x80000008); 179 __ cpuid(); 180 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 181 __ movl(Address(rsi, 0), rax); 182 __ movl(Address(rsi, 4), rbx); 183 __ movl(Address(rsi, 8), rcx); 184 __ movl(Address(rsi,12), rdx); 185 186 // 187 // Extended cpuid(0x80000005) 188 // 189 __ bind(ext_cpuid5); 190 __ movl(rax, 0x80000005); 191 __ cpuid(); 192 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 193 __ movl(Address(rsi, 0), rax); 194 __ movl(Address(rsi, 4), rbx); 195 __ movl(Address(rsi, 8), rcx); 196 __ movl(Address(rsi,12), rdx); 197 198 // 199 // Extended cpuid(0x80000001) 200 // 201 __ bind(ext_cpuid1); 202 __ movl(rax, 0x80000001); 203 __ cpuid(); 204 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 205 __ movl(Address(rsi, 0), rax); 206 __ movl(Address(rsi, 4), rbx); 207 __ movl(Address(rsi, 8), rcx); 208 __ movl(Address(rsi,12), rdx); 209 210 // 211 // return 212 // 213 __ bind(done); 214 __ popf(); 215 __ pop(rsi); 216 __ pop(rbx); 217 __ pop(rbp); 218 __ ret(0); 219 220 # undef __ 221 222 return start; 223 }; 224 }; 225 226 227 void VM_Version::get_processor_features() { 228 229 _cpu = 4; // 486 by default 230 _model = 0; 231 _stepping = 0; 232 _cpuFeatures = 0; 233 _logical_processors_per_package = 1; 234 235 if (!Use486InstrsOnly) { 236 // Get raw processor info 237 getPsrInfo_stub(&_cpuid_info); 238 assert_is_initialized(); 239 _cpu = extended_cpu_family(); 240 _model = extended_cpu_model(); 241 _stepping = cpu_stepping(); 242 243 if (cpu_family() > 4) { // it supports CPUID 244 _cpuFeatures = feature_flags(); 245 // Logical processors are only available on P4s and above, 246 // and only if hyperthreading is available. 247 _logical_processors_per_package = logical_processor_count(); 248 } 249 } 250 251 _supports_cx8 = supports_cmpxchg8(); 252 253 #ifdef _LP64 254 // OS should support SSE for x64 and hardware should support at least SSE2. 255 if (!VM_Version::supports_sse2()) { 256 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 257 } 258 #endif 259 260 // If the OS doesn't support SSE, we can't use this feature even if the HW does 261 if (!os::supports_sse()) 262 _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); 263 264 if (UseSSE < 4) { 265 _cpuFeatures &= ~CPU_SSE4_1; 266 _cpuFeatures &= ~CPU_SSE4_2; 267 } 268 269 if (UseSSE < 3) { 270 _cpuFeatures &= ~CPU_SSE3; 271 _cpuFeatures &= ~CPU_SSSE3; 272 _cpuFeatures &= ~CPU_SSE4A; 273 } 274 275 if (UseSSE < 2) 276 _cpuFeatures &= ~CPU_SSE2; 277 278 if (UseSSE < 1) 279 _cpuFeatures &= ~CPU_SSE; 280 281 if (logical_processors_per_package() == 1) { 282 // HT processor could be installed on a system which doesn't support HT. 283 _cpuFeatures &= ~CPU_HT; 284 } 285 286 char buf[256]; 287 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 288 cores_per_cpu(), threads_per_core(), 289 cpu_family(), _model, _stepping, 290 (supports_cmov() ? ", cmov" : ""), 291 (supports_cmpxchg8() ? ", cx8" : ""), 292 (supports_fxsr() ? ", fxsr" : ""), 293 (supports_mmx() ? ", mmx" : ""), 294 (supports_sse() ? ", sse" : ""), 295 (supports_sse2() ? ", sse2" : ""), 296 (supports_sse3() ? ", sse3" : ""), 297 (supports_ssse3()? ", ssse3": ""), 298 (supports_sse4_1() ? ", sse4.1" : ""), 299 (supports_sse4_2() ? ", sse4.2" : ""), 300 (supports_popcnt() ? ", popcnt" : ""), 301 (supports_mmx_ext() ? ", mmxext" : ""), 302 (supports_3dnow() ? ", 3dnow" : ""), 303 (supports_3dnow2() ? ", 3dnowext" : ""), 304 (supports_sse4a() ? ", sse4a": ""), 305 (supports_ht() ? ", ht": "")); 306 _features_str = strdup(buf); 307 308 // UseSSE is set to the smaller of what hardware supports and what 309 // the command line requires. I.e., you cannot set UseSSE to 2 on 310 // older Pentiums which do not support it. 311 if( UseSSE > 4 ) UseSSE=4; 312 if( UseSSE < 0 ) UseSSE=0; 313 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 314 UseSSE = MIN2((intx)3,UseSSE); 315 if( !supports_sse3() ) // Drop to 2 if no SSE3 support 316 UseSSE = MIN2((intx)2,UseSSE); 317 if( !supports_sse2() ) // Drop to 1 if no SSE2 support 318 UseSSE = MIN2((intx)1,UseSSE); 319 if( !supports_sse () ) // Drop to 0 if no SSE support 320 UseSSE = 0; 321 322 // On new cpus instructions which update whole XMM register should be used 323 // to prevent partial register stall due to dependencies on high half. 324 // 325 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 326 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 327 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 328 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 329 330 if( is_amd() ) { // AMD cpus specific settings 331 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { 332 // Use it on new AMD cpus starting from Opteron. 333 UseAddressNop = true; 334 } 335 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { 336 // Use it on new AMD cpus starting from Opteron. 337 UseNewLongLShift = true; 338 } 339 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 340 if( supports_sse4a() ) { 341 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 342 } else { 343 UseXmmLoadAndClearUpper = false; 344 } 345 } 346 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 347 if( supports_sse4a() ) { 348 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 349 } else { 350 UseXmmRegToRegMoveAll = false; 351 } 352 } 353 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 354 if( supports_sse4a() ) { 355 UseXmmI2F = true; 356 } else { 357 UseXmmI2F = false; 358 } 359 } 360 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 361 if( supports_sse4a() ) { 362 UseXmmI2D = true; 363 } else { 364 UseXmmI2D = false; 365 } 366 } 367 } 368 369 if( is_intel() ) { // Intel cpus specific settings 370 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 371 UseStoreImmI16 = false; // don't use it on Intel cpus 372 } 373 if( cpu_family() == 6 || cpu_family() == 15 ) { 374 if( FLAG_IS_DEFAULT(UseAddressNop) ) { 375 // Use it on all Intel cpus starting from PentiumPro 376 UseAddressNop = true; 377 } 378 } 379 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 380 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 381 } 382 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 383 if( supports_sse3() ) { 384 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 385 } else { 386 UseXmmRegToRegMoveAll = false; 387 } 388 } 389 if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus 390 #ifdef COMPILER2 391 if( FLAG_IS_DEFAULT(MaxLoopPad) ) { 392 // For new Intel cpus do the next optimization: 393 // don't align the beginning of a loop if there are enough instructions 394 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 395 // in current fetch line (OptoLoopAlignment) or the padding 396 // is big (> MaxLoopPad). 397 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 398 // generated NOP instructions. 11 is the largest size of one 399 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 400 MaxLoopPad = 11; 401 } 402 #endif // COMPILER2 403 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 404 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 405 } 406 if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus 407 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 408 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 409 } 410 } 411 if( supports_sse4_2() && UseSSE >= 4 ) { 412 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 413 UseSSE42Intrinsics = true; 414 } 415 } 416 } 417 } 418 419 // Use population count instruction if available. 420 if (supports_popcnt()) { 421 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 422 UsePopCountInstruction = true; 423 } 424 } 425 426 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 427 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 428 429 // set valid Prefetch instruction 430 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 431 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 432 if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; 433 if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; 434 435 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 436 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 437 if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; 438 if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; 439 440 // Allocation prefetch settings 441 intx cache_line_size = L1_data_cache_line_size(); 442 if( cache_line_size > AllocatePrefetchStepSize ) 443 AllocatePrefetchStepSize = cache_line_size; 444 if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) 445 AllocatePrefetchLines = 3; // Optimistic value 446 assert(AllocatePrefetchLines > 0, "invalid value"); 447 if( AllocatePrefetchLines < 1 ) // set valid value in product VM 448 AllocatePrefetchLines = 1; // Conservative value 449 450 AllocatePrefetchDistance = allocate_prefetch_distance(); 451 AllocatePrefetchStyle = allocate_prefetch_style(); 452 453 if( AllocatePrefetchStyle == 2 && is_intel() && 454 cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 455 #ifdef _LP64 456 AllocatePrefetchDistance = 384; 457 #else 458 AllocatePrefetchDistance = 320; 459 #endif 460 } 461 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 462 463 #ifdef _LP64 464 // Prefetch settings 465 PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); 466 PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); 467 PrefetchFieldsAhead = prefetch_fields_ahead(); 468 #endif 469 470 #ifndef PRODUCT 471 if (PrintMiscellaneous && Verbose) { 472 tty->print_cr("Logical CPUs per core: %u", 473 logical_processors_per_package()); 474 tty->print_cr("UseSSE=%d",UseSSE); 475 tty->print("Allocation: "); 476 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { 477 tty->print_cr("no prefetching"); 478 } else { 479 if (UseSSE == 0 && supports_3dnow()) { 480 tty->print("PREFETCHW"); 481 } else if (UseSSE >= 1) { 482 if (AllocatePrefetchInstr == 0) { 483 tty->print("PREFETCHNTA"); 484 } else if (AllocatePrefetchInstr == 1) { 485 tty->print("PREFETCHT0"); 486 } else if (AllocatePrefetchInstr == 2) { 487 tty->print("PREFETCHT2"); 488 } else if (AllocatePrefetchInstr == 3) { 489 tty->print("PREFETCHW"); 490 } 491 } 492 if (AllocatePrefetchLines > 1) { 493 tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 494 } else { 495 tty->print_cr(" %d, one line", AllocatePrefetchDistance); 496 } 497 } 498 499 if (PrefetchCopyIntervalInBytes > 0) { 500 tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); 501 } 502 if (PrefetchScanIntervalInBytes > 0) { 503 tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); 504 } 505 if (PrefetchFieldsAhead > 0) { 506 tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); 507 } 508 } 509 #endif // !PRODUCT 510 } 511 512 void VM_Version::initialize() { 513 ResourceMark rm; 514 // Making this stub must be FIRST use of assembler 515 516 stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); 517 if (stub_blob == NULL) { 518 vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); 519 } 520 CodeBuffer c(stub_blob->instructions_begin(), 521 stub_blob->instructions_size()); 522 VM_Version_StubGenerator g(&c); 523 getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, 524 g.generate_getPsrInfo()); 525 526 get_processor_features(); 527 }