1 /* 2 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2012, 2020 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 #include "jvm.h" 28 #include "asm/assembler.inline.hpp" 29 #include "asm/macroAssembler.inline.hpp" 30 #include "compiler/disassembler.hpp" 31 #include "memory/resourceArea.hpp" 32 #include "runtime/java.hpp" 33 #include "runtime/os.hpp" 34 #include "runtime/stubCodeGenerator.hpp" 35 #include "runtime/vm_version.hpp" 36 #include "utilities/align.hpp" 37 #include "utilities/defaultStream.hpp" 38 #include "utilities/globalDefinitions.hpp" 39 #include "utilities/powerOfTwo.hpp" 40 41 #include <sys/sysinfo.h> 42 #if defined(_AIX) 43 #include <libperfstat.h> 44 #endif 45 46 #if defined(LINUX) && defined(VM_LITTLE_ENDIAN) 47 #include <sys/auxv.h> 48 49 #ifndef PPC_FEATURE2_HTM_NOSC 50 #define PPC_FEATURE2_HTM_NOSC (1 << 24) 51 #endif 52 #endif 53 54 bool VM_Version::_is_determine_features_test_running = false; 55 uint64_t VM_Version::_dscr_val = 0; 56 57 #define MSG(flag) \ 58 if (flag && !FLAG_IS_DEFAULT(flag)) \ 59 jio_fprintf(defaultStream::error_stream(), \ 60 "warning: -XX:+" #flag " requires -XX:+UseSIGTRAP\n" \ 61 " -XX:+" #flag " will be disabled!\n"); 62 63 void VM_Version::initialize() { 64 65 // Test which instructions are supported and measure cache line size. 66 determine_features(); 67 68 // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. 69 if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { 70 if (VM_Version::has_darn()) { 71 FLAG_SET_ERGO(PowerArchitecturePPC64, 9); 72 } else if (VM_Version::has_lqarx()) { 73 FLAG_SET_ERGO(PowerArchitecturePPC64, 8); 74 } else if (VM_Version::has_popcntw()) { 75 FLAG_SET_ERGO(PowerArchitecturePPC64, 7); 76 } else if (VM_Version::has_cmpb()) { 77 FLAG_SET_ERGO(PowerArchitecturePPC64, 6); 78 } else if (VM_Version::has_popcntb()) { 79 FLAG_SET_ERGO(PowerArchitecturePPC64, 5); 80 } else { 81 FLAG_SET_ERGO(PowerArchitecturePPC64, 0); 82 } 83 } 84 85 bool PowerArchitecturePPC64_ok = false; 86 switch (PowerArchitecturePPC64) { 87 case 9: if (!VM_Version::has_darn() ) break; 88 case 8: if (!VM_Version::has_lqarx() ) break; 89 case 7: if (!VM_Version::has_popcntw()) break; 90 case 6: if (!VM_Version::has_cmpb() ) break; 91 case 5: if (!VM_Version::has_popcntb()) break; 92 case 0: PowerArchitecturePPC64_ok = true; break; 93 default: break; 94 } 95 guarantee(PowerArchitecturePPC64_ok, "PowerArchitecturePPC64 cannot be set to " 96 UINTX_FORMAT " on this machine", PowerArchitecturePPC64); 97 98 // Power 8: Configure Data Stream Control Register. 99 if (PowerArchitecturePPC64 >= 8 && has_mfdscr()) { 100 config_dscr(); 101 } 102 103 if (!UseSIGTRAP) { 104 MSG(TrapBasedICMissChecks); 105 MSG(TrapBasedNotEntrantChecks); 106 MSG(TrapBasedNullChecks); 107 FLAG_SET_ERGO(TrapBasedNotEntrantChecks, false); 108 FLAG_SET_ERGO(TrapBasedNullChecks, false); 109 FLAG_SET_ERGO(TrapBasedICMissChecks, false); 110 } 111 112 #ifdef COMPILER2 113 if (!UseSIGTRAP) { 114 MSG(TrapBasedRangeChecks); 115 FLAG_SET_ERGO(TrapBasedRangeChecks, false); 116 } 117 118 // On Power6 test for section size. 119 if (PowerArchitecturePPC64 == 6) { 120 determine_section_size(); 121 // TODO: PPC port } else { 122 // TODO: PPC port PdScheduling::power6SectorSize = 0x20; 123 } 124 125 if (PowerArchitecturePPC64 >= 8) { 126 if (FLAG_IS_DEFAULT(SuperwordUseVSX)) { 127 FLAG_SET_ERGO(SuperwordUseVSX, true); 128 } 129 } else { 130 if (SuperwordUseVSX) { 131 warning("SuperwordUseVSX specified, but needs at least Power8."); 132 FLAG_SET_DEFAULT(SuperwordUseVSX, false); 133 } 134 } 135 MaxVectorSize = SuperwordUseVSX ? 16 : 8; 136 137 if (PowerArchitecturePPC64 >= 9) { 138 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionsPPC64)) { 139 FLAG_SET_ERGO(UseCountTrailingZerosInstructionsPPC64, true); 140 } 141 if (FLAG_IS_DEFAULT(UseCharacterCompareIntrinsics)) { 142 FLAG_SET_ERGO(UseCharacterCompareIntrinsics, true); 143 } 144 if (FLAG_IS_DEFAULT(UseVectorByteReverseInstructionsPPC64)) { 145 FLAG_SET_ERGO(UseVectorByteReverseInstructionsPPC64, true); 146 } 147 } else { 148 if (UseCountTrailingZerosInstructionsPPC64) { 149 warning("UseCountTrailingZerosInstructionsPPC64 specified, but needs at least Power9."); 150 FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionsPPC64, false); 151 } 152 if (UseCharacterCompareIntrinsics) { 153 warning("UseCharacterCompareIntrinsics specified, but needs at least Power9."); 154 FLAG_SET_DEFAULT(UseCharacterCompareIntrinsics, false); 155 } 156 if (UseVectorByteReverseInstructionsPPC64) { 157 warning("UseVectorByteReverseInstructionsPPC64 specified, but needs at least Power9."); 158 FLAG_SET_DEFAULT(UseVectorByteReverseInstructionsPPC64, false); 159 } 160 } 161 #endif 162 163 // Create and print feature-string. 164 char buf[(num_features+1) * 16]; // Max 16 chars per feature. 165 jio_snprintf(buf, sizeof(buf), 166 "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 167 (has_fsqrt() ? " fsqrt" : ""), 168 (has_isel() ? " isel" : ""), 169 (has_lxarxeh() ? " lxarxeh" : ""), 170 (has_cmpb() ? " cmpb" : ""), 171 (has_popcntb() ? " popcntb" : ""), 172 (has_popcntw() ? " popcntw" : ""), 173 (has_fcfids() ? " fcfids" : ""), 174 (has_vand() ? " vand" : ""), 175 (has_lqarx() ? " lqarx" : ""), 176 (has_vcipher() ? " aes" : ""), 177 (has_vpmsumb() ? " vpmsumb" : ""), 178 (has_mfdscr() ? " mfdscr" : ""), 179 (has_vsx() ? " vsx" : ""), 180 (has_ldbrx() ? " ldbrx" : ""), 181 (has_stdbrx() ? " stdbrx" : ""), 182 (has_vshasig() ? " sha" : ""), 183 (has_tm() ? " rtm" : ""), 184 (has_darn() ? " darn" : "") 185 // Make sure number of %s matches num_features! 186 ); 187 _features_string = os::strdup(buf); 188 if (Verbose) { 189 print_features(); 190 } 191 192 // PPC64 supports 8-byte compare-exchange operations (see Atomic::cmpxchg) 193 // and 'atomic long memory ops' (see Unsafe_GetLongVolatile). 194 _supports_cx8 = true; 195 196 // Used by C1. 197 _supports_atomic_getset4 = true; 198 _supports_atomic_getadd4 = true; 199 _supports_atomic_getset8 = true; 200 _supports_atomic_getadd8 = true; 201 202 intx cache_line_size = L1_data_cache_line_size(); 203 204 if (PowerArchitecturePPC64 >= 9) { 205 if (os::supports_map_sync() == true) { 206 _data_cache_line_flush_size = cache_line_size; 207 } 208 } 209 210 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1; 211 212 if (cache_line_size > AllocatePrefetchStepSize) AllocatePrefetchStepSize = cache_line_size; 213 // PPC processors have an automatic prefetch engine. 214 if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) AllocatePrefetchLines = 1; 215 if (AllocatePrefetchDistance < 0) AllocatePrefetchDistance = 3 * cache_line_size; 216 217 assert(AllocatePrefetchLines > 0, "invalid value"); 218 if (AllocatePrefetchLines < 1) { // Set valid value in product VM. 219 AllocatePrefetchLines = 1; // Conservative value. 220 } 221 222 if (AllocatePrefetchStyle == 3 && AllocatePrefetchDistance < cache_line_size) { 223 AllocatePrefetchStyle = 1; // Fall back if inappropriate. 224 } 225 226 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 227 228 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) { 229 ContendedPaddingWidth = cache_line_size; 230 } 231 232 // If running on Power8 or newer hardware, the implementation uses the available vector instructions. 233 // In all other cases, the implementation uses only generally available instructions. 234 if (!UseCRC32Intrinsics) { 235 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { 236 FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); 237 } 238 } 239 240 // Implementation does not use any of the vector instructions available with Power8. 241 // Their exploitation is still pending (aka "work in progress"). 242 if (!UseCRC32CIntrinsics) { 243 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 244 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); 245 } 246 } 247 248 // TODO: Provide implementation. 249 if (UseAdler32Intrinsics) { 250 warning("Adler32Intrinsics not available on this CPU."); 251 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 252 } 253 254 // The AES intrinsic stubs require AES instruction support. 255 if (has_vcipher()) { 256 if (FLAG_IS_DEFAULT(UseAES)) { 257 UseAES = true; 258 } 259 } else if (UseAES) { 260 if (!FLAG_IS_DEFAULT(UseAES)) 261 warning("AES instructions are not available on this CPU"); 262 FLAG_SET_DEFAULT(UseAES, false); 263 } 264 265 if (UseAES && has_vcipher()) { 266 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 267 UseAESIntrinsics = true; 268 } 269 } else if (UseAESIntrinsics) { 270 if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) 271 warning("AES intrinsics are not available on this CPU"); 272 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 273 } 274 275 if (UseAESCTRIntrinsics) { 276 warning("AES/CTR intrinsics are not available on this CPU"); 277 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 278 } 279 280 if (UseGHASHIntrinsics) { 281 warning("GHASH intrinsics are not available on this CPU"); 282 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 283 } 284 285 if (FLAG_IS_DEFAULT(UseFMA)) { 286 FLAG_SET_DEFAULT(UseFMA, true); 287 } 288 289 if (has_vshasig()) { 290 if (FLAG_IS_DEFAULT(UseSHA)) { 291 UseSHA = true; 292 } 293 } else if (UseSHA) { 294 if (!FLAG_IS_DEFAULT(UseSHA)) 295 warning("SHA instructions are not available on this CPU"); 296 FLAG_SET_DEFAULT(UseSHA, false); 297 } 298 299 if (UseSHA1Intrinsics) { 300 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); 301 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); 302 } 303 304 if (UseSHA && has_vshasig()) { 305 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { 306 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); 307 } 308 } else if (UseSHA256Intrinsics) { 309 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); 310 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); 311 } 312 313 if (UseSHA && has_vshasig()) { 314 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { 315 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); 316 } 317 } else if (UseSHA512Intrinsics) { 318 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); 319 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 320 } 321 322 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { 323 FLAG_SET_DEFAULT(UseSHA, false); 324 } 325 326 #ifdef COMPILER2 327 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 328 UseSquareToLenIntrinsic = true; 329 } 330 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 331 UseMulAddIntrinsic = true; 332 } 333 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 334 UseMultiplyToLenIntrinsic = true; 335 } 336 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 337 UseMontgomeryMultiplyIntrinsic = true; 338 } 339 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 340 UseMontgomerySquareIntrinsic = true; 341 } 342 #endif 343 344 if (UseVectorizedMismatchIntrinsic) { 345 warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU."); 346 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 347 } 348 349 350 // Adjust RTM (Restricted Transactional Memory) flags. 351 if (UseRTMLocking) { 352 // If CPU or OS do not support TM: 353 // Can't continue because UseRTMLocking affects UseBiasedLocking flag 354 // setting during arguments processing. See use_biased_locking(). 355 // VM_Version_init() is executed after UseBiasedLocking is used 356 // in Thread::allocate(). 357 if (PowerArchitecturePPC64 < 8) { 358 vm_exit_during_initialization("RTM instructions are not available on this CPU."); 359 } 360 361 if (!has_tm()) { 362 vm_exit_during_initialization("RTM is not supported on this OS version."); 363 } 364 } 365 366 if (UseRTMLocking) { 367 #if INCLUDE_RTM_OPT 368 if (!FLAG_IS_CMDLINE(UseRTMLocking)) { 369 // RTM locking should be used only for applications with 370 // high lock contention. For now we do not use it by default. 371 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); 372 } 373 #else 374 // Only C2 does RTM locking optimization. 375 // Can't continue because UseRTMLocking affects UseBiasedLocking flag 376 // setting during arguments processing. See use_biased_locking(). 377 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 378 #endif 379 } else { // !UseRTMLocking 380 if (UseRTMForStackLocks) { 381 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { 382 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); 383 } 384 FLAG_SET_DEFAULT(UseRTMForStackLocks, false); 385 } 386 if (UseRTMDeopt) { 387 FLAG_SET_DEFAULT(UseRTMDeopt, false); 388 } 389 #ifdef COMPILER2 390 if (PrintPreciseRTMLockingStatistics) { 391 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); 392 } 393 #endif 394 } 395 396 // This machine allows unaligned memory accesses 397 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 398 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 399 } 400 401 check_virtualizations(); 402 } 403 404 void VM_Version::check_virtualizations() { 405 #if defined(_AIX) 406 int rc = 0; 407 perfstat_partition_total_t pinfo; 408 rc = perfstat_partition_total(NULL, &pinfo, sizeof(perfstat_partition_total_t), 1); 409 if (rc == 1) { 410 Abstract_VM_Version::_detected_virtualization = PowerVM; 411 } 412 #else 413 const char* info_file = "/proc/ppc64/lparcfg"; 414 // system_type=...qemu indicates PowerKVM 415 // e.g. system_type=IBM pSeries (emulated by qemu) 416 char line[500]; 417 FILE* fp = fopen(info_file, "r"); 418 if (fp == NULL) { 419 return; 420 } 421 const char* system_type="system_type="; // in case this line contains qemu, it is KVM 422 const char* num_lpars="NumLpars="; // in case of non-KVM : if this line is found it is PowerVM 423 bool num_lpars_found = false; 424 425 while (fgets(line, sizeof(line), fp) != NULL) { 426 if (strncmp(line, system_type, strlen(system_type)) == 0) { 427 if (strstr(line, "qemu") != 0) { 428 Abstract_VM_Version::_detected_virtualization = PowerKVM; 429 fclose(fp); 430 return; 431 } 432 } 433 if (strncmp(line, num_lpars, strlen(num_lpars)) == 0) { 434 num_lpars_found = true; 435 } 436 } 437 if (num_lpars_found) { 438 Abstract_VM_Version::_detected_virtualization = PowerVM; 439 } else { 440 Abstract_VM_Version::_detected_virtualization = PowerFullPartitionMode; 441 } 442 fclose(fp); 443 #endif 444 } 445 446 void VM_Version::print_platform_virtualization_info(outputStream* st) { 447 #if defined(_AIX) 448 // more info about perfstat API see 449 // https://www.ibm.com/support/knowledgecenter/en/ssw_aix_72/com.ibm.aix.prftools/idprftools_perfstat_glob_partition.htm 450 int rc = 0; 451 perfstat_partition_total_t pinfo; 452 memset(&pinfo, 0, sizeof(perfstat_partition_total_t)); 453 rc = perfstat_partition_total(NULL, &pinfo, sizeof(perfstat_partition_total_t), 1); 454 if (rc != 1) { 455 return; 456 } else { 457 st->print_cr("Virtualization type : PowerVM"); 458 } 459 // CPU information 460 perfstat_cpu_total_t cpuinfo; 461 memset(&cpuinfo, 0, sizeof(perfstat_cpu_total_t)); 462 rc = perfstat_cpu_total(NULL, &cpuinfo, sizeof(perfstat_cpu_total_t), 1); 463 if (rc != 1) { 464 return; 465 } 466 467 st->print_cr("Processor description : %s", cpuinfo.description); 468 st->print_cr("Processor speed : %llu Hz", cpuinfo.processorHZ); 469 470 st->print_cr("LPAR partition name : %s", pinfo.name); 471 st->print_cr("LPAR partition number : %u", pinfo.lpar_id); 472 st->print_cr("LPAR partition type : %s", pinfo.type.b.shared_enabled ? "shared" : "dedicated"); 473 st->print_cr("LPAR mode : %s", pinfo.type.b.donate_enabled ? "donating" : pinfo.type.b.capped ? "capped" : "uncapped"); 474 st->print_cr("LPAR partition group ID : %u", pinfo.group_id); 475 st->print_cr("LPAR shared pool ID : %u", pinfo.pool_id); 476 477 st->print_cr("AMS (active memory sharing) : %s", pinfo.type.b.ams_capable ? "capable" : "not capable"); 478 st->print_cr("AMS (active memory sharing) : %s", pinfo.type.b.ams_enabled ? "on" : "off"); 479 st->print_cr("AME (active memory expansion) : %s", pinfo.type.b.ame_enabled ? "on" : "off"); 480 481 if (pinfo.type.b.ame_enabled) { 482 st->print_cr("AME true memory in bytes : %llu", pinfo.true_memory); 483 st->print_cr("AME expanded memory in bytes : %llu", pinfo.expanded_memory); 484 } 485 486 st->print_cr("SMT : %s", pinfo.type.b.smt_capable ? "capable" : "not capable"); 487 st->print_cr("SMT : %s", pinfo.type.b.smt_enabled ? "on" : "off"); 488 int ocpus = pinfo.online_cpus > 0 ? pinfo.online_cpus : 1; 489 st->print_cr("LPAR threads : %d", cpuinfo.ncpus/ocpus); 490 st->print_cr("LPAR online virtual cpus : %d", pinfo.online_cpus); 491 st->print_cr("LPAR logical cpus : %d", cpuinfo.ncpus); 492 st->print_cr("LPAR maximum virtual cpus : %u", pinfo.max_cpus); 493 st->print_cr("LPAR minimum virtual cpus : %u", pinfo.min_cpus); 494 st->print_cr("LPAR entitled capacity : %4.2f", (double) (pinfo.entitled_proc_capacity/100.0)); 495 st->print_cr("LPAR online memory : %llu MB", pinfo.online_memory); 496 st->print_cr("LPAR maximum memory : %llu MB", pinfo.max_memory); 497 st->print_cr("LPAR minimum memory : %llu MB", pinfo.min_memory); 498 #else 499 const char* info_file = "/proc/ppc64/lparcfg"; 500 const char* kw[] = { "system_type=", // qemu indicates PowerKVM 501 "partition_entitled_capacity=", // entitled processor capacity percentage 502 "partition_max_entitled_capacity=", 503 "capacity_weight=", // partition CPU weight 504 "partition_active_processors=", 505 "partition_potential_processors=", 506 "entitled_proc_capacity_available=", 507 "capped=", // 0 - uncapped, 1 - vcpus capped at entitled processor capacity percentage 508 "shared_processor_mode=", // (non)dedicated partition 509 "system_potential_processors=", 510 "pool=", // CPU-pool number 511 "pool_capacity=", 512 "NumLpars=", // on non-KVM machines, NumLpars is not found for full partition mode machines 513 NULL }; 514 if (!print_matching_lines_from_file(info_file, st, kw)) { 515 st->print_cr(" <%s Not Available>", info_file); 516 } 517 #endif 518 } 519 520 bool VM_Version::use_biased_locking() { 521 #if INCLUDE_RTM_OPT 522 // RTM locking is most useful when there is high lock contention and 523 // low data contention. With high lock contention the lock is usually 524 // inflated and biased locking is not suitable for that case. 525 // RTM locking code requires that biased locking is off. 526 // Note: we can't switch off UseBiasedLocking in get_processor_features() 527 // because it is used by Thread::allocate() which is called before 528 // VM_Version::initialize(). 529 if (UseRTMLocking && UseBiasedLocking) { 530 if (FLAG_IS_DEFAULT(UseBiasedLocking)) { 531 FLAG_SET_DEFAULT(UseBiasedLocking, false); 532 } else { 533 warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." ); 534 UseBiasedLocking = false; 535 } 536 } 537 #endif 538 return UseBiasedLocking; 539 } 540 541 void VM_Version::print_features() { 542 tty->print_cr("Version: %s L1_data_cache_line_size=%d", features_string(), L1_data_cache_line_size()); 543 544 if (Verbose) { 545 if (ContendedPaddingWidth > 0) { 546 tty->cr(); 547 tty->print_cr("ContendedPaddingWidth " INTX_FORMAT, ContendedPaddingWidth); 548 } 549 } 550 } 551 552 #ifdef COMPILER2 553 // Determine section size on power6: If section size is 8 instructions, 554 // there should be a difference between the two testloops of ~15 %. If 555 // no difference is detected the section is assumed to be 32 instructions. 556 void VM_Version::determine_section_size() { 557 558 int unroll = 80; 559 560 const int code_size = (2* unroll * 32 + 100)*BytesPerInstWord; 561 562 // Allocate space for the code. 563 ResourceMark rm; 564 CodeBuffer cb("detect_section_size", code_size, 0); 565 MacroAssembler* a = new MacroAssembler(&cb); 566 567 uint32_t *code = (uint32_t *)a->pc(); 568 // Emit code. 569 void (*test1)() = (void(*)())(void *)a->function_entry(); 570 571 Label l1; 572 573 a->li(R4, 1); 574 a->sldi(R4, R4, 28); 575 a->b(l1); 576 a->align(CodeEntryAlignment); 577 578 a->bind(l1); 579 580 for (int i = 0; i < unroll; i++) { 581 // Schleife 1 582 // ------- sector 0 ------------ 583 // ;; 0 584 a->nop(); // 1 585 a->fpnop0(); // 2 586 a->fpnop1(); // 3 587 a->addi(R4,R4, -1); // 4 588 589 // ;; 1 590 a->nop(); // 5 591 a->fmr(F6, F6); // 6 592 a->fmr(F7, F7); // 7 593 a->endgroup(); // 8 594 // ------- sector 8 ------------ 595 596 // ;; 2 597 a->nop(); // 9 598 a->nop(); // 10 599 a->fmr(F8, F8); // 11 600 a->fmr(F9, F9); // 12 601 602 // ;; 3 603 a->nop(); // 13 604 a->fmr(F10, F10); // 14 605 a->fmr(F11, F11); // 15 606 a->endgroup(); // 16 607 // -------- sector 16 ------------- 608 609 // ;; 4 610 a->nop(); // 17 611 a->nop(); // 18 612 a->fmr(F15, F15); // 19 613 a->fmr(F16, F16); // 20 614 615 // ;; 5 616 a->nop(); // 21 617 a->fmr(F17, F17); // 22 618 a->fmr(F18, F18); // 23 619 a->endgroup(); // 24 620 // ------- sector 24 ------------ 621 622 // ;; 6 623 a->nop(); // 25 624 a->nop(); // 26 625 a->fmr(F19, F19); // 27 626 a->fmr(F20, F20); // 28 627 628 // ;; 7 629 a->nop(); // 29 630 a->fmr(F21, F21); // 30 631 a->fmr(F22, F22); // 31 632 a->brnop0(); // 32 633 634 // ------- sector 32 ------------ 635 } 636 637 // ;; 8 638 a->cmpdi(CCR0, R4, unroll); // 33 639 a->bge(CCR0, l1); // 34 640 a->blr(); 641 642 // Emit code. 643 void (*test2)() = (void(*)())(void *)a->function_entry(); 644 // uint32_t *code = (uint32_t *)a->pc(); 645 646 Label l2; 647 648 a->li(R4, 1); 649 a->sldi(R4, R4, 28); 650 a->b(l2); 651 a->align(CodeEntryAlignment); 652 653 a->bind(l2); 654 655 for (int i = 0; i < unroll; i++) { 656 // Schleife 2 657 // ------- sector 0 ------------ 658 // ;; 0 659 a->brnop0(); // 1 660 a->nop(); // 2 661 //a->cmpdi(CCR0, R4, unroll); 662 a->fpnop0(); // 3 663 a->fpnop1(); // 4 664 a->addi(R4,R4, -1); // 5 665 666 // ;; 1 667 668 a->nop(); // 6 669 a->fmr(F6, F6); // 7 670 a->fmr(F7, F7); // 8 671 // ------- sector 8 --------------- 672 673 // ;; 2 674 a->endgroup(); // 9 675 676 // ;; 3 677 a->nop(); // 10 678 a->nop(); // 11 679 a->fmr(F8, F8); // 12 680 681 // ;; 4 682 a->fmr(F9, F9); // 13 683 a->nop(); // 14 684 a->fmr(F10, F10); // 15 685 686 // ;; 5 687 a->fmr(F11, F11); // 16 688 // -------- sector 16 ------------- 689 690 // ;; 6 691 a->endgroup(); // 17 692 693 // ;; 7 694 a->nop(); // 18 695 a->nop(); // 19 696 a->fmr(F15, F15); // 20 697 698 // ;; 8 699 a->fmr(F16, F16); // 21 700 a->nop(); // 22 701 a->fmr(F17, F17); // 23 702 703 // ;; 9 704 a->fmr(F18, F18); // 24 705 // -------- sector 24 ------------- 706 707 // ;; 10 708 a->endgroup(); // 25 709 710 // ;; 11 711 a->nop(); // 26 712 a->nop(); // 27 713 a->fmr(F19, F19); // 28 714 715 // ;; 12 716 a->fmr(F20, F20); // 29 717 a->nop(); // 30 718 a->fmr(F21, F21); // 31 719 720 // ;; 13 721 a->fmr(F22, F22); // 32 722 } 723 724 // -------- sector 32 ------------- 725 // ;; 14 726 a->cmpdi(CCR0, R4, unroll); // 33 727 a->bge(CCR0, l2); // 34 728 729 a->blr(); 730 uint32_t *code_end = (uint32_t *)a->pc(); 731 a->flush(); 732 733 cb.insts()->set_end((u_char*)code_end); 734 735 double loop1_seconds,loop2_seconds, rel_diff; 736 uint64_t start1, stop1; 737 738 start1 = os::current_thread_cpu_time(false); 739 (*test1)(); 740 stop1 = os::current_thread_cpu_time(false); 741 loop1_seconds = (stop1- start1) / (1000 *1000 *1000.0); 742 743 744 start1 = os::current_thread_cpu_time(false); 745 (*test2)(); 746 stop1 = os::current_thread_cpu_time(false); 747 748 loop2_seconds = (stop1 - start1) / (1000 *1000 *1000.0); 749 750 rel_diff = (loop2_seconds - loop1_seconds) / loop1_seconds *100; 751 752 if (PrintAssembly || PrintStubCode) { 753 ttyLocker ttyl; 754 tty->print_cr("Decoding section size detection stub at " INTPTR_FORMAT " before execution:", p2i(code)); 755 // Use existing decode function. This enables the [MachCode] format which is needed to DecodeErrorFile. 756 Disassembler::decode(&cb, (u_char*)code, (u_char*)code_end, tty); 757 tty->print_cr("Time loop1 :%f", loop1_seconds); 758 tty->print_cr("Time loop2 :%f", loop2_seconds); 759 tty->print_cr("(time2 - time1) / time1 = %f %%", rel_diff); 760 761 if (rel_diff > 12.0) { 762 tty->print_cr("Section Size 8 Instructions"); 763 } else{ 764 tty->print_cr("Section Size 32 Instructions or Power5"); 765 } 766 } 767 768 #if 0 // TODO: PPC port 769 // Set sector size (if not set explicitly). 770 if (FLAG_IS_DEFAULT(Power6SectorSize128PPC64)) { 771 if (rel_diff > 12.0) { 772 PdScheduling::power6SectorSize = 0x20; 773 } else { 774 PdScheduling::power6SectorSize = 0x80; 775 } 776 } else if (Power6SectorSize128PPC64) { 777 PdScheduling::power6SectorSize = 0x80; 778 } else { 779 PdScheduling::power6SectorSize = 0x20; 780 } 781 #endif 782 if (UsePower6SchedulerPPC64) Unimplemented(); 783 } 784 #endif // COMPILER2 785 786 void VM_Version::determine_features() { 787 #if defined(ABI_ELFv2) 788 // 1 InstWord per call for the blr instruction. 789 const int code_size = (num_features+1+2*1)*BytesPerInstWord; 790 #else 791 // 7 InstWords for each call (function descriptor + blr instruction). 792 const int code_size = (num_features+1+2*7)*BytesPerInstWord; 793 #endif 794 int features = 0; 795 796 // create test area 797 enum { BUFFER_SIZE = 2*4*K }; // Needs to be >=2* max cache line size (cache line size can't exceed min page size). 798 char test_area[BUFFER_SIZE]; 799 char *mid_of_test_area = &test_area[BUFFER_SIZE>>1]; 800 801 // Allocate space for the code. 802 ResourceMark rm; 803 CodeBuffer cb("detect_cpu_features", code_size, 0); 804 MacroAssembler* a = new MacroAssembler(&cb); 805 806 // Must be set to true so we can generate the test code. 807 _features = VM_Version::all_features_m; 808 809 // Emit code. 810 void (*test)(address addr, uint64_t offset)=(void(*)(address addr, uint64_t offset))(void *)a->function_entry(); 811 uint32_t *code = (uint32_t *)a->pc(); 812 // Don't use R0 in ldarx. 813 // Keep R3_ARG1 unmodified, it contains &field (see below). 814 // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). 815 a->fsqrt(F3, F4); // code[0] -> fsqrt_m 816 a->fsqrts(F3, F4); // code[1] -> fsqrts_m 817 a->isel(R7, R5, R6, 0); // code[2] -> isel_m 818 a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m 819 a->cmpb(R7, R5, R6); // code[4] -> cmpb 820 a->popcntb(R7, R5); // code[5] -> popcntb 821 a->popcntw(R7, R5); // code[6] -> popcntw 822 a->fcfids(F3, F4); // code[7] -> fcfids 823 a->vand(VR0, VR0, VR0); // code[8] -> vand 824 // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16 825 a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m 826 a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher 827 a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb 828 a->mfdscr(R0); // code[12] -> mfdscr 829 a->lxvd2x(VSR0, R3_ARG1); // code[13] -> vsx 830 a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[14] -> ldbrx 831 a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> stdbrx 832 a->vshasigmaw(VR0, VR1, 1, 0xF); // code[16] -> vshasig 833 // rtm is determined by OS 834 a->darn(R7); // code[17] -> darn 835 a->blr(); 836 837 // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. 838 void (*zero_cacheline_func_ptr)(char*) = (void(*)(char*))(void *)a->function_entry(); 839 a->dcbz(R3_ARG1); // R3_ARG1 = addr 840 a->blr(); 841 842 uint32_t *code_end = (uint32_t *)a->pc(); 843 a->flush(); 844 _features = VM_Version::unknown_m; 845 846 // Print the detection code. 847 if (PrintAssembly) { 848 ttyLocker ttyl; 849 tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code)); 850 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); 851 } 852 853 // Measure cache line size. 854 memset(test_area, 0xFF, BUFFER_SIZE); // Fill test area with 0xFF. 855 (*zero_cacheline_func_ptr)(mid_of_test_area); // Call function which executes dcbz to the middle. 856 int count = 0; // count zeroed bytes 857 for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++; 858 guarantee(is_power_of_2(count), "cache line size needs to be a power of 2"); 859 _L1_data_cache_line_size = count; 860 861 // Execute code. Illegal instructions will be replaced by 0 in the signal handler. 862 VM_Version::_is_determine_features_test_running = true; 863 // We must align the first argument to 16 bytes because of the lqarx check. 864 (*test)(align_up((address)mid_of_test_area, 16), 0); 865 VM_Version::_is_determine_features_test_running = false; 866 867 // determine which instructions are legal. 868 int feature_cntr = 0; 869 if (code[feature_cntr++]) features |= fsqrt_m; 870 if (code[feature_cntr++]) features |= fsqrts_m; 871 if (code[feature_cntr++]) features |= isel_m; 872 if (code[feature_cntr++]) features |= lxarxeh_m; 873 if (code[feature_cntr++]) features |= cmpb_m; 874 if (code[feature_cntr++]) features |= popcntb_m; 875 if (code[feature_cntr++]) features |= popcntw_m; 876 if (code[feature_cntr++]) features |= fcfids_m; 877 if (code[feature_cntr++]) features |= vand_m; 878 if (code[feature_cntr++]) features |= lqarx_m; 879 if (code[feature_cntr++]) features |= vcipher_m; 880 if (code[feature_cntr++]) features |= vpmsumb_m; 881 if (code[feature_cntr++]) features |= mfdscr_m; 882 if (code[feature_cntr++]) features |= vsx_m; 883 if (code[feature_cntr++]) features |= ldbrx_m; 884 if (code[feature_cntr++]) features |= stdbrx_m; 885 if (code[feature_cntr++]) features |= vshasig_m; 886 // feature rtm_m is determined by OS 887 if (code[feature_cntr++]) features |= darn_m; 888 889 // Print the detection code. 890 if (PrintAssembly) { 891 ttyLocker ttyl; 892 tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " after execution:", p2i(code)); 893 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); 894 } 895 896 _features = features; 897 898 #ifdef AIX 899 // To enable it on AIX it's necessary POWER8 or above and at least AIX 7.2. 900 // Actually, this is supported since AIX 7.1.. Unfortunately, this first 901 // contained bugs, so that it can only be enabled after AIX 7.1.3.30. 902 // The Java property os.version, which is used in RTM tests to decide 903 // whether the feature is available, only knows major and minor versions. 904 // We don't want to change this property, as user code might depend on it. 905 // So the tests can not check on subversion 3.30, and we only enable RTM 906 // with AIX 7.2. 907 if (has_lqarx()) { // POWER8 or above 908 if (os::Aix::os_version() >= 0x07020000) { // At least AIX 7.2. 909 _features |= rtm_m; 910 } 911 } 912 #endif 913 #if defined(LINUX) && defined(VM_LITTLE_ENDIAN) 914 unsigned long auxv = getauxval(AT_HWCAP2); 915 916 if (auxv & PPC_FEATURE2_HTM_NOSC) { 917 if (auxv & PPC_FEATURE2_HAS_HTM) { 918 // TM on POWER8 and POWER9 in compat mode (VM) is supported by the JVM. 919 // TM on POWER9 DD2.1 NV (baremetal) is not supported by the JVM (TM on 920 // POWER9 DD2.1 NV has a few issues that need a couple of firmware 921 // and kernel workarounds, so there is a new mode only supported 922 // on non-virtualized P9 machines called HTM with no Suspend Mode). 923 // TM on POWER9 D2.2+ NV is not supported at all by Linux. 924 _features |= rtm_m; 925 } 926 } 927 #endif 928 } 929 930 // Power 8: Configure Data Stream Control Register. 931 void VM_Version::config_dscr() { 932 // 7 InstWords for each call (function descriptor + blr instruction). 933 const int code_size = (2+2*7)*BytesPerInstWord; 934 935 // Allocate space for the code. 936 ResourceMark rm; 937 CodeBuffer cb("config_dscr", code_size, 0); 938 MacroAssembler* a = new MacroAssembler(&cb); 939 940 // Emit code. 941 uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry(); 942 uint32_t *code = (uint32_t *)a->pc(); 943 a->mfdscr(R3); 944 a->blr(); 945 946 void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry(); 947 a->mtdscr(R3); 948 a->blr(); 949 950 uint32_t *code_end = (uint32_t *)a->pc(); 951 a->flush(); 952 953 // Print the detection code. 954 if (PrintAssembly) { 955 ttyLocker ttyl; 956 tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code)); 957 Disassembler::decode((u_char*)code, (u_char*)code_end, tty); 958 } 959 960 // Apply the configuration if needed. 961 _dscr_val = (*get_dscr)(); 962 if (Verbose) { 963 tty->print_cr("dscr value was 0x%lx" , _dscr_val); 964 } 965 bool change_requested = false; 966 if (DSCR_PPC64 != (uintx)-1) { 967 _dscr_val = DSCR_PPC64; 968 change_requested = true; 969 } 970 if (DSCR_DPFD_PPC64 <= 7) { 971 uint64_t mask = 0x7; 972 if ((_dscr_val & mask) != DSCR_DPFD_PPC64) { 973 _dscr_val = (_dscr_val & ~mask) | (DSCR_DPFD_PPC64); 974 change_requested = true; 975 } 976 } 977 if (DSCR_URG_PPC64 <= 7) { 978 uint64_t mask = 0x7 << 6; 979 if ((_dscr_val & mask) != DSCR_DPFD_PPC64 << 6) { 980 _dscr_val = (_dscr_val & ~mask) | (DSCR_URG_PPC64 << 6); 981 change_requested = true; 982 } 983 } 984 if (change_requested) { 985 (*set_dscr)(_dscr_val); 986 if (Verbose) { 987 tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)()); 988 } 989 } 990 } 991 992 static uint64_t saved_features = 0; 993 994 void VM_Version::allow_all() { 995 saved_features = _features; 996 _features = all_features_m; 997 } 998 999 void VM_Version::revert() { 1000 _features = saved_features; 1001 }