412 _cpuFeatures &= ~CPU_SSE2; 413 414 if (UseSSE < 1) 415 _cpuFeatures &= ~CPU_SSE; 416 417 if (UseAVX < 2) 418 _cpuFeatures &= ~CPU_AVX2; 419 420 if (UseAVX < 1) 421 _cpuFeatures &= ~CPU_AVX; 422 423 if (!UseAES && !FLAG_IS_DEFAULT(UseAES)) 424 _cpuFeatures &= ~CPU_AES; 425 426 if (logical_processors_per_package() == 1) { 427 // HT processor could be installed on a system which doesn't support HT. 428 _cpuFeatures &= ~CPU_HT; 429 } 430 431 char buf[256]; 432 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 433 cores_per_cpu(), threads_per_core(), 434 cpu_family(), _model, _stepping, 435 (supports_cmov() ? ", cmov" : ""), 436 (supports_cmpxchg8() ? ", cx8" : ""), 437 (supports_fxsr() ? ", fxsr" : ""), 438 (supports_mmx() ? ", mmx" : ""), 439 (supports_sse() ? ", sse" : ""), 440 (supports_sse2() ? ", sse2" : ""), 441 (supports_sse3() ? ", sse3" : ""), 442 (supports_ssse3()? ", ssse3": ""), 443 (supports_sse4_1() ? ", sse4.1" : ""), 444 (supports_sse4_2() ? ", sse4.2" : ""), 445 (supports_popcnt() ? ", popcnt" : ""), 446 (supports_avx() ? ", avx" : ""), 447 (supports_avx2() ? ", avx2" : ""), 448 (supports_aes() ? ", aes" : ""), 449 (supports_clmul() ? ", clmul" : ""), 450 (supports_erms() ? ", erms" : ""), 451 (supports_mmx_ext() ? ", mmxext" : ""), 452 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), 453 (supports_lzcnt() ? ", lzcnt": ""), 454 (supports_sse4a() ? ", sse4a": ""), 455 (supports_ht() ? ", ht": ""), 456 (supports_tsc() ? ", tsc": ""), 457 (supports_tscinv_bit() ? ", tscinvbit": ""), 458 (supports_tscinv() ? ", tscinv": "")); 459 _features_str = strdup(buf); 460 461 // UseSSE is set to the smaller of what hardware supports and what 462 // the command line requires. I.e., you cannot set UseSSE to 2 on 463 // older Pentiums which do not support it. 464 if (UseSSE > 4) UseSSE=4; 465 if (UseSSE < 0) UseSSE=0; 466 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support 467 UseSSE = MIN2((intx)3,UseSSE); 468 if (!supports_sse3()) // Drop to 2 if no SSE3 support 469 UseSSE = MIN2((intx)2,UseSSE); 470 if (!supports_sse2()) // Drop to 1 if no SSE2 support 471 UseSSE = MIN2((intx)1,UseSSE); 472 if (!supports_sse ()) // Drop to 0 if no SSE support 473 UseSSE = 0; 474 475 if (UseAVX > 2) UseAVX=2; 476 if (UseAVX < 0) UseAVX=0; 477 if (!supports_avx2()) // Drop to 1 if no AVX2 support 478 UseAVX = MIN2((intx)1,UseAVX); 583 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 584 if( supports_sse4a() ) { 585 UseXmmI2F = true; 586 } else { 587 UseXmmI2F = false; 588 } 589 } 590 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 591 if( supports_sse4a() ) { 592 UseXmmI2D = true; 593 } else { 594 UseXmmI2D = false; 595 } 596 } 597 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { 598 if( supports_sse4_2() && UseSSE >= 4 ) { 599 UseSSE42Intrinsics = true; 600 } 601 } 602 603 // Use count leading zeros count instruction if available. 604 if (supports_lzcnt()) { 605 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 606 UseCountLeadingZerosInstruction = true; 607 } 608 } 609 610 // some defaults for AMD family 15h 611 if ( cpu_family() == 0x15 ) { 612 // On family 15h processors default is no sw prefetch 613 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 614 AllocatePrefetchStyle = 0; 615 } 616 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 617 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 618 AllocatePrefetchInstr = 3; 619 } 620 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 621 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 622 UseXMMForArrayCopy = true; 623 } 624 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 625 UseUnalignedLoadStores = true; 626 } 627 } 628 629 #ifdef COMPILER2 675 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 676 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 677 } 678 } 679 if (supports_sse4_2() && UseSSE >= 4) { 680 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 681 UseSSE42Intrinsics = true; 682 } 683 } 684 } 685 } 686 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE) 687 if (MaxVectorSize > 16) { 688 // Limit vectors size to 16 bytes on BSD until it fixes 689 // restoring upper 128bit of YMM registers on return 690 // from signal handler. 691 FLAG_SET_DEFAULT(MaxVectorSize, 16); 692 } 693 #endif // COMPILER2 694 695 // Use population count instruction if available. 696 if (supports_popcnt()) { 697 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 698 UsePopCountInstruction = true; 699 } 700 } else if (UsePopCountInstruction) { 701 warning("POPCNT instruction is not available on this CPU"); 702 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 703 } 704 705 // Use fast-string operations if available. 706 if (supports_erms()) { 707 if (FLAG_IS_DEFAULT(UseFastStosb)) { 708 UseFastStosb = true; 709 } 710 } else if (UseFastStosb) { 711 warning("fast-string operations are not available on this CPU"); 712 FLAG_SET_DEFAULT(UseFastStosb, false); 713 } 714 | 412 _cpuFeatures &= ~CPU_SSE2; 413 414 if (UseSSE < 1) 415 _cpuFeatures &= ~CPU_SSE; 416 417 if (UseAVX < 2) 418 _cpuFeatures &= ~CPU_AVX2; 419 420 if (UseAVX < 1) 421 _cpuFeatures &= ~CPU_AVX; 422 423 if (!UseAES && !FLAG_IS_DEFAULT(UseAES)) 424 _cpuFeatures &= ~CPU_AES; 425 426 if (logical_processors_per_package() == 1) { 427 // HT processor could be installed on a system which doesn't support HT. 428 _cpuFeatures &= ~CPU_HT; 429 } 430 431 char buf[256]; 432 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 433 cores_per_cpu(), threads_per_core(), 434 cpu_family(), _model, _stepping, 435 (supports_cmov() ? ", cmov" : ""), 436 (supports_cmpxchg8() ? ", cx8" : ""), 437 (supports_fxsr() ? ", fxsr" : ""), 438 (supports_mmx() ? ", mmx" : ""), 439 (supports_sse() ? ", sse" : ""), 440 (supports_sse2() ? ", sse2" : ""), 441 (supports_sse3() ? ", sse3" : ""), 442 (supports_ssse3()? ", ssse3": ""), 443 (supports_sse4_1() ? ", sse4.1" : ""), 444 (supports_sse4_2() ? ", sse4.2" : ""), 445 (supports_popcnt() ? ", popcnt" : ""), 446 (supports_avx() ? ", avx" : ""), 447 (supports_avx2() ? ", avx2" : ""), 448 (supports_aes() ? ", aes" : ""), 449 (supports_clmul() ? ", clmul" : ""), 450 (supports_erms() ? ", erms" : ""), 451 (supports_mmx_ext() ? ", mmxext" : ""), 452 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), 453 (supports_lzcnt() ? ", lzcnt": ""), 454 (supports_sse4a() ? ", sse4a": ""), 455 (supports_ht() ? ", ht": ""), 456 (supports_tsc() ? ", tsc": ""), 457 (supports_tscinv_bit() ? ", tscinvbit": ""), 458 (supports_tscinv() ? ", tscinv": ""), 459 (supports_bmi1() ? ", bmi1" : ""), 460 (supports_bmi2() ? ", bmi2" : "")); 461 _features_str = strdup(buf); 462 463 // UseSSE is set to the smaller of what hardware supports and what 464 // the command line requires. I.e., you cannot set UseSSE to 2 on 465 // older Pentiums which do not support it. 466 if (UseSSE > 4) UseSSE=4; 467 if (UseSSE < 0) UseSSE=0; 468 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support 469 UseSSE = MIN2((intx)3,UseSSE); 470 if (!supports_sse3()) // Drop to 2 if no SSE3 support 471 UseSSE = MIN2((intx)2,UseSSE); 472 if (!supports_sse2()) // Drop to 1 if no SSE2 support 473 UseSSE = MIN2((intx)1,UseSSE); 474 if (!supports_sse ()) // Drop to 0 if no SSE support 475 UseSSE = 0; 476 477 if (UseAVX > 2) UseAVX=2; 478 if (UseAVX < 0) UseAVX=0; 479 if (!supports_avx2()) // Drop to 1 if no AVX2 support 480 UseAVX = MIN2((intx)1,UseAVX); 585 if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 586 if( supports_sse4a() ) { 587 UseXmmI2F = true; 588 } else { 589 UseXmmI2F = false; 590 } 591 } 592 if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 593 if( supports_sse4a() ) { 594 UseXmmI2D = true; 595 } else { 596 UseXmmI2D = false; 597 } 598 } 599 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics) ) { 600 if( supports_sse4_2() && UseSSE >= 4 ) { 601 UseSSE42Intrinsics = true; 602 } 603 } 604 605 // some defaults for AMD family 15h 606 if ( cpu_family() == 0x15 ) { 607 // On family 15h processors default is no sw prefetch 608 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 609 AllocatePrefetchStyle = 0; 610 } 611 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 612 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 613 AllocatePrefetchInstr = 3; 614 } 615 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 616 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 617 UseXMMForArrayCopy = true; 618 } 619 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 620 UseUnalignedLoadStores = true; 621 } 622 } 623 624 #ifdef COMPILER2 670 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 671 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 672 } 673 } 674 if (supports_sse4_2() && UseSSE >= 4) { 675 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 676 UseSSE42Intrinsics = true; 677 } 678 } 679 } 680 } 681 #if defined(COMPILER2) && defined(_ALLBSD_SOURCE) 682 if (MaxVectorSize > 16) { 683 // Limit vectors size to 16 bytes on BSD until it fixes 684 // restoring upper 128bit of YMM registers on return 685 // from signal handler. 686 FLAG_SET_DEFAULT(MaxVectorSize, 16); 687 } 688 #endif // COMPILER2 689 690 // Use count leading zeros count instruction if available. 691 if (supports_lzcnt()) { 692 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 693 UseCountLeadingZerosInstruction = true; 694 } 695 } 696 697 //Use count trailing zeros instruction if available 698 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 699 if (supports_bmi1()) { 700 UseCountTrailingZerosInstruction = true; 701 } 702 } 703 704 if (supports_bmi1()) { 705 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 706 UseBMI1Instructions = true; 707 } 708 } else if (UseBMI1Instructions) { 709 warning("BMI1 instructions are not available on this CPU"); 710 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 711 } 712 713 // Use population count instruction if available. 714 if (supports_popcnt()) { 715 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 716 UsePopCountInstruction = true; 717 } 718 } else if (UsePopCountInstruction) { 719 warning("POPCNT instruction is not available on this CPU"); 720 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 721 } 722 723 // Use fast-string operations if available. 724 if (supports_erms()) { 725 if (FLAG_IS_DEFAULT(UseFastStosb)) { 726 UseFastStosb = true; 727 } 728 } else if (UseFastStosb) { 729 warning("fast-string operations are not available on this CPU"); 730 FLAG_SET_DEFAULT(UseFastStosb, false); 731 } 732 |