450 // the command line requires. I.e., you cannot set UseSSE to 2 on 451 // older Pentiums which do not support it. 452 if (UseSSE > 4) UseSSE=4; 453 if (UseSSE < 0) UseSSE=0; 454 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support 455 UseSSE = MIN2((intx)3,UseSSE); 456 if (!supports_sse3()) // Drop to 2 if no SSE3 support 457 UseSSE = MIN2((intx)2,UseSSE); 458 if (!supports_sse2()) // Drop to 1 if no SSE2 support 459 UseSSE = MIN2((intx)1,UseSSE); 460 if (!supports_sse ()) // Drop to 0 if no SSE support 461 UseSSE = 0; 462 463 if (UseAVX > 2) UseAVX=2; 464 if (UseAVX < 0) UseAVX=0; 465 if (!supports_avx2()) // Drop to 1 if no AVX2 support 466 UseAVX = MIN2((intx)1,UseAVX); 467 if (!supports_avx ()) // Drop to 0 if no AVX support 468 UseAVX = 0; 469 470 // On new cpus instructions which update whole XMM register should be used 471 // to prevent partial register stall due to dependencies on high half. 472 // 473 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 474 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 475 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 476 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 477 478 if( is_amd() ) { // AMD cpus specific settings 479 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { 480 // Use it on new AMD cpus starting from Opteron. 481 UseAddressNop = true; 482 } 483 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { 484 // Use it on new AMD cpus starting from Opteron. 485 UseNewLongLShift = true; 486 } 487 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 488 if( supports_sse4a() ) { 489 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 527 528 // some defaults for AMD family 15h 529 if ( cpu_family() == 0x15 ) { 530 // On family 15h processors default is no sw prefetch 531 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 532 AllocatePrefetchStyle = 0; 533 } 534 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 535 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 536 AllocatePrefetchInstr = 3; 537 } 538 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 539 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 540 UseXMMForArrayCopy = true; 541 } 542 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 543 UseUnalignedLoadStores = true; 544 } 545 } 546 547 } 548 549 if( is_intel() ) { // Intel cpus specific settings 550 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 551 UseStoreImmI16 = false; // don't use it on Intel cpus 552 } 553 if( cpu_family() == 6 || cpu_family() == 15 ) { 554 if( FLAG_IS_DEFAULT(UseAddressNop) ) { 555 // Use it on all Intel cpus starting from PentiumPro 556 UseAddressNop = true; 557 } 558 } 559 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 560 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 561 } 562 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 563 if( supports_sse3() ) { 564 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 565 } else { 566 UseXmmRegToRegMoveAll = false; 567 } 589 } 590 } 591 if( supports_sse4_2() && UseSSE >= 4 ) { 592 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 593 UseSSE42Intrinsics = true; 594 } 595 } 596 } 597 } 598 599 // Use population count instruction if available. 600 if (supports_popcnt()) { 601 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 602 UsePopCountInstruction = true; 603 } 604 } else if (UsePopCountInstruction) { 605 warning("POPCNT instruction is not available on this CPU"); 606 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 607 } 608 609 #ifdef COMPILER2 610 if (UseFPUForSpilling) { 611 if (UseSSE < 2) { 612 // Only supported with SSE2+ 613 FLAG_SET_DEFAULT(UseFPUForSpilling, false); 614 } 615 } 616 #endif 617 618 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 619 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 620 621 // set valid Prefetch instruction 622 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 623 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 624 if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; 625 if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; 626 627 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 628 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 629 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; 630 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; 631 632 // Allocation prefetch settings 633 intx cache_line_size = prefetch_data_size(); 634 if( cache_line_size > AllocatePrefetchStepSize ) 635 AllocatePrefetchStepSize = cache_line_size; 636 637 assert(AllocatePrefetchLines > 0, "invalid value"); | 450 // the command line requires. I.e., you cannot set UseSSE to 2 on 451 // older Pentiums which do not support it. 452 if (UseSSE > 4) UseSSE=4; 453 if (UseSSE < 0) UseSSE=0; 454 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support 455 UseSSE = MIN2((intx)3,UseSSE); 456 if (!supports_sse3()) // Drop to 2 if no SSE3 support 457 UseSSE = MIN2((intx)2,UseSSE); 458 if (!supports_sse2()) // Drop to 1 if no SSE2 support 459 UseSSE = MIN2((intx)1,UseSSE); 460 if (!supports_sse ()) // Drop to 0 if no SSE support 461 UseSSE = 0; 462 463 if (UseAVX > 2) UseAVX=2; 464 if (UseAVX < 0) UseAVX=0; 465 if (!supports_avx2()) // Drop to 1 if no AVX2 support 466 UseAVX = MIN2((intx)1,UseAVX); 467 if (!supports_avx ()) // Drop to 0 if no AVX support 468 UseAVX = 0; 469 470 #ifdef COMPILER2 471 if (UseFPUForSpilling) { 472 if (UseSSE < 2) { 473 // Only supported with SSE2+ 474 FLAG_SET_DEFAULT(UseFPUForSpilling, false); 475 } 476 } 477 if (MaxVectorSize > 0) { 478 if (!is_power_of_2(MaxVectorSize)) { 479 warning("MaxVectorSize must be a power of 2"); 480 FLAG_SET_DEFAULT(MaxVectorSize, 32); 481 } 482 if (MaxVectorSize > 32) { 483 FLAG_SET_DEFAULT(MaxVectorSize, 32); 484 } 485 if (MaxVectorSize > 16 && UseAVX == 0) { 486 // Only supported with AVX+ 487 FLAG_SET_DEFAULT(MaxVectorSize, 16); 488 } 489 if (UseSSE < 2) { 490 // Only supported with SSE2+ 491 FLAG_SET_DEFAULT(MaxVectorSize, 0); 492 } 493 } 494 #endif 495 496 // On new cpus instructions which update whole XMM register should be used 497 // to prevent partial register stall due to dependencies on high half. 498 // 499 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 500 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 501 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 502 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 503 504 if( is_amd() ) { // AMD cpus specific settings 505 if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { 506 // Use it on new AMD cpus starting from Opteron. 507 UseAddressNop = true; 508 } 509 if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { 510 // Use it on new AMD cpus starting from Opteron. 511 UseNewLongLShift = true; 512 } 513 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 514 if( supports_sse4a() ) { 515 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 553 554 // some defaults for AMD family 15h 555 if ( cpu_family() == 0x15 ) { 556 // On family 15h processors default is no sw prefetch 557 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 558 AllocatePrefetchStyle = 0; 559 } 560 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 561 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 562 AllocatePrefetchInstr = 3; 563 } 564 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 565 if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 566 UseXMMForArrayCopy = true; 567 } 568 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 569 UseUnalignedLoadStores = true; 570 } 571 } 572 573 #ifdef COMPILER2 574 if (MaxVectorSize > 16) { 575 // Limit vectors size to 16 bytes on current AMD cpus. 576 FLAG_SET_DEFAULT(MaxVectorSize, 16); 577 } 578 #endif // COMPILER2 579 } 580 581 if( is_intel() ) { // Intel cpus specific settings 582 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 583 UseStoreImmI16 = false; // don't use it on Intel cpus 584 } 585 if( cpu_family() == 6 || cpu_family() == 15 ) { 586 if( FLAG_IS_DEFAULT(UseAddressNop) ) { 587 // Use it on all Intel cpus starting from PentiumPro 588 UseAddressNop = true; 589 } 590 } 591 if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 592 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 593 } 594 if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 595 if( supports_sse3() ) { 596 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 597 } else { 598 UseXmmRegToRegMoveAll = false; 599 } 621 } 622 } 623 if( supports_sse4_2() && UseSSE >= 4 ) { 624 if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 625 UseSSE42Intrinsics = true; 626 } 627 } 628 } 629 } 630 631 // Use population count instruction if available. 632 if (supports_popcnt()) { 633 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 634 UsePopCountInstruction = true; 635 } 636 } else if (UsePopCountInstruction) { 637 warning("POPCNT instruction is not available on this CPU"); 638 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 639 } 640 641 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 642 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 643 644 // set valid Prefetch instruction 645 if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 646 if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 647 if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0; 648 if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3; 649 650 if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 651 if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 652 if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0; 653 if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3; 654 655 // Allocation prefetch settings 656 intx cache_line_size = prefetch_data_size(); 657 if( cache_line_size > AllocatePrefetchStepSize ) 658 AllocatePrefetchStepSize = cache_line_size; 659 660 assert(AllocatePrefetchLines > 0, "invalid value"); |