// NOTE(review): this chunk is not compilable source. It reads as a flattened
// side-by-side diff of the tail of the header guarded by
// CPU_X86_VM_VM_VERSION_X86_HPP: the bare integers (765, 766, ...) look like
// diff-viewer line numbers fused into the text, and the lone '|' further down
// appears to separate two revisions of the same region. Because each physical
// line contains '//' comments, a compiler would treat everything after the
// first '//' on a line as comment text. Restore from the real header instead
// of patching this text in place.
//
// Definitions on the next physical line (text before the '|'):
//   is_amd_Barcelona()   - is_amd() and extended_cpu_family() == CPU_FAMILY_AMD_11H.
//   supports_tscinv_bit() - tests the CPU_TSCINV bit of _features.
//   supports_tscinv()    - the bit above, plus either a non-Barcelona AMD or
//                          is_intel_tsc_synched_at_init().
//   has_fast_idiv()      - Intel, cpu_family() == 6, SSE3, and _model != 0x1C.
//   supports_compare_and_exchange() - always returns true.
//   allocate_prefetch_distance()    - starts here; reads AllocatePrefetchDistance
//                          and treats a negative value as "pick a default".
765 766 static bool is_amd_Barcelona() { return is_amd() && 767 extended_cpu_family() == CPU_FAMILY_AMD_11H; } 768 769 // Intel and AMD newer cores support fast timestamps well 770 static bool supports_tscinv_bit() { 771 return (_features & CPU_TSCINV) != 0; 772 } 773 static bool supports_tscinv() { 774 return supports_tscinv_bit() && 775 ( (is_amd() && !is_amd_Barcelona()) || 776 is_intel_tsc_synched_at_init() ); 777 } 778 779 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). 780 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && 781 supports_sse3() && _model != 0x1C; } 782 783 static bool supports_compare_and_exchange() { return true; } 784 785 static intx allocate_prefetch_distance() { 786 // This method should be called before allocate_prefetch_style(). 787 // 788 // Hardware prefetching (distance/size in bytes): 789 // Pentium 3 - 64 / 32 790 // Pentium 4 - 256 / 128 791 // Athlon - 64 / 32 ???? 792 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 793 // Core - 128 / 64 794 // 795 // Software prefetching (distance in bytes / instruction with best score): 796 // Pentium 3 - 128 / prefetchnta 797 // Pentium 4 - 512 / prefetchnta 798 // Athlon - 128 / prefetchnta 799 // Opteron - 256 / prefetchnta 800 // Core - 256 / prefetchnta 801 // It will be used only when AllocatePrefetchStyle > 0 802 803 intx count = AllocatePrefetchDistance; 804 if (count < 0) { // default ? 
// Continuation of allocate_prefetch_distance(): the default is 256 (AMD with
// SSE2), 128 (AMD without SSE2), 256 (Intel family 6 with SSE2), 512 (other
// Intel with SSE2), otherwise 128.
// NOTE(review): the Intel branch nests a braced if/else inside an unbraced
// `if (supports_sse2())`; the trailing `else count = 128` pairs with that
// outer if. Bracing the outer if would make the pairing explicit.
// allocate_prefetch_style() asserts AllocatePrefetchStyle >= 0 and returns it
// only when AllocatePrefetchDistance > 0, otherwise 0.
// NOTE(review): the assert message says "positive" although 0 passes the check.
// prefetch_copy_interval_in_bytes() and prefetch_scan_interval_in_bytes()
// return their flag when it is >= 0, otherwise 576. prefetch_fields_ahead()
// begins at the end of this physical line.
805 if (is_amd()) { // AMD 806 if (supports_sse2()) 807 count = 256; // Opteron 808 else 809 count = 128; // Athlon 810 } else { // Intel 811 if (supports_sse2()) 812 if (cpu_family() == 6) { 813 count = 256; // Pentium M, Core, Core2 814 } else { 815 count = 512; // Pentium 4 816 } 817 else 818 count = 128; // Pentium 3 (and all other old CPUs) 819 } 820 } 821 return count; 822 } 823 static intx allocate_prefetch_style() { 824 assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 825 // Return 0 if AllocatePrefetchDistance was not defined. 826 return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; 827 } 828 829 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 830 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 831 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 832 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 833 834 // gc copy/scan is disabled if prefetchw isn't supported, because 835 // Prefetch::write emits an inlined prefetchw on Linux. 836 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 837 // The used prefetcht0 instruction works for both amd64 and em64t. 838 static intx prefetch_copy_interval_in_bytes() { 839 intx interval = PrefetchCopyIntervalInBytes; 840 return interval >= 0 ? interval : 576; 841 } 842 static intx prefetch_scan_interval_in_bytes() { 843 intx interval = PrefetchScanIntervalInBytes; 844 return interval >= 0 ? interval : 576; 845 } 846 static intx prefetch_fields_ahead() { 847 intx count = PrefetchFieldsAhead; 848 return count >= 0 ? 
// End of prefetch_fields_ahead(): returns PrefetchFieldsAhead when >= 0, else 1.
// get_xsave_header_lower_segment() returns _cpuid_info.xem_xcr0_eax.value and
// get_xsave_header_upper_segment() returns _cpuid_info.xem_xcr0_edx.
// supports_on_spin_wait() returns supports_sse2().
// After the '|' the same leading accessors repeat, followed by a variant
// allocate_prefetch_distance(bool use_watermark_prefetch) that does not read
// the AllocatePrefetchDistance flag.
count : 1; 849 } 850 static uint32_t get_xsave_header_lower_segment() { 851 return _cpuid_info.xem_xcr0_eax.value; 852 } 853 static uint32_t get_xsave_header_upper_segment() { 854 return _cpuid_info.xem_xcr0_edx; 855 } 856 857 // SSE2 and later processors implement a 'pause' instruction 858 // that can be used for efficient implementation of 859 // the intrinsic for java.lang.Thread.onSpinWait() 860 static bool supports_on_spin_wait() { return supports_sse2(); } 861 }; 862 863 #endif // CPU_X86_VM_VM_VERSION_X86_HPP | 765 766 static bool is_amd_Barcelona() { return is_amd() && 767 extended_cpu_family() == CPU_FAMILY_AMD_11H; } 768 769 // Intel and AMD newer cores support fast timestamps well 770 static bool supports_tscinv_bit() { 771 return (_features & CPU_TSCINV) != 0; 772 } 773 static bool supports_tscinv() { 774 return supports_tscinv_bit() && 775 ( (is_amd() && !is_amd_Barcelona()) || 776 is_intel_tsc_synched_at_init() ); 777 } 778 779 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). 780 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && 781 supports_sse3() && _model != 0x1C; } 782 783 static bool supports_compare_and_exchange() { return true; } 784 785 static intx allocate_prefetch_distance(bool use_watermark_prefetch) { 786 // Hardware prefetching (distance/size in bytes): 787 // Pentium 3 - 64 / 32 788 // Pentium 4 - 256 / 128 789 // Athlon - 64 / 32 ???? 
// Continuation of allocate_prefetch_distance(bool use_watermark_prefetch):
//   AMD: 256 with SSE2, else 128.
//   Intel, family 6 with SSE3: 192 when supports_sse4_2() and supports_ht()
//     both hold; otherwise, if use_watermark_prefetch, 384 under _LP64 and
//     320 without it; otherwise falls through to the generic rules below.
//   Generic Intel: 256 (family 6 with SSE2), 512 (other family with SSE2),
//     128 without SSE2.
// supports_on_spin_wait() again returns supports_sse2().
790 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 791 // Core - 128 / 64 792 // 793 // Software prefetching (distance in bytes / instruction with best score): 794 // Pentium 3 - 128 / prefetchnta 795 // Pentium 4 - 512 / prefetchnta 796 // Athlon - 128 / prefetchnta 797 // Opteron - 256 / prefetchnta 798 // Core - 256 / prefetchnta 799 // It will be used only when AllocatePrefetchStyle > 0 800 801 if (is_amd()) { // AMD 802 if (supports_sse2()) { 803 return 256; // Opteron 804 } else { 805 return 128; // Athlon 806 } 807 } else { // Intel 808 if (supports_sse3() && cpu_family() == 6) { 809 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 810 return 192; 811 } else if (use_watermark_prefetch) { // watermark prefetching on Core 812 #ifdef _LP64 813 return 384; 814 #else 815 return 320; 816 #endif 817 } 818 } 819 if (supports_sse2()) { 820 if (cpu_family() == 6) { 821 return 256; // Pentium M, Core, Core2 822 } else { 823 return 512; // Pentium 4 824 } 825 } else { 826 return 128; // Pentium 3 (and all other old CPUs) 827 } 828 } 829 } 830 831 // SSE2 and later processors implement a 'pause' instruction 832 // that can be used for efficient implementation of 833 // the intrinsic for java.lang.Thread.onSpinWait() 834 static bool supports_on_spin_wait() { return supports_sse2(); } 835 }; 836 837 #endif // CPU_X86_VM_VM_VERSION_X86_HPP |