< prev index next >

src/cpu/x86/vm/vm_version_x86.cpp

Print this page




1086         UseXmmI2D = true;
1087       } else {
1088         UseXmmI2D = false;
1089       }
1090     }
1091     if (supports_sse4_2()) {
1092       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1093         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1094       }
1095     } else {
1096       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1097         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1098       }
1099       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1100     }
1101 
1102     // some defaults for AMD family 15h
1103     if ( cpu_family() == 0x15 ) {
1104       // On family 15h processors default is no sw prefetch
1105       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1106         AllocatePrefetchStyle = 0;
1107       }
1108       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1109       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1110         AllocatePrefetchInstr = 3;
1111       }
1112       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1113       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1114         UseXMMForArrayCopy = true;
1115       }
1116       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1117         UseUnalignedLoadStores = true;
1118       }
1119     }
1120 
1121 #ifdef COMPILER2
1122     if (MaxVectorSize > 16) {
1123       // Limit vector size to 16 bytes on current AMD CPUs.
1124       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1125     }
1126 #endif // COMPILER2
1127   }
1128 
1129   if( is_intel() ) { // Intel cpus specific settings
1130     if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
1131       UseStoreImmI16 = false; // don't use it on Intel cpus
1132     }
1133     if( cpu_family() == 6 || cpu_family() == 15 ) {
1134       if( FLAG_IS_DEFAULT(UseAddressNop) ) {
1135         // Use it on all Intel cpus starting from PentiumPro
1136         UseAddressNop = true;
1137       }


1178         }
1179         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1180       }
1181     }
1182     if ((cpu_family() == 0x06) &&
1183         ((extended_cpu_model() == 0x36) || // Centerton
1184          (extended_cpu_model() == 0x37) || // Silvermont
1185          (extended_cpu_model() == 0x4D))) {
1186 #ifdef COMPILER2
1187       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1188         OptoScheduling = true;
1189       }
1190 #endif
1191       if (supports_sse4_2()) { // Silvermont
1192         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1193           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1194         }
1195       }
1196     }
1197     if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1198       AllocatePrefetchInstr = 3;
1199     }
1200   }
1201 
1202 #ifdef _LP64
1203   if (UseSSE42Intrinsics) {
1204     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1205       UseVectorizedMismatchIntrinsic = true;
1206     }
1207   } else if (UseVectorizedMismatchIntrinsic) {
1208     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1209       warning("vectorizedMismatch intrinsics are not available on this CPU");
1210     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1211   }
1212 #else
1213   if (UseVectorizedMismatchIntrinsic) {
1214     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1215       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1216     }
1217     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1218   }


1274     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1275   }
1276 
1277   // Use fast-string operations if available.
1278   if (supports_erms()) {
1279     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1280       UseFastStosb = true;
1281     }
1282   } else if (UseFastStosb) {
1283     warning("fast-string operations are not available on this CPU");
1284     FLAG_SET_DEFAULT(UseFastStosb, false);
1285   }
1286 
1287 #ifdef COMPILER2
1288   if (FLAG_IS_DEFAULT(AlignVector)) {
1289     // Modern processors allow misaligned memory operations for vectors.
1290     AlignVector = !UseUnalignedLoadStores;
1291   }
1292 #endif // COMPILER2
1293 
1294   if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0;
1295   if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;





1296 
1297   // Allocation prefetch settings
1298   intx cache_line_size = prefetch_data_size();
1299   if( cache_line_size > AllocatePrefetchStepSize )
1300     AllocatePrefetchStepSize = cache_line_size;


1301 
1302   AllocatePrefetchDistance = allocate_prefetch_distance();
1303   AllocatePrefetchStyle    = allocate_prefetch_style();





1304 
1305   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1306     if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
1307 #ifdef _LP64
1308       AllocatePrefetchDistance = 384;
1309 #else
1310       AllocatePrefetchDistance = 320;
1311 #endif
1312     }
1313     if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1314       AllocatePrefetchDistance = 192;
1315       if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {

1316         FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1317       }
1318     }
1319 #ifdef COMPILER2
1320     if (supports_sse4_2()) {
1321       if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1322         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1323       }
1324     }
1325 #endif
1326   }
1327 
1328 #ifdef _LP64
1329   // Prefetch settings
1330   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
1331   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
1332   PrefetchFieldsAhead         = prefetch_fields_ahead();

















1333 #endif
1334 
1335   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1336      (cache_line_size > ContendedPaddingWidth))
1337      ContendedPaddingWidth = cache_line_size;
1338 
1339   // This machine allows unaligned memory accesses
1340   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1341     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1342   }
1343 
1344 #ifndef PRODUCT
1345   if (log_is_enabled(Info, os, cpu)) {
1346     outputStream* log = Log(os, cpu)::info_stream();
1347     log->print_cr("Logical CPUs per core: %u",
1348                   logical_processors_per_package());
1349     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1350     log->print("UseSSE=%d", (int) UseSSE);
1351     if (UseAVX > 0) {
1352       log->print("  UseAVX=%d", (int) UseAVX);




1086         UseXmmI2D = true;
1087       } else {
1088         UseXmmI2D = false;
1089       }
1090     }
1091     if (supports_sse4_2()) {
1092       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1093         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1094       }
1095     } else {
1096       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1097         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1098       }
1099       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1100     }
1101 
1102     // some defaults for AMD family 15h
1103     if ( cpu_family() == 0x15 ) {
1104       // On family 15h processors default is no sw prefetch
1105       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1106         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1107       }
1108       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1109       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1110         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1111       }
1112       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1113       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1114         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1115       }
1116       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1117         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1118       }
1119     }
1120 
1121 #ifdef COMPILER2
1122     if (MaxVectorSize > 16) {
1123       // Limit vector size to 16 bytes on current AMD CPUs.
1124       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1125     }
1126 #endif // COMPILER2
1127   }
1128 
1129   if( is_intel() ) { // Intel cpus specific settings
1130     if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
1131       UseStoreImmI16 = false; // don't use it on Intel cpus
1132     }
1133     if( cpu_family() == 6 || cpu_family() == 15 ) {
1134       if( FLAG_IS_DEFAULT(UseAddressNop) ) {
1135         // Use it on all Intel cpus starting from PentiumPro
1136         UseAddressNop = true;
1137       }


1178         }
1179         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1180       }
1181     }
1182     if ((cpu_family() == 0x06) &&
1183         ((extended_cpu_model() == 0x36) || // Centerton
1184          (extended_cpu_model() == 0x37) || // Silvermont
1185          (extended_cpu_model() == 0x4D))) {
1186 #ifdef COMPILER2
1187       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1188         OptoScheduling = true;
1189       }
1190 #endif
1191       if (supports_sse4_2()) { // Silvermont
1192         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1193           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1194         }
1195       }
1196     }
1197     if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1198       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1199     }
1200   }
1201 
1202 #ifdef _LP64
1203   if (UseSSE42Intrinsics) {
1204     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1205       UseVectorizedMismatchIntrinsic = true;
1206     }
1207   } else if (UseVectorizedMismatchIntrinsic) {
1208     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1209       warning("vectorizedMismatch intrinsics are not available on this CPU");
1210     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1211   }
1212 #else
1213   if (UseVectorizedMismatchIntrinsic) {
1214     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1215       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1216     }
1217     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1218   }


1274     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1275   }
1276 
1277   // Use fast-string operations if available.
1278   if (supports_erms()) {
1279     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1280       UseFastStosb = true;
1281     }
1282   } else if (UseFastStosb) {
1283     warning("fast-string operations are not available on this CPU");
1284     FLAG_SET_DEFAULT(UseFastStosb, false);
1285   }
1286 
1287 #ifdef COMPILER2
1288   if (FLAG_IS_DEFAULT(AlignVector)) {
1289     // Modern processors allow misaligned memory operations for vectors.
1290     AlignVector = !UseUnalignedLoadStores;
1291   }
1292 #endif // COMPILER2
1293 
1294   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1295     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1296       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1297     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1298       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1299     }
1300   }
1301 
1302   // Allocation prefetch settings
1303   intx cache_line_size = prefetch_data_size();
1304   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1305       (cache_line_size > AllocatePrefetchStepSize)) {
1306     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1307   }
1308 
1309   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1310     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1311     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1312       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1313     }
1314     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1315   }
1316 
1317   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1318     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1319     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));




1320   }
1321 
1322   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1323     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1324         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1325       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1326     }

1327 #ifdef COMPILER2
1328     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {

1329       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1330     }

1331 #endif
1332   }
1333 
1334 #ifdef _LP64
1335   // Prefetch settings
1336 
1337   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1338   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1339   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1340   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1341 
1342   // gc copy/scan is disabled if prefetchw isn't supported, because
1343   // Prefetch::write emits an inlined prefetchw on Linux.
1344   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1345   // The used prefetcht0 instruction works for both amd64 and em64t.
1346 
1347   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1348     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1349   }
1350   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1351     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1352   }
1353   if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
1354     FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
1355   }
1356 #endif
1357 
1358   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1359      (cache_line_size > ContendedPaddingWidth))
1360      ContendedPaddingWidth = cache_line_size;
1361 
1362   // This machine allows unaligned memory accesses
1363   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1364     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1365   }
1366 
1367 #ifndef PRODUCT
1368   if (log_is_enabled(Info, os, cpu)) {
1369     outputStream* log = Log(os, cpu)::info_stream();
1370     log->print_cr("Logical CPUs per core: %u",
1371                   logical_processors_per_package());
1372     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1373     log->print("UseSSE=%d", (int) UseSSE);
1374     if (UseAVX > 0) {
1375       log->print("  UseAVX=%d", (int) UseAVX);


< prev index next >