        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }
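
    // Note: the FLAG_IS_DEFAULT / FLAG_SET_DEFAULT pairing used throughout
    // this function implements the usual flag ergonomics contract: a
    // feature-dependent default is installed only when the user has not set
    // the flag explicitly on the command line.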

    // Some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors the default is no software prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, the default
      // instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }
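
    // For illustration only: these defaults match what a user could request
    // explicitly with -XX:AllocatePrefetchStyle=0, or with
    // -XX:AllocatePrefetchInstr=3 (PREFETCHW) once a non-zero style is chosen.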

#ifdef COMPILER2
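    // Likely rationale: contemporary AMD cores split 256-bit AVX operations
    // into two 128-bit halves, so vectors wider than 16 bytes rarely pay off.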
    if (MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on current AMD cpus.
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }

    // ... (further Intel-specific flag defaults elided; the tail of the
    // Intel SSE4.2 intrinsics check, mirroring the AMD branch above, follows) ...
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }
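
    // Atom-derived cores (family 6, models listed below): compile-time
    // instruction scheduling is presumed to help their comparatively narrow
    // pipelines.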
    if ((cpu_family() == 0x06) &&
        ((extended_cpu_model() == 0x36) || // Centerton
         (extended_cpu_model() == 0x37) || // Silvermont
         (extended_cpu_model() == 0x4D))) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
    }
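    // Prefer PREFETCHW for allocation prefetch whenever the CPU advertises
    // 3DNow!-style prefetch support.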
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

#ifdef _LP64
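  // The vectorizedMismatch stub is implemented only in the 64-bit VM and is
  // gated on SSE4.2, hence the platform split below.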
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
#endif // _LP64

  // ... (further feature checks elided; the POPCNT check's tail follows) ...
    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
  }

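  // ERMS is the Enhanced REP MOVSB/STOSB CPUID feature (leaf 7, EBX bit 9):
  // with it, a plain REP STOSB is fast for block fills.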
  // Use fast-string operations if available.
  if (supports_erms()) {
    if (FLAG_IS_DEFAULT(UseFastStosb)) {
      UseFastStosb = true;
    }
  } else if (UseFastStosb) {
    warning("fast-string operations are not available on this CPU");
    FLAG_SET_DEFAULT(UseFastStosb, false);
  }

#ifdef COMPILER2
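  // AlignVector forces vector memory operations onto aligned addresses; that
  // effort is only worthwhile when unaligned accesses are not already cheap.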
  if (FLAG_IS_DEFAULT(AlignVector)) {
    // Modern processors allow misaligned memory operations for vectors.
    AlignVector = !UseUnalignedLoadStores;
  }
#endif // COMPILER2

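  // AllocatePrefetchInstr picks the prefetch opcode: in this port, 0 selects
  // the SSE prefetch (prefetchnta) and 3 selects PREFETCHW, which requires
  // 3DNow!-prefetch support but is the only option on pre-SSE hardware.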
  if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
    if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
    } else if (!supports_sse() && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  // Allocation prefetch settings
  intx cache_line_size = prefetch_data_size();
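  // Step at least one full cache line per prefetch so successive prefetches
  // hit distinct lines; a user-specified step size is left untouched.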
  if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
      (cache_line_size > AllocatePrefetchStepSize)) {
    FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
  }

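  // A prefetch distance of 0 disables allocation prefetching outright, so a
  // conflicting style setting is reset to match.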
  if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
    assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
    if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
    }
    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
  }

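  // Style 2 is watermark prefetching (prefetching relative to a TLAB
  // watermark rather than at every allocation), which generally wants a
  // larger prefetch distance.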
  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
    bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
    FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
  }

  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
    if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
        supports_sse4_2() && supports_ht()) { // Nehalem-based cpus
      FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
    }
#ifdef COMPILER2
    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
    }
#endif
  }

#ifdef _LP64
  // Prefetch settings

  // Prefetch interval for GC copy/scan == 9 dcache lines. Derived from
  // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
  // Tested intervals from 128 to 2048 in increments of 64 (one cache line).
  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.

  // GC copy/scan is disabled if prefetchw isn't supported, because
  // Prefetch::write emits an inlined prefetchw on Linux.
  // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
  // The prefetcht0 instruction used here works on both amd64 and em64t.

  if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
    FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
  }
  if (FLAG_IS_DEFAULT(PrefetchFieldsAhead)) {
    FLAG_SET_DEFAULT(PrefetchFieldsAhead, 1);
  }
#endif
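  // Pad contended fields out to a full cache line so independently updated
  // fields cannot share a line (avoids false sharing).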
  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
      (cache_line_size > ContendedPaddingWidth)) {
    ContendedPaddingWidth = cache_line_size;
  }

  // This machine allows unaligned memory accesses
  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
  }

#ifndef PRODUCT
  if (log_is_enabled(Info, os, cpu)) {
    outputStream* log = Log(os, cpu)::info_stream();
    log->print_cr("Logical CPUs per core: %u",
                  logical_processors_per_package());
    log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
    log->print("UseSSE=%d", (int) UseSSE);
    if (UseAVX > 0) {
      log->print(" UseAVX=%d", (int) UseAVX);