  void lfence();

  void lock();

  void lzcntl(Register dst, Register src);

#ifdef _LP64
  void lzcntq(Register dst, Register src);
#endif

  enum Membar_mask_bits {
    StoreStore = 1 << 3,
    LoadStore  = 1 << 2,
    StoreLoad  = 1 << 1,
    LoadLoad   = 1 << 0
  };

  // Serializes memory and blows flags
  void membar(Membar_mask_bits order_constraint) {
-   if (os::is_MP()) {
    // We only have to handle StoreLoad
    if (order_constraint & StoreLoad) {
      // All usable chips support "locked" instructions which suffice
      // as barriers, and are much faster than the alternative of
      // using the cpuid instruction. We use here a locked add [esp-C],0.
      // This is conveniently otherwise a no-op except for blowing
      // flags, and introducing a false dependency on the target memory
      // location. We can't do anything with flags, but we can avoid
      // memory dependencies in the current method by locked-adding
      // somewhere else on the stack. Doing [esp+C] will collide with
      // something on the stack in the current method, hence we go for
      // [esp-C]. It is convenient since it is almost always in the data
      // cache, for any small C. We need to step back from SP to avoid
      // data dependencies on other things below SP (callee-saves, for
      // example). Without a clear way to figure out the minimal safe
      // distance from SP, it makes sense to step back a complete cache
      // line, as this will also avoid possible second-order effects
      // with locked ops against the cache line. Our choice of offset
      // is bounded by x86 operand encoding, which should stay within
      // [-128; +127] to have the 8-bit displacement encoding.
      //
      // Any change to this code may need to revisit other places in
      // the code where this idiom is used, in particular the
      // orderAccess code.

      int offset = -VM_Version::L1_line_size();
      if (offset < -128) {
        offset = -128;
      }

      lock();
      addl(Address(rsp, offset), 0); // Assert the lock# signal here
    }
-   }
  }

  void mfence();

  // Moves

  void mov64(Register dst, int64_t imm64);

  void movb(Address dst, Register src);
  void movb(Address dst, int imm8);
  void movb(Register dst, Address src);

  void movddup(XMMRegister dst, XMMRegister src);

  void kmovbl(KRegister dst, Register src);
  void kmovbl(Register dst, KRegister src);
  void kmovwl(KRegister dst, Register src);
  void kmovwl(KRegister dst, Address src);
  void kmovwl(Register dst, KRegister src);
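
// ---------------------------------------------------------------------
// Illustrative only, not part of the header: a hypothetical call site
// showing how the mask bits combine. The ordering constraints OR
// together, and on x86 (a TSO machine) only a mask containing StoreLoad
// causes any instruction to be emitted; LoadLoad, LoadStore and
// StoreStore are already guaranteed by the hardware, so membar()
// compiles to nothing for them. The `masm` pointer is assumed here.
//
//   masm->membar(Assembler::Membar_mask_bits(Assembler::StoreLoad  |
//                                            Assembler::StoreStore |
//                                            Assembler::LoadLoad   |
//                                            Assembler::LoadStore));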
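
// ---------------------------------------------------------------------
// Illustrative only: a minimal standalone sketch of the same locked-add
// idiom in GCC/Clang extended inline assembly (x86-64 assumed; the
// function name is made up). A LOCK-prefixed read-modify-write drains
// the store buffer just like mfence but is cheaper on most chips; the
// fixed -64 offset stands in for the -VM_Version::L1_line_size() step
// back from SP used above.

static inline void storeload_fence() {
  // Otherwise a no-op: adds 0 below SP, clobbering only EFLAGS ("cc");
  // the "memory" clobber also stops the compiler from reordering
  // accesses across the barrier.
  __asm__ __volatile__("lock; addl $0,-64(%%rsp)" ::: "cc", "memory");
}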