
src/hotspot/share/runtime/synchronizer.cpp

rev 57232 : imported patch 8235931.patch.cr0


 500 //
 501 // Performance concern:
 502 // OrderAccess::storestore() calls release() which at one time stored 0
 503 // into the global volatile OrderAccess::dummy variable. This store was
 504 // unnecessary for correctness. Many threads storing into a common location
 505 // causes considerable cache migration or "sloshing" on large SMP systems.
 506 // As such, I avoided using OrderAccess::storestore(). In some cases
 507 // OrderAccess::fence() -- which incurs local latency on the executing
 508 // processor -- is a better choice as it scales on SMP systems.
 509 //
 510 // See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
 511 // a discussion of coherency costs. Note that all our current reference
 512 // platforms provide strong ST-ST order, so the issue is moot on IA32,
 513 // x64, and SPARC.
 514 //
 515 // As a general policy we use "volatile" to control compiler-based reordering
 516 // and explicit fences (barriers) to control for architectural reordering
 517 // performed by the CPU(s) or platform.
 518 
 519 struct SharedGlobals {
 520   char         _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
 521   // These are highly shared mostly-read variables.
 522   // To avoid false-sharing they need to be the sole occupants of a cache line.
 523   volatile int stw_random;
 524   volatile int stw_cycle;
 525   DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
 526   // Hot RW variable -- Sequester to avoid false-sharing
 527   volatile int hc_sequence;
 528   DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
 529 };
 530 
 531 static SharedGlobals GVars;
 532 static int _forceMonitorScavenge = 0; // Scavenge required and pending
 533 
 534 static markWord read_stable_mark(oop obj) {
 535   markWord mark = obj->mark();
 536   if (!mark.is_being_inflated()) {
 537     return mark;       // normal fast-path return
 538   }
 539 
 540   int its = 0;
 541   for (;;) {
 542     markWord mark = obj->mark();
 543     if (!mark.is_being_inflated()) {
 544       return mark;    // normal fast-path return
 545     }
 546 
 547     // The object is being inflated by some other thread.
 548     // The caller of read_stable_mark() must wait for inflation to complete.


1065         // Not enough ObjectMonitors on the global free list.
1066         // We can't safely induce a STW safepoint from om_alloc() as our thread
1067         // state may not be appropriate for such activities and callers may hold
1068         // naked oops, so instead we defer the action.
1069         InduceScavenge(self, "om_alloc");
1070       }
1071       continue;
1072     }
1073 
1074     // 3: allocate a block of new ObjectMonitors
1075     // Both the local and global free lists are empty -- resort to malloc().
 1076     // In the current implementation ObjectMonitors are TSM (type-stable memory) - immortal.
 1077     // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
1078     // each ObjectMonitor to start at the beginning of a cache line,
1079     // so we use align_up().
1080     // A better solution would be to use C++ placement-new.
1081     // BEWARE: As it stands currently, we don't run the ctors!
1082     assert(_BLOCKSIZE > 1, "invariant");
1083     size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE;
1084     PaddedObjectMonitor* temp;
1085     size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
1086     void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal);
1087     temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE);
1088     (void)memset((void *) temp, 0, neededsize);
1089 
1090     // Format the block.
 1091     // Initialize the linked list: each monitor points to its successor,
 1092     // forming the singly-linked free list. The very first monitor
 1093     // will point to the next block, which forms the block list.
1094     // The trick of using the 1st element in the block as g_block_list
1095     // linkage should be reconsidered.  A better implementation would
1096     // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
1097 
1098     for (int i = 1; i < _BLOCKSIZE; i++) {
1099       temp[i]._next_om = (ObjectMonitor *)&temp[i+1];
1100     }
1101 
 1102     // Terminate the list: the last monitor's next pointer is NULL.
1103     temp[_BLOCKSIZE - 1]._next_om = NULL;
1104 
1105     // Element [0] is reserved for global list linkage
1106     temp[0].set_object(CHAINMARKER);
1107 




 500 //
 501 // Performance concern:
 502 // OrderAccess::storestore() calls release() which at one time stored 0
 503 // into the global volatile OrderAccess::dummy variable. This store was
 504 // unnecessary for correctness. Many threads storing into a common location
 505 // causes considerable cache migration or "sloshing" on large SMP systems.
 506 // As such, I avoided using OrderAccess::storestore(). In some cases
 507 // OrderAccess::fence() -- which incurs local latency on the executing
 508 // processor -- is a better choice as it scales on SMP systems.
 509 //
 510 // See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
 511 // a discussion of coherency costs. Note that all our current reference
 512 // platforms provide strong ST-ST order, so the issue is moot on IA32,
 513 // x64, and SPARC.
 514 //
 515 // As a general policy we use "volatile" to control compiler-based reordering
 516 // and explicit fences (barriers) to control for architectural reordering
 517 // performed by the CPU(s) or platform.
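A minimal sketch of the "volatile for the compiler, explicit fence for the CPU" policy described above. OrderAccess::fence() is the real HotSpot primitive; the struct, field, and function names below are invented purely for illustration.

#include "runtime/orderAccess.hpp"

struct ExampleFlags {
  volatile int _ready;        // volatile: the compiler may not cache or reorder accesses
};

static ExampleFlags example_flags = { 0 };

static void publish_example() {
  example_flags._ready = 1;   // plain volatile store
  OrderAccess::fence();       // full fence: pays local latency on this CPU only,
                              // with no store to a shared dummy location that could
                              // "slosh" between caches on a large SMP system
}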
 518 
 519 struct SharedGlobals {
 520   char         _pad_prefix[OM_CACHE_LINE_SIZE];
 521   // These are highly shared mostly-read variables.
 522   // To avoid false-sharing they need to be the sole occupants of a cache line.
 523   volatile int stw_random;
 524   volatile int stw_cycle;
 525   DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
 526   // Hot RW variable -- Sequester to avoid false-sharing
 527   volatile int hc_sequence;
 528   DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile int));
 529 };
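Roughly how the padded struct above lays out, assuming OM_CACHE_LINE_SIZE is 64 bytes and that DEFINE_PAD_MINUS_SIZE(id, alignment, size) expands to a char array of alignment - size bytes (both are assumptions for this hand-expanded sketch, not taken from this file):

struct SharedGlobalsExpanded {
  char         _pad_prefix[64];                           // keep line 0 clear of unrelated neighbors
  volatile int stw_random;                                // line 1: highly shared, mostly read
  volatile int stw_cycle;                                 //   "
  char         _pad_buf1[64 - sizeof(volatile int) * 2];  // fill out line 1
  volatile int hc_sequence;                               // line 2: hot read-write, isolated
  char         _pad_buf2[64 - sizeof(volatile int)];      // fill out line 2
};

Each group of fields is thus the sole occupant of its cache line, so writers of hc_sequence do not invalidate the line holding stw_random and stw_cycle.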
 530 
 531 static SharedGlobals GVars;
 532 static int _forceMonitorScavenge = 0; // Scavenge required and pending
 533 
 534 static markWord read_stable_mark(oop obj) {
 535   markWord mark = obj->mark();
 536   if (!mark.is_being_inflated()) {
 537     return mark;       // normal fast-path return
 538   }
 539 
 540   int its = 0;
 541   for (;;) {
 542     markWord mark = obj->mark();
 543     if (!mark.is_being_inflated()) {
 544       return mark;    // normal fast-path return
 545     }
 546 
 547     // The object is being inflated by some other thread.
 548     // The caller of read_stable_mark() must wait for inflation to complete.


1065         // Not enough ObjectMonitors on the global free list.
1066         // We can't safely induce a STW safepoint from om_alloc() as our thread
1067         // state may not be appropriate for such activities and callers may hold
1068         // naked oops, so instead we defer the action.
1069         InduceScavenge(self, "om_alloc");
1070       }
1071       continue;
1072     }
1073 
1074     // 3: allocate a block of new ObjectMonitors
1075     // Both the local and global free lists are empty -- resort to malloc().
 1076     // In the current implementation ObjectMonitors are TSM (type-stable memory) - immortal.
 1077     // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
1078     // each ObjectMonitor to start at the beginning of a cache line,
1079     // so we use align_up().
1080     // A better solution would be to use C++ placement-new.
1081     // BEWARE: As it stands currently, we don't run the ctors!
1082     assert(_BLOCKSIZE > 1, "invariant");
1083     size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE;
1084     PaddedObjectMonitor* temp;
1085     size_t aligned_size = neededsize + (OM_CACHE_LINE_SIZE - 1);
1086     void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal);
1087     temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, OM_CACHE_LINE_SIZE);
1088     (void)memset((void *) temp, 0, neededsize);
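The OM_CACHE_LINE_SIZE - 1 slack bytes exist so that rounding the raw malloc result up to the next cache-line boundary still leaves neededsize usable bytes. A standalone sketch of that align-up step (illustrative only, assuming a power-of-two alignment; HotSpot's real align_up() lives in utilities/align.hpp):

#include <cstddef>
#include <cstdint>

static void* align_up_sketch(void* raw, size_t alignment) {
  uintptr_t p = reinterpret_cast<uintptr_t>(raw);
  return reinterpret_cast<void*>((p + alignment - 1) & ~(uintptr_t)(alignment - 1));
}

// void* raw  = malloc(neededsize + alignment - 1);
// void* temp = align_up_sketch(raw, alignment);   // temp + neededsize stays inside the raw block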
1089 
1090     // Format the block.
 1091     // Initialize the linked list: each monitor points to its successor,
 1092     // forming the singly-linked free list. The very first monitor
 1093     // will point to the next block, which forms the block list.
1094     // The trick of using the 1st element in the block as g_block_list
1095     // linkage should be reconsidered.  A better implementation would
1096     // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
1097 
1098     for (int i = 1; i < _BLOCKSIZE; i++) {
1099       temp[i]._next_om = (ObjectMonitor *)&temp[i+1];
1100     }
1101 
 1102     // Terminate the list: the last monitor's next pointer is NULL.
1103     temp[_BLOCKSIZE - 1]._next_om = NULL;
1104 
1105     // Element [0] is reserved for global list linkage
1106     temp[0].set_object(CHAINMARKER);
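The "better implementation" suggested in the comment above might look roughly like the sketch below, which keeps the block-list linkage outside the monitor array so that element [0] no longer needs the CHAINMARKER convention. The type and field names here are hypothetical, not part of HotSpot:

struct ObjectMonitorBlock {
  ObjectMonitorBlock* _next_block;             // block-list linkage, separate from the free list
  PaddedObjectMonitor _monitors[_BLOCKSIZE];   // every slot is a usable ObjectMonitor
};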
1107 

