//
// Performance concern:
// OrderAccess::storestore() calls release() which at one time stored 0
// into the global volatile OrderAccess::dummy variable. This store was
// unnecessary for correctness. Many threads storing into a common location
// causes considerable cache migration or "sloshing" on large SMP systems.
// As such, I avoided using OrderAccess::storestore(). In some cases
// OrderAccess::fence() -- which incurs local latency on the executing
// processor -- is a better choice as it scales on SMP systems.
//
// See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
// a discussion of coherency costs. Note that all our current reference
// platforms provide strong ST-ST order, so the issue is moot on IA32,
// x64, and SPARC.
//
// As a general policy we use "volatile" to control compiler-based reordering
// and explicit fences (barriers) to control for architectural reordering
// performed by the CPU(s) or platform.
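//
// A minimal sketch of that policy (hypothetical names, for illustration only):
//
//   static int _data = 0;
//   static volatile int _flag = 0;  // "volatile" constrains only the compiler
//
//   void publish_data() {
//     _data = 42;                   // plain store
//     OrderAccess::fence();         // CPU may not reorder the stores across this
//     _flag = 1;                    // volatile store: compiler may not hoist it
//   }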
struct SharedGlobals {
  char _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
  // These are highly shared mostly-read variables.
  // To avoid false-sharing they need to be the sole occupants of a cache line.
  volatile int stw_random;
  volatile int stw_cycle;
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
  // Hot RW variable -- Sequester to avoid false-sharing
  volatile int hc_sequence;
  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
};
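//
// Sanity-check sketch (assuming DEFINE_PAD_MINUS_SIZE expands to a char pad
// of the cache-line size minus the listed fields): the struct should span
// exactly three cache lines, leaving stw_random/stw_cycle and hc_sequence
// as the sole occupants of their respective lines.
//
//   STATIC_ASSERT(sizeof(SharedGlobals) == 3 * DEFAULT_CACHE_LINE_SIZE);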
static SharedGlobals GVars;
static int _forceMonitorScavenge = 0; // Scavenge required and pending

static markWord read_stable_mark(oop obj) {
  markWord mark = obj->mark();
  if (!mark.is_being_inflated()) {
    return mark; // normal fast-path return
  }

  int its = 0;
  for (;;) {
    mark = obj->mark();
    if (!mark.is_being_inflated()) {
      return mark; // normal fast-path return
    }

    // The object is being inflated by some other thread.
    // The caller of read_stable_mark() must wait for inflation to complete.
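    // (Sketch of a typical wait strategy: spin briefly, re-reading the mark
    // each iteration, and back off -- yield, then block -- as 'its' grows,
    // until the INFLATING marker has been replaced by a stable mark.)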
        // Not enough ObjectMonitors on the global free list.
        // We can't safely induce a STW safepoint from om_alloc() as our thread
        // state may not be appropriate for such activities and callers may hold
        // naked oops, so instead we defer the action.
        InduceScavenge(self, "om_alloc");
      }
      continue;
    }
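    // Sketch of the deferral idea (hypothetical body; illustration only):
    //
    //   static void InduceScavenge(Thread* self, const char* whence) {
    //     // Just set the pending flag; the scavenge itself runs later, at a
    //     // safepoint, when thread state and oop handling are safe.
    //     if (_forceMonitorScavenge == 0) {
    //       _forceMonitorScavenge = 1;
    //     }
    //   }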
    // 3: allocate a block of new ObjectMonitors
    // Both the local and global free lists are empty -- resort to malloc().
    // In the current implementation ObjectMonitors are TSM (type-stable
    // memory) -- immortal.
    // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
    // each ObjectMonitor to start at the beginning of a cache line,
    // so we use align_up().
    // A better solution would be to use C++ placement-new (see the sketch
    // after the memset below). BEWARE: as it stands currently, we don't
    // run the ctors!
    assert(_BLOCKSIZE > 1, "invariant");
    size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE;
    PaddedObjectMonitor* temp;
    size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
    void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal);
    temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE);
    (void)memset((void *) temp, 0, neededsize);
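    // Sketch of the placement-new alternative mentioned above (not what the
    // current code does); it would run the ctors that malloc + memset skip:
    //
    //   for (int i = 0; i < _BLOCKSIZE; i++) {
    //     ::new (static_cast<void*>(&temp[i])) PaddedObjectMonitor();
    //   }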
    // Format the block.
    // Initialize the linked list: each monitor points to its successor,
    // forming the singly linked free list. The very first monitor in the
    // block will point to the next block, which forms the block list.
    // The trick of using the 1st element in the block as g_block_list
    // linkage should be reconsidered. A better implementation would
    // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
    // (see the fuller sketch after the formatting code below).

    for (int i = 1; i < _BLOCKSIZE; i++) {
      temp[i]._next_om = (ObjectMonitor *)&temp[i+1];
    }

    // Terminate the last monitor as the end of the list.
    temp[_BLOCKSIZE - 1]._next_om = NULL;

    // Element [0] is reserved for global list linkage.
    temp[0].set_object(CHAINMARKER);
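    // A fuller sketch of the Block alternative suggested above (hypothetical;
    // not implemented here):
    //
    //   class Block {
    //     Block*        _next;     // explicit block-list linkage
    //     int           _count;    // number of monitors in _body
    //     ObjectMonitor _body[1];  // _count monitors, allocated contiguously
    //   };
    //
    // This would keep list linkage out of element [0] and retire CHAINMARKER.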