hotspot Sdiff src/share/vm/gc

src/share/vm/gc_implementation/g1/concurrentMark.hpp

rev 2724 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by:

 411 
 412   // marking tasks
 413   size_t                  _max_task_num; // maximum task number
 414   size_t                  _active_tasks; // task num currently active
 415   CMTask**                _tasks;        // task queue array (max_task_num len)
 416   CMTaskQueueSet*         _task_queues;  // task queue set
 417   ParallelTaskTerminator  _terminator;   // for termination
 418 
 419   // Two sync barriers that are used to synchronise tasks when an
 420   // overflow occurs. The algorithm is the following. All tasks enter
 421   // the first one to ensure that they have all stopped manipulating
 422   // the global data structures. After they exit it, they re-initialise
 423   // their data structures and task 0 re-initialises the global data
 424   // structures. Then, they enter the second sync barrier. This
 425   // ensure, that no task starts doing work before all data
 426   // structures (local and global) have been re-initialised. When they
 427   // exit it, they are free to start working again.
 428   WorkGangBarrierSync     _first_overflow_barrier_sync;
 429   WorkGangBarrierSync     _second_overflow_barrier_sync;
 430 
 431 
 432   // this is set by any task, when an overflow on the global data
 433   // structures is detected.
 434   volatile bool           _has_overflown;
 435   // true: marking is concurrent, false: we're in remark
 436   volatile bool           _concurrent;
 437   // set at the end of a Full GC so that marking aborts
 438   volatile bool           _has_aborted;
 439 
 440   // used when remark aborts due to an overflow to indicate that
 441   // another concurrent marking phase should start
 442   volatile bool           _restart_for_overflow;
 443 
 444   // This is true from the very start of concurrent marking until the
 445   // point when all the tasks complete their work. It is really used
 446   // to determine the points between the end of concurrent marking and
 447   // time of remark.
 448   volatile bool           _concurrent_marking_in_progress;
 449 
 450   // verbose level
 451   CMVerboseLevel          _verbose_level;

 562   // Methods to enter the two overflow sync barriers
 563   void enter_first_sync_barrier(int task_num);
 564   void enter_second_sync_barrier(int task_num);
 565 
 566   ForceOverflowSettings* force_overflow_conc() {
 567     return &_force_overflow_conc;
 568   }
 569 
 570   ForceOverflowSettings* force_overflow_stw() {
 571     return &_force_overflow_stw;
 572   }
 573 
 574   ForceOverflowSettings* force_overflow() {
 575     if (concurrent()) {
 576       return force_overflow_conc();
 577     } else {
 578       return force_overflow_stw();
 579     }
 580   }
 581 

















 582 public:
 583   // Manipulation of the global mark stack.
 584   // Notice that the first mark_stack_push is CAS-based, whereas the
 585   // two below are Mutex-based. This is OK since the first one is only
 586   // called during evacuation pauses and doesn't compete with the
 587   // other two (which are called by the marking tasks during
 588   // concurrent marking or remark).
 589   bool mark_stack_push(oop p) {
 590     _markStack.par_push(p);
 591     if (_markStack.overflow()) {
 592       set_has_overflown();
 593       return false;
 594     }
 595     return true;
 596   }
 597   bool mark_stack_push(oop* arr, int n) {
 598     _markStack.par_push_arr(arr, n);
 599     if (_markStack.overflow()) {
 600       set_has_overflown();
 601       return false;

 699   }
 700 
 701   // It grays an object by first marking it. Then, if it's behind the
 702   // global finger, it also pushes it on the global stack.
 703   void deal_with_reference(oop obj);
 704 
 705   ConcurrentMark(ReservedSpace rs, int max_regions);
 706   ~ConcurrentMark();
 707   ConcurrentMarkThread* cmThread() { return _cmThread; }
 708 
 709   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 710   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 711 
 712   // The following three are interaction between CM and
 713   // G1CollectedHeap
 714 
 715   // This notifies CM that a root during initial-mark needs to be
 716   // grayed and it's MT-safe. Currently, we just mark it. But, in the
 717   // future, we can experiment with pushing it on the stack and we can
 718   // do this without changing G1CollectedHeap.
 719   void grayRoot(oop p);

 720   // It's used during evacuation pauses to gray a region, if
 721   // necessary, and it's MT-safe. It assumes that the caller has
 722   // marked any objects on that region. If _should_gray_objects is
 723   // true and we're still doing concurrent marking, the region is
 724   // pushed on the region stack, if it is located below the global
 725   // finger, otherwise we do nothing.
 726   void grayRegionIfNecessary(MemRegion mr);

 727   // It's used during evacuation pauses to mark and, if necessary,
 728   // gray a single object and it's MT-safe. It assumes the caller did
 729   // not mark the object. If _should_gray_objects is true and we're
 730   // still doing concurrent marking, the object is pushed on the
 731   // global stack, if it is located below the global finger, otherwise
 732   // we do nothing.
 733   void markAndGrayObjectIfNecessary(oop p);
 734 
 735   // It iterates over the heap and for each object it comes across it
 736   // will dump the contents of its reference fields, as well as
 737   // liveness information for the object and its referents. The dump
 738   // will be written to a file with the following name:
 739   // G1PrintReachableBaseFile + "." + str.
 740   // vo decides whether the prev (vo == UsePrevMarking), the next
 741   // (vo == UseNextMarking) marking information, or the mark word
 742   // (vo == UseMarkWord) will be used to determine the liveness of
 743   // each object / referent.
 744   // If all is true, all objects in the heap will be dumped, otherwise
 745   // only the live ones. In the dump the following symbols / abbreviations
 746   // are used:
 747   //   M : an explicitly live object (its bitmap bit is set)
 748   //   > : an implicitly live object (over tams)
 749   //   O : an object outside the G1 heap (typically: in the perm gen)
 750   //   NOT : a reference field whose referent is not live
 751   //   AND MARKED : indicates that an object is both explicitly and
 752   //   implicitly live (it should be one or the other, not both)
 753   void print_reachable(const char* str,

 757   void clearNextBitmap();
 758 
 759   // These two do the work that needs to be done before and after the
 760   // initial root checkpoint. Since this checkpoint can be done at two
 761   // different points (i.e. an explicit pause or piggy-backed on a
 762   // young collection), then it's nice to be able to easily share the
 763   // pre/post code. It might be the case that we can put everything in
 764   // the post method. TP
 765   void checkpointRootsInitialPre();
 766   void checkpointRootsInitialPost();
 767 
 768   // Do concurrent phase of marking, to a tentative transitive closure.
 769   void markFromRoots();
 770 
 771   // Process all unprocessed SATB buffers. It is called at the
 772   // beginning of an evacuation pause.
 773   void drainAllSATBBuffers();
 774 
 775   void checkpointRootsFinal(bool clear_all_soft_refs);
 776   void checkpointRootsFinalWork();
 777   void calcDesiredRegions();
 778   void cleanup();
 779   void completeCleanup();
 780 
 781   // Mark in the previous bitmap.  NB: this is usually read-only, so use
 782   // this carefully!
 783   void markPrev(oop p);


 784   void clear(oop p);

 785   // Clears marks for all objects in the given range, for both prev and
 786   // next bitmaps.  NB: the previous bitmap is usually read-only, so use
 787   // this carefully!
 788   void clearRangeBothMaps(MemRegion mr);
 789 
 790   // Record the current top of the mark and region stacks; a
 791   // subsequent oops_do() on the mark stack and
 792   // invalidate_entries_into_cset() on the region stack will iterate
 793   // only over indices valid at the time of this call.
 794   void set_oops_do_bound() {
 795     _markStack.set_oops_do_bound();
 796     _regionStack.set_oops_do_bound();
 797   }
 798   // Iterate over the oops in the mark stack and all local queues. It
 799   // also calls invalidate_entries_into_cset() on the region stack.
 800   void oops_do(OopClosure* f);
 801   // It is called at the end of an evacuation pause during marking so
 802   // that CM is notified of where the new end of the heap is. It
 803   // doesn't do anything if concurrent_marking_in_progress() is false,
 804   // unless the force parameter is true.

 873 
 874   void print_summary_info();
 875 
 876   void print_worker_threads_on(outputStream* st) const;
 877 
 878   // The following indicate whether a given verbose level has been
 879   // set. Notice that anything above stats is conditional to
 880   // _MARKING_VERBOSE_ having been set to 1
 881   bool verbose_stats() {
 882     return _verbose_level >= stats_verbose;
 883   }
 884   bool verbose_low() {
 885     return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
 886   }
 887   bool verbose_medium() {
 888     return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
 889   }
 890   bool verbose_high() {
 891     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
 892   }
































































 893 };
 894 
 895 // A class representing a marking task.
 896 class CMTask : public TerminatorTerminator {
 897 private:
 898   enum PrivateConstants {
 899     // the regular clock call is called once the scanned words reaches
 900     // this limit
 901     words_scanned_period          = 12*1024,
 902     // the regular clock call is called once the number of visited
 903     // references reaches this limit
 904     refs_reached_period           = 384,
 905     // initial value for the hash seed, used in the work stealing code
 906     init_hash_seed                = 17,
 907     // how many entries will be transferred between global stack and
 908     // local queues
 909     global_stack_transfer_size    = 16
 910   };
 911 
 912   int                         _task_id;

 411 
 412   // marking tasks
 413   size_t                  _max_task_num; // maximum task number
 414   size_t                  _active_tasks; // task num currently active
 415   CMTask**                _tasks;        // task queue array (max_task_num len)
 416   CMTaskQueueSet*         _task_queues;  // task queue set
 417   ParallelTaskTerminator  _terminator;   // for termination
 418 
 419   // Two sync barriers that are used to synchronise tasks when an
 420   // overflow occurs. The algorithm is the following. All tasks enter
 421   // the first one to ensure that they have all stopped manipulating
 422   // the global data structures. After they exit it, they re-initialise
 423   // their data structures and task 0 re-initialises the global data
 424   // structures. Then, they enter the second sync barrier. This
 425   // ensures that no task starts doing work before all data
 426   // structures (local and global) have been re-initialised. When they
 427   // exit it, they are free to start working again.
 428   WorkGangBarrierSync     _first_overflow_barrier_sync;
 429   WorkGangBarrierSync     _second_overflow_barrier_sync;
 430 

 431   // this is set by any task, when an overflow on the global data
 432   // structures is detected.
 433   volatile bool           _has_overflown;
 434   // true: marking is concurrent, false: we're in remark
 435   volatile bool           _concurrent;
 436   // set at the end of a Full GC so that marking aborts
 437   volatile bool           _has_aborted;
 438 
 439   // used when remark aborts due to an overflow to indicate that
 440   // another concurrent marking phase should start
 441   volatile bool           _restart_for_overflow;
 442 
 443   // This is true from the very start of concurrent marking until the
 444   // point when all the tasks complete their work. It is really used
 445   // to determine the points between the end of concurrent marking and
 446   // time of remark.
 447   volatile bool           _concurrent_marking_in_progress;
 448 
 449   // verbose level
 450   CMVerboseLevel          _verbose_level;

 561   // Methods to enter the two overflow sync barriers
 562   void enter_first_sync_barrier(int task_num);
 563   void enter_second_sync_barrier(int task_num);
 564 
 565   ForceOverflowSettings* force_overflow_conc() {
 566     return &_force_overflow_conc;
 567   }
 568 
 569   ForceOverflowSettings* force_overflow_stw() {
 570     return &_force_overflow_stw;
 571   }
 572 
 573   ForceOverflowSettings* force_overflow() {
 574     if (concurrent()) {
 575       return force_overflow_conc();
 576     } else {
 577       return force_overflow_stw();
 578     }
 579   }
 580 
 581   // Live Data Counting data structures...
 582   // These data structures are initialized at the start of
 583   // marking. They are written to while marking is active.
 584   // They are aggregated during remark; the aggregated values
 585   // are then used to populate the _region_bm, _card_bm, and
 586   // the total live bytes, which are then subsequently updated
 587   // during cleanup.
 588 
 589   // An array of bitmaps (one bit map per task). Each bitmap
 590   // is used to record the cards spanned by the live objects
 591   // marked by that task/worker.
 592   BitMap* _count_card_bitmaps;
 593 
 594   // Used to record the number of marked live bytes
 595   // (for each region, by worker thread).
 596   size_t** _count_marked_bytes;
 597 
 598 public:
 599   // Manipulation of the global mark stack.
 600   // Notice that the first mark_stack_push is CAS-based, whereas the
 601   // two below are Mutex-based. This is OK since the first one is only
 602   // called during evacuation pauses and doesn't compete with the
 603   // other two (which are called by the marking tasks during
 604   // concurrent marking or remark).
 605   bool mark_stack_push(oop p) {
 606     _markStack.par_push(p);
 607     if (_markStack.overflow()) {
 608       set_has_overflown();
 609       return false;
 610     }
 611     return true;
 612   }
 613   bool mark_stack_push(oop* arr, int n) {
 614     _markStack.par_push_arr(arr, n);
 615     if (_markStack.overflow()) {
 616       set_has_overflown();
 617       return false;

 715   }
 716 
 717   // It grays an object by first marking it. Then, if it's behind the
 718   // global finger, it also pushes it on the global stack.
 719   void deal_with_reference(oop obj);
 720 
 721   ConcurrentMark(ReservedSpace rs, int max_regions);
 722   ~ConcurrentMark();
 723   ConcurrentMarkThread* cmThread() { return _cmThread; }
 724 
 725   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
 726   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 727 
 728   // The following three are interaction between CM and
 729   // G1CollectedHeap
 730 
 731   // This notifies CM that a root during initial-mark needs to be
 732   // grayed and it's MT-safe. Currently, we just mark it. But, in the
 733   // future, we can experiment with pushing it on the stack and we can
 734   // do this without changing G1CollectedHeap.
 735   void grayRoot(oop p, int worker_i);
 736 
 737   // It's used during evacuation pauses to gray a region, if
 738   // necessary, and it's MT-safe. It assumes that the caller has
 739   // marked any objects on that region. If _should_gray_objects is
 740   // true and we're still doing concurrent marking, the region is
 741   // pushed on the region stack, if it is located below the global
 742   // finger, otherwise we do nothing.
 743   void grayRegionIfNecessary(MemRegion mr);
 744 
 745   // It's used during evacuation pauses to mark and, if necessary,
 746   // gray a single object and it's MT-safe. It assumes the caller did
 747   // not mark the object. If _should_gray_objects is true and we're
 748   // still doing concurrent marking, the object is pushed on the
 749   // global stack, if it is located below the global finger, otherwise
 750   // we do nothing.
 751   void markAndGrayObjectIfNecessary(oop p, int worker_i);
 752 
 753   // It iterates over the heap and for each object it comes across it
 754   // will dump the contents of its reference fields, as well as
 755   // liveness information for the object and its referents. The dump
 756   // will be written to a file with the following name:
 757   // G1PrintReachableBaseFile + "." + str.
 758   // vo decides whether the prev (vo == UsePrevMarking), the next
 759   // (vo == UseNextMarking) marking information, or the mark word
 760   // (vo == UseMarkWord) will be used to determine the liveness of
 761   // each object / referent.
 762   // If all is true, all objects in the heap will be dumped, otherwise
 763   // only the live ones. In the dump the following symbols / abbreviations
 764   // are used:
 765   //   M : an explicitly live object (its bitmap bit is set)
 766   //   > : an implicitly live object (over tams)
 767   //   O : an object outside the G1 heap (typically: in the perm gen)
 768   //   NOT : a reference field whose referent is not live
 769   //   AND MARKED : indicates that an object is both explicitly and
 770   //   implicitly live (it should be one or the other, not both)
 771   void print_reachable(const char* str,

 775   void clearNextBitmap();
 776 
 777   // These two do the work that needs to be done before and after the
 778   // initial root checkpoint. Since this checkpoint can be done at two
 779   // different points (i.e. an explicit pause or piggy-backed on a
 780   // young collection), then it's nice to be able to easily share the
 781   // pre/post code. It might be the case that we can put everything in
 782   // the post method. TP
 783   void checkpointRootsInitialPre();
 784   void checkpointRootsInitialPost();
 785 
 786   // Do concurrent phase of marking, to a tentative transitive closure.
 787   void markFromRoots();
 788 
 789   // Process all unprocessed SATB buffers. It is called at the
 790   // beginning of an evacuation pause.
 791   void drainAllSATBBuffers();
 792 
 793   void checkpointRootsFinal(bool clear_all_soft_refs);
 794   void checkpointRootsFinalWork();

 795   void cleanup();
 796   void completeCleanup();
 797 
 798   // Mark in the previous bitmap.  NB: this is usually read-only, so use
 799   // this carefully!
 800   void markPrev(oop p);
 801 
 802   // Clears the mark in the next bitmap for the given object.
 803   void clear(oop p);
 804 
 805   // Clears marks for all objects in the given range, for both prev and
 806   // next bitmaps.  NB: the previous bitmap is usually read-only, so use
 807   // this carefully!
 808   void clearRangeBothMaps(MemRegion mr);
 809 
 810   // Record the current top of the mark and region stacks; a
 811   // subsequent oops_do() on the mark stack and
 812   // invalidate_entries_into_cset() on the region stack will iterate
 813   // only over indices valid at the time of this call.
 814   void set_oops_do_bound() {
 815     _markStack.set_oops_do_bound();
 816     _regionStack.set_oops_do_bound();
 817   }
 818   // Iterate over the oops in the mark stack and all local queues. It
 819   // also calls invalidate_entries_into_cset() on the region stack.
 820   void oops_do(OopClosure* f);
 821   // It is called at the end of an evacuation pause during marking so
 822   // that CM is notified of where the new end of the heap is. It
 823   // doesn't do anything if concurrent_marking_in_progress() is false,
 824   // unless the force parameter is true.

 893 
 894   void print_summary_info();
 895 
 896   void print_worker_threads_on(outputStream* st) const;
 897 
 898   // The following indicate whether a given verbose level has been
 899   // set. Notice that anything above stats is conditional to
 900   // _MARKING_VERBOSE_ having been set to 1
 901   bool verbose_stats() {
 902     return _verbose_level >= stats_verbose;
 903   }
 904   bool verbose_low() {
 905     return _MARKING_VERBOSE_ && _verbose_level >= low_verbose;
 906   }
 907   bool verbose_medium() {
 908     return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose;
 909   }
 910   bool verbose_high() {
 911     return _MARKING_VERBOSE_ && _verbose_level >= high_verbose;
 912   }
 913 
 914   // Counting data structure accessors
 915 
 916   // Returns the card bitmap for the given task or worker id; worker_i must be in [0, _max_task_num).
 917   BitMap& count_card_bitmap_for(int worker_i) {
 918     assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob"); // cast: _max_task_num is a size_t; worker_i is already known to be >= 0
 919     assert(_count_card_bitmaps != NULL, "uninitialized");
 920     BitMap& task_card_bm = _count_card_bitmaps[worker_i];
 921     assert(task_card_bm.size() == _card_bm.size(), "size mismatch");
 922     return task_card_bm;
 923   }
 924 
 925   // Returns the array containing the marked bytes for each region,
 926   // for the given worker or task id; worker_i must be in [0, _max_task_num).
 927   size_t* count_marked_bytes_for(int worker_i) {
 928     assert(0 <= worker_i && (size_t) worker_i < _max_task_num, "oob"); // cast: _max_task_num is a size_t; worker_i is already known to be >= 0
 929     assert(_count_marked_bytes != NULL, "uninitialized");
 930     size_t* marked_bytes_array = _count_marked_bytes[worker_i];
 931     assert(marked_bytes_array != NULL, "uninitialized");
 932     return marked_bytes_array;
 933   }
 934 
 935   // Add the given region's size to the # marked bytes for
 936   // the given worker and region. Set the bits in the
 937   // worker's/task's card bitmap that are associated
 938   // with the range of cards spanned by the region.
 939   // Note the memory region could be just one object.
 940   void add_to_count_data_for(MemRegion mr, int worker_i);
 941 
 942   // As above, but for a single object
 943   void add_to_count_data_for(oop obj, int worker_i);
 944 
 945   // As above but when we have already been given the heap
 946   // region.
 947   void add_to_count_data_for(MemRegion mr, HeapRegion* hr, int worker_i);
 948 
 949   // As above, but for a single object
 950   void add_to_count_data_for(oop obj, HeapRegion* hr, int worker_i);
 951 
 952   // Updates the counting data with liveness info recorded for a
 953   // region (typically a GCLab).
 954   void add_to_count_data_for_region(MemRegion lab_mr,
 955                                     BitMap* lab_card_bm,
 956                                     intptr_t lab_bottom_card_num,
 957                                     size_t lab_marked_bytes,
 958                                     int worker_i);
 959 
 960   // Clears the count data for the given region from _all_ of
 961   // the per-task counting data structures.
 962   void clear_count_data_for_heap_region(HeapRegion* hr);
 963 
 964 protected:
 965   // Clear all the per-task bitmaps and arrays used to store the
 966   // counting data.
 967   void clear_all_count_data();
 968 
 969   // Aggregates the counting data for each worker/task
 970   // that was constructed while marking. Also sets
 971   // the amount of marked bytes for each region and
 972   // the top at concurrent mark count.
 973   void aggregate_all_count_data();
 974 
 975   // Verification routine
 976   void verify_count_data();
 977 };
 978 
 979 // A class representing a marking task.
 980 class CMTask : public TerminatorTerminator {
 981 private:
 982   enum PrivateConstants {
 983     // the regular clock call is called once the scanned words reaches
 984     // this limit
 985     words_scanned_period          = 12*1024,
 986     // the regular clock call is called once the number of visited
 987     // references reaches this limit
 988     refs_reached_period           = 384,
 989     // initial value for the hash seed, used in the work stealing code
 990     init_hash_seed                = 17,
 991     // how many entries will be transferred between global stack and
 992     // local queues
 993     global_stack_transfer_size    = 16
 994   };
 995 
 996   int                         _task_id;