rev 4535 : 6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl

          --- old/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp
          +++ new/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp
… 215 lines elided …
 216  216    static const size_t RegionSize;
 217  217    static const size_t RegionSizeBytes;
 218  218  
 219  219    // Mask for the bits in a size_t to get an offset within a region.
 220  220    static const size_t RegionSizeOffsetMask;
 221  221    // Mask for the bits in a pointer to get an offset within a region.
 222  222    static const size_t RegionAddrOffsetMask;
 223  223    // Mask for the bits in a pointer to get the address of the start of a region.
 224  224    static const size_t RegionAddrMask;
 225  225  
      226 +  static const size_t Log2BlockSize;
      227 +  static const size_t BlockSize;
      228 +  static const size_t BlockSizeBytes;
      229 +
      230 +  static const size_t BlockSizeOffsetMask;
      231 +  static const size_t BlockAddrOffsetMask;
      232 +  static const size_t BlockAddrMask;
      233 +
      234 +  static const size_t BlocksPerRegion;
      235 +  static const size_t Log2BlocksPerRegion;
      236 +
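
The Block-size constants mirror the Region-size constants above: a block is a power-of-two fraction of a region, and the three masks play the same roles as the Region masks. A minimal standalone sketch of the presumed relationships, with illustrative sizes; the actual definitions are in psParallelCompact.cpp, which is not part of this file:

    #include <cstddef>
    #include <cstdio>

    int main() {
      // Illustrative assumptions, not values taken from the patch.
      const size_t LogHeapWordSize = 3;     // 64-bit HeapWords
      const size_t Log2RegionSize  = 9;     // 512-word regions
      const size_t Log2BlockSize   = 7;     // 128-word blocks

      const size_t RegionSize     = size_t(1) << Log2RegionSize;
      const size_t BlockSize      = size_t(1) << Log2BlockSize;
      const size_t BlockSizeBytes = BlockSize << LogHeapWordSize;

      // The masks are defined by analogy with the Region masks above.
      const size_t BlockSizeOffsetMask = BlockSize - 1;         // word offset within a block
      const size_t BlockAddrOffsetMask = BlockSizeBytes - 1;    // byte offset within a block
      const size_t BlockAddrMask       = ~BlockAddrOffsetMask;  // start address of a block

      const size_t BlocksPerRegion     = RegionSize >> Log2BlockSize;
      const size_t Log2BlocksPerRegion = Log2RegionSize - Log2BlockSize;

      std::printf("BlockSize=%zu words (%zu bytes), BlocksPerRegion=%zu (log2 %zu)\n",
                  BlockSize, BlockSizeBytes, BlocksPerRegion, Log2BlocksPerRegion);
      std::printf("offset mask=0x%zx addr offset mask=0x%zx addr mask=0x%zx\n",
                  BlockSizeOffsetMask, BlockAddrOffsetMask, BlockAddrMask);
      return 0;
    }
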
 226  237    class RegionData
 227  238    {
 228  239    public:
 229  240      // Destination address of the region.
 230  241      HeapWord* destination() const { return _destination; }
 231  242  
 232  243      // The first region containing data destined for this region.
 233  244      size_t source_region() const { return _source_region; }
 234  245  
 235  246      // The object (if any) starting in this region and ending in a different
… 32 lines elided …
 268  279      // decremented (atomically) and when it reaches 0, it can be claimed and
 269  280      // then filled.
 270  281      //
 271  282      // A region is claimed for processing by atomically changing the
 272  283      // destination_count to the claimed value (dc_claimed).  After a region has
 273  284      // been filled, the destination_count should be set to the completed value
 274  285      // (dc_completed).
 275  286      inline uint destination_count() const;
 276  287      inline uint destination_count_raw() const;
 277  288  
      289 +    // Whether the block table for this region has been filled.
      290 +    inline bool blocks_filled() const;
      291 +
      292 +    // Number of times the block table was filled.
      293 +    DEBUG_ONLY(inline size_t blocks_filled_count() const;)
      294 +
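
The counting-and-claiming protocol described in the comment above can be sketched with a plain C++11 atomic standing in for HotSpot's Atomic class; the sentinel values are placeholders, not the real dc_claimed and dc_completed encodings, and the packing of the count with the live-obj size into _dc_and_los is ignored:

    #include <atomic>

    // Sketch only: the destination_count life cycle for one region.
    struct RegionClaimSketch {
      static const unsigned claimed   = 0xfffffffeu;  // placeholder for dc_claimed
      static const unsigned completed = 0xffffffffu;  // placeholder for dc_completed
      std::atomic<unsigned> count;                    // stands in for destination_count

      // A thread that finishes copying out of a source region decrements the
      // count of the destination region; reaching 0 makes it fillable.
      bool decrement_leaves_fillable() { return count.fetch_sub(1u) == 1u; }

      // Exactly one thread wins the 0 -> claimed transition and fills the region.
      bool try_claim() {
        unsigned expected = 0;
        return count.compare_exchange_strong(expected, claimed);
      }

      // After filling, the region is marked completed.
      void mark_completed() { count.store(completed); }
    };
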
 278  295      // The location of the java heap data that corresponds to this region.
 279  296      inline HeapWord* data_location() const;
 280  297  
 281  298      // The highest address referenced by objects in this region.
 282  299      inline HeapWord* highest_ref() const;
 283  300  
 284  301      // Whether this region is available to be claimed, has been claimed, or has
 285  302      // been completed.
 286  303      //
 287  304      // Minor subtlety:  claimed() returns true if the region is marked
… 4 lines elided …
 292  309      bool completed() const { return _dc_and_los >= dc_completed; }
 293  310  
 294  311      // These are not atomic.
 295  312      void set_destination(HeapWord* addr)       { _destination = addr; }
 296  313      void set_source_region(size_t region)      { _source_region = region; }
 297  314      void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
 298  315      void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
 299  316      void set_partial_obj_size(size_t words)    {
 300  317        _partial_obj_size = (region_sz_t) words;
 301  318      }
      319 +    inline void set_blocks_filled();
 302  320  
 303  321      inline void set_destination_count(uint count);
 304  322      inline void set_live_obj_size(size_t words);
 305  323      inline void set_data_location(HeapWord* addr);
 306  324      inline void set_completed();
 307  325      inline bool claim_unsafe();
 308  326  
 309  327      // These are atomic.
 310  328      inline void add_live_obj(size_t words);
 311  329      inline void set_highest_ref(HeapWord* addr);
… 12 lines elided …
 324  342      static const region_sz_t dc_one;             // 1, shifted appropriately.
 325  343      static const region_sz_t dc_claimed;         // Region has been claimed.
 326  344      static const region_sz_t dc_completed;       // Region has been completed.
 327  345      static const region_sz_t los_mask;           // Mask for live obj size.
 328  346  
 329  347      HeapWord*            _destination;
 330  348      size_t               _source_region;
 331  349      HeapWord*            _partial_obj_addr;
 332  350      region_sz_t          _partial_obj_size;
 333  351      region_sz_t volatile _dc_and_los;
      352 +    bool                 _blocks_filled;
      353 +
 334  354  #ifdef ASSERT
      355 +    size_t               _blocks_filled_count;   // Number of block table fills.
      356 +
 335  357      // These enable optimizations that are only partially implemented.  Use
 336  358      // debug builds to prevent the code fragments from breaking.
 337  359      HeapWord*            _data_location;
 338  360      HeapWord*            _highest_ref;
 339  361  #endif  // #ifdef ASSERT
 340  362  
 341  363  #ifdef ASSERT
 342  364     public:
 343      -    uint            _pushed;   // 0 until region is pushed onto a worker's stack
      365 +    uint                 _pushed;   // 0 until region is pushed onto a stack
 344  366     private:
 345  367  #endif
 346  368    };
 347  369  
      370 +  // "Blocks" allow shorter sections of the bitmap to be searched.  Each Block
      371 +  // holds an offset, which is the amount of live data in the Region to the left
      372 +  // of the first live object that starts in the Block.
      373 +  class BlockData
      374 +  {
      375 +  public:
      376 +    typedef unsigned short int blk_ofs_t;
      377 +
      378 +    blk_ofs_t offset() const    { return _offset; }
      379 +    void set_offset(size_t val) { _offset = (blk_ofs_t)val; }
      380 +
      381 +  private:
      382 +    blk_ofs_t _offset;
      383 +  };
      384 +
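
As a worked example of what offset() records, assume 512-word regions divided into four 128-word blocks (illustrative sizes), and a region whose only live data is a 50-word object starting at word 10 and a 20-word object starting at word 200:

    // block 0 (words   0..127): first object starting here is at word  10 -> offset  0
    // block 1 (words 128..255): first object starting here is at word 200 -> offset 50
    // blocks 2 and 3 (words 256..511): no object starts here, so their entries are not needed

A lookup for the object at word 200 can then start counting live words at the beginning of block 1 with 50 words already accounted for, instead of re-scanning the bitmap from the start of the region; that is the shorter search the class comment refers to. Because an offset is bounded by the live words in a single region, an unsigned short is wide enough whenever RegionSize is at most 2^16 words.
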
 348  385  public:
 349  386    ParallelCompactData();
 350  387    bool initialize(MemRegion covered_region);
 351  388  
 352  389    size_t region_count() const { return _region_count; }
 353  390  
 354  391    // Convert region indices to/from RegionData pointers.
 355  392    inline RegionData* region(size_t region_idx) const;
 356  393    inline size_t     region(const RegionData* const region_ptr) const;
 357  394  
 358      -  // Returns true if the given address is contained within the region
 359      -  bool region_contains(size_t region_index, HeapWord* addr);
      395 +  size_t block_count() const { return _block_count; }
      396 +  inline BlockData* block(size_t block_idx) const;
      397 +  inline size_t     block(const BlockData* block_ptr) const;
 360  398  
 361  399    void add_obj(HeapWord* addr, size_t len);
 362  400    void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
 363  401  
 364  402    // Fill in the regions covering [beg, end) so that no data moves; i.e., the
 365  403    // destination of region n is simply the start of region n.  The argument beg
 366  404    // must be region-aligned; end need not be.
 367  405    void summarize_dense_prefix(HeapWord* beg, HeapWord* end);
 368  406  
 369  407    HeapWord* summarize_split_space(size_t src_region, SplitInfo& split_info,
… 19 lines elided …
 389  427    inline size_t     addr_to_region_idx(const HeapWord* addr) const;
 390  428    inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
 391  429    inline HeapWord*  region_to_addr(size_t region) const;
 392  430    inline HeapWord*  region_to_addr(size_t region, size_t offset) const;
 393  431    inline HeapWord*  region_to_addr(const RegionData* region) const;
 394  432  
 395  433    inline HeapWord*  region_align_down(HeapWord* addr) const;
 396  434    inline HeapWord*  region_align_up(HeapWord* addr) const;
 397  435    inline bool       is_region_aligned(HeapWord* addr) const;
 398  436  
      437 +  // Analogous to region_offset() for blocks.
      438 +  size_t     block_offset(const HeapWord* addr) const;
      439 +  size_t     addr_to_block_idx(const HeapWord* addr) const;
      440 +  size_t     addr_to_block_idx(const oop obj) const {
      441 +    return addr_to_block_idx((HeapWord*) obj);
      442 +  }
      443 +  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
      444 +  inline HeapWord*  block_to_addr(size_t block) const;
      445 +  inline size_t     region_to_block_idx(size_t region) const;
      446 +
      447 +  inline HeapWord*  block_align_down(HeapWord* addr) const;
      448 +  inline HeapWord*  block_align_up(HeapWord* addr) const;
      449 +  inline bool       is_block_aligned(HeapWord* addr) const;
      450 +
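
These conversions are presumably meant to satisfy the same kind of invariants as the region conversions above; a sketch, assuming the covered range starts block-aligned (as the mask-based implementations later in this file require) and ignoring standalone compilability:

    // Sketch only; uses HotSpot's two-argument assert.
    void check_block_mapping(const ParallelCompactData& data, HeapWord* addr) {
      const size_t blk = data.addr_to_block_idx(addr);
      // A block index maps back to the block's first word, and block_offset()
      // is the word distance from that first word.
      assert(data.block_to_addr(blk) == data.block_align_down(addr), "block start");
      assert(data.block_to_addr(blk) + data.block_offset(addr) == addr, "round trip");
      // Regions are composed of whole blocks; region r begins at block
      // r << Log2BlocksPerRegion.
      const size_t region = data.addr_to_region_idx(addr);
      assert(data.region_to_block_idx(region) <= blk &&
             blk < data.region_to_block_idx(region + 1), "block within its region");
    }
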
 399  451    // Return the address one past the end of the partial object.
 400  452    HeapWord* partial_obj_end(size_t region_idx) const;
 401  453  
 402      -  // Return the new location of the object p after the
 403      -  // the compaction.
      454 +  // Return the location of the object after compaction.
 404  455    HeapWord* calc_new_pointer(HeapWord* addr);
 405  456  
 406  457    HeapWord* calc_new_pointer(oop p) {
 407  458      return calc_new_pointer((HeapWord*) p);
 408  459    }
 409  460  
 410  461    // Return the updated address for the given klass
 411  462    klassOop calc_new_klass(klassOop);
 412  463  
 413  464  #ifdef  ASSERT
 414  465    void verify_clear(const PSVirtualSpace* vspace);
 415  466    void verify_clear();
 416  467  #endif  // #ifdef ASSERT
 417  468  
 418  469  private:
      470 +  bool initialize_block_data();
 419  471    bool initialize_region_data(size_t region_size);
 420  472    PSVirtualSpace* create_vspace(size_t count, size_t element_size);
 421  473  
 422  474  private:
 423  475    HeapWord*       _region_start;
 424  476  #ifdef  ASSERT
 425  477    HeapWord*       _region_end;
 426  478  #endif  // #ifdef ASSERT
 427  479  
 428  480    PSVirtualSpace* _region_vspace;
 429  481    RegionData*     _region_data;
 430  482    size_t          _region_count;
      483 +
      484 +  PSVirtualSpace* _block_vspace;
      485 +  BlockData*      _block_data;
      486 +  size_t          _block_count;
 431  487  };
 432  488  
 433  489  inline uint
 434  490  ParallelCompactData::RegionData::destination_count_raw() const
 435  491  {
 436  492    return _dc_and_los & dc_mask;
 437  493  }
 438  494  
 439  495  inline uint
 440  496  ParallelCompactData::RegionData::destination_count() const
 441  497  {
 442  498    return destination_count_raw() >> dc_shift;
 443  499  }
 444  500  
      501 +inline bool
      502 +ParallelCompactData::RegionData::blocks_filled() const
      503 +{
      504 +  return _blocks_filled;
      505 +}
      506 +
      507 +#ifdef ASSERT
      508 +inline size_t
      509 +ParallelCompactData::RegionData::blocks_filled_count() const
      510 +{
      511 +  return _blocks_filled_count;
      512 +}
      513 +#endif // #ifdef ASSERT
      514 +
 445  515  inline void
      516 +ParallelCompactData::RegionData::set_blocks_filled()
      517 +{
      518 +  _blocks_filled = true;
      519 +  // Debug builds count the number of times the table was filled.
      520 +  DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
      521 +}
      522 +
      523 +inline void
 446  524  ParallelCompactData::RegionData::set_destination_count(uint count)
 447  525  {
 448  526    assert(count <= (dc_completed >> dc_shift), "count too large");
 449  527    const region_sz_t live_sz = (region_sz_t) live_obj_size();
 450  528    _dc_and_los = (count << dc_shift) | live_sz;
 451  529  }
 452  530  
 453  531  inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
 454  532  {
 455  533    assert(words <= los_mask, "would overflow");
… 73 lines elided …
 529  607  }
 530  608  
 531  609  inline size_t
 532  610  ParallelCompactData::region(const RegionData* const region_ptr) const
 533  611  {
 534  612    assert(region_ptr >= _region_data, "bad arg");
 535  613    assert(region_ptr <= _region_data + region_count(), "bad arg");
 536  614    return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
 537  615  }
 538  616  
      617 +inline ParallelCompactData::BlockData*
      618 +ParallelCompactData::block(size_t n) const {
      619 +  assert(n < block_count(), "bad arg");
      620 +  return _block_data + n;
      621 +}
      622 +
 539  623  inline size_t
 540  624  ParallelCompactData::region_offset(const HeapWord* addr) const
 541  625  {
 542  626    assert(addr >= _region_start, "bad addr");
 543  627    assert(addr <= _region_end, "bad addr");
 544  628    return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
 545  629  }
 546  630  
 547  631  inline size_t
 548  632  ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
… 46 lines elided …
 595  679    assert(addr <= _region_end, "bad addr");
 596  680    return region_align_down(addr + RegionSizeOffsetMask);
 597  681  }
 598  682  
 599  683  inline bool
 600  684  ParallelCompactData::is_region_aligned(HeapWord* addr) const
 601  685  {
 602  686    return region_offset(addr) == 0;
 603  687  }
 604  688  
      689 +inline size_t
      690 +ParallelCompactData::block_offset(const HeapWord* addr) const
      691 +{
      692 +  assert(addr >= _region_start, "bad addr");
      693 +  assert(addr <= _region_end, "bad addr");
      694 +  return (size_t(addr) & BlockAddrOffsetMask) >> LogHeapWordSize;
      695 +}
      696 +
      697 +inline size_t
      698 +ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
      699 +{
      700 +  assert(addr >= _region_start, "bad addr");
      701 +  assert(addr <= _region_end, "bad addr");
      702 +  return pointer_delta(addr, _region_start) >> Log2BlockSize;
      703 +}
      704 +
      705 +inline ParallelCompactData::BlockData*
      706 +ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
      707 +{
      708 +  return block(addr_to_block_idx(addr));
      709 +}
      710 +
      711 +inline HeapWord*
      712 +ParallelCompactData::block_to_addr(size_t block) const
      713 +{
      714 +  assert(block < _block_count, "block out of range");
      715 +  return _region_start + (block << Log2BlockSize);
      716 +}
      717 +
      718 +inline size_t
      719 +ParallelCompactData::region_to_block_idx(size_t region) const
      720 +{
      721 +  return region << Log2BlocksPerRegion;
      722 +}
      723 +
      724 +inline HeapWord*
      725 +ParallelCompactData::block_align_down(HeapWord* addr) const
      726 +{
      727 +  assert(addr >= _region_start, "bad addr");
      728 +  assert(addr < _region_end + RegionSize, "bad addr");
      729 +  return (HeapWord*)(size_t(addr) & BlockAddrMask);
      730 +}
      731 +
      732 +inline HeapWord*
      733 +ParallelCompactData::block_align_up(HeapWord* addr) const
      734 +{
      735 +  assert(addr >= _region_start, "bad addr");
      736 +  assert(addr <= _region_end, "bad addr");
      737 +  return block_align_down(addr + BlockSizeOffsetMask);
      738 +}
      739 +
      740 +inline bool
      741 +ParallelCompactData::is_block_aligned(HeapWord* addr) const
      742 +{
      743 +  return block_offset(addr) == 0;
      744 +}
      745 +
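
Together these pieces let a forwarding-address lookup start its bitmap search at a block boundary rather than a region boundary, which is the speed-up named in the synopsis. A sketch with a hypothetical function name, glossing over access control, and assuming live_words_in_range() is the ParMarkBitMap query that counts live words between a starting address and an object; the real calc_new_pointer() lives in psParallelCompact.cpp and is not shown here:

    // Sketch only, not the code from this changeset.
    HeapWord* calc_new_pointer_sketch(ParallelCompactData& sd, HeapWord* addr) {
      ParallelCompactData::RegionData* const region_ptr = sd.addr_to_region_ptr(addr);
      HeapWord* const destination = region_ptr->destination();

      // Fill the region's portion of the block table lazily, the first time a
      // lookup needs it (fill_blocks() is declared in PSParallelCompact below).
      if (!region_ptr->blocks_filled()) {
        PSParallelCompact::fill_blocks(sd.addr_to_region_idx(addr));
        region_ptr->set_blocks_filled();
      }

      // The block's offset already covers live words earlier in the region, so
      // the bitmap only has to be searched from the start of addr's block
      // rather than from the start of its region.
      HeapWord* const search_start = sd.block_align_down(addr);
      const size_t block_ofs   = sd.addr_to_block_ptr(addr)->offset();
      const size_t live_in_blk =
          PSParallelCompact::mark_bitmap()->live_words_in_range(search_start, oop(addr));
      return destination + block_ofs + live_in_blk;
    }
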
 605  746  // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
 606  747  // do_addr() method.
 607  748  //
 608  749  // The closure is initialized with the number of heap words to process
 609  750  // (words_remaining()), and becomes 'full' when it reaches 0.  The do_addr()
 610  751  // methods in subclasses should update the total as words are processed.  Since
 611  752  // only one subclass actually uses this mechanism to terminate iteration, the
 612  753  // default initial value is > 0.  The implementation is here and not in the
 613  754  // single subclass that uses it to avoid making is_full() virtual, and thus
 614  755  // adding a virtual call per live object.
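
A minimal sketch of the termination mechanism described above, using hypothetical names (the real closure hierarchy follows in the elided lines):

    // Sketch only: a closure constructed with a word budget that reports
    // itself full once the budget is consumed, so iteration can stop.
    class BudgetedClosureSketch {
     public:
      explicit BudgetedClosureSketch(size_t words) : _words_remaining(words) { }

      bool   is_full() const         { return _words_remaining == 0; }
      size_t words_remaining() const { return _words_remaining; }

      // Called for each live object; a real subclass would also process the
      // words in [addr, addr + words).
      void do_addr(HeapWord* addr, size_t words) {
        _words_remaining -= MIN2(words, _words_remaining);
      }

     private:
      size_t _words_remaining;
    };
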
… 157 lines elided …
 772  913  // also ready for filling.  The ready list is initially filled with empty
 773  914  // regions and regions compacting into themselves.  There is always at least 1
 774  915  // region that can be put on the ready list.  The regions are atomically added
 775  916  // and removed from the ready list.
 776  917  
 777  918  class PSParallelCompact : AllStatic {
 778  919   public:
 779  920    // Convenient access to type names.
 780  921    typedef ParMarkBitMap::idx_t idx_t;
 781  922    typedef ParallelCompactData::RegionData RegionData;
      923 +  typedef ParallelCompactData::BlockData BlockData;
 782  924  
 783  925    typedef enum {
 784  926      perm_space_id, old_space_id, eden_space_id,
 785  927      from_space_id, to_space_id, last_space_id
 786  928    } SpaceId;
 787  929  
 788  930   public:
 789  931    // Inline closure decls
 790  932    //
 791  933    class IsAliveClosure: public BoolObjectClosure {
… 188 lines elided …
 980 1122    static void summarize_spaces_quick();
 981 1123    static void summarize_space(SpaceId id, bool maximum_compaction);
 982 1124    static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);
 983 1125  
 984 1126    // Adjust addresses in roots.  Does not adjust addresses in heap.
 985 1127    static void adjust_roots();
 986 1128  
 987 1129    // Serial code executed in preparation for the compaction phase.
 988 1130    static void compact_prologue();
 989 1131  
     1132 +  DEBUG_ONLY(static void write_block_fill_histogram(outputStream* const out);)
     1133 +
 990 1134    // Move objects to new locations.
 991 1135    static void compact_perm(ParCompactionManager* cm);
 992 1136    static void compact();
 993 1137  
 994 1138    // Add available regions to the stack and draining tasks to the task queue.
 995 1139    static void enqueue_region_draining_tasks(GCTaskQueue* q,
 996 1140                                              uint parallel_gc_threads);
 997 1141  
 998 1142    // Add dense prefix update tasks to the task queue.
 999 1143    static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
… 151 lines elided …
1151 1295                                             SpaceId src_space_id,
1152 1296                                             size_t beg_region,
1153 1297                                             HeapWord* end_addr);
1154 1298  
1155 1299    // Fill a region, copying objects from one or more source regions.
1156 1300    static void fill_region(ParCompactionManager* cm, size_t region_idx);
1157 1301    static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
1158 1302      fill_region(cm, region);
1159 1303    }
1160 1304  
     1305 +  // Fill in the block table for the specified region.
     1306 +  static void fill_blocks(size_t region_idx);
     1307 +
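
A sketch of what such a fill could look like, following the BlockData comment earlier in this file; the real fill_blocks() is in psParallelCompact.cpp and is not shown here, and find_obj_beg_addr() and obj_size_at() are placeholders for the actual mark-bitmap queries:

    // Sketch only: scan the region's portion of the mark bitmap, keep a
    // running total of live words, and the first time an object is seen to
    // start in a block, record the total so far as that block's offset.
    void fill_blocks_sketch(ParallelCompactData& sd, size_t region_idx) {
      HeapWord* const region_beg = sd.region_to_addr(region_idx);
      HeapWord* const region_end = region_beg + ParallelCompactData::RegionSize;

      // The real code must also account for a partial object spilling in from
      // the previous region (RegionData::partial_obj_size()).
      size_t live_so_far = 0;
      size_t last_filled = size_t(-1);   // force a fill at the first object

      for (HeapWord* obj = find_obj_beg_addr(region_beg, region_end);
           obj < region_end;
           obj = find_obj_beg_addr(obj + obj_size_at(obj), region_end)) {
        const size_t block = sd.addr_to_block_idx(obj);
        if (block != last_filled) {
          sd.block(block)->set_offset(live_so_far);  // live words left of obj
          last_filled = block;
        }
        live_so_far += obj_size_at(obj);
      }
    }
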
1161 1308    // Update the deferred objects in the space.
1162 1309    static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
1163 1310  
1164 1311    static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }
1165 1312    static ParallelCompactData& summary_data() { return _summary_data; }
1166 1313  
1167 1314    static inline void adjust_pointer(oop* p)       { adjust_pointer(p, false); }
1168 1315    static inline void adjust_pointer(narrowOop* p) { adjust_pointer(p, false); }
1169 1316  
1170 1317    // Reference Processing
… 305 lines elided …