< prev index next >

src/hotspot/share/opto/regmask.hpp

Print this page
rev 53991 : 8220159: Optimize various RegMask operations by introducing watermarks
Reviewed-by: neliasso

@@ -28,26 +28,10 @@
 #include "code/vmreg.hpp"
 #include "opto/optoreg.hpp"
 #include "utilities/count_leading_zeros.hpp"
 #include "utilities/count_trailing_zeros.hpp"
 
-// Some fun naming (textual) substitutions:
-//
-// RegMask::get_low_elem() ==> RegMask::find_first_elem()
-// RegMask::Special        ==> RegMask::Empty
-// RegMask::_flags         ==> RegMask::is_AllStack()
-// RegMask::operator<<=()  ==> RegMask::Insert()
-// RegMask::operator>>=()  ==> RegMask::Remove()
-// RegMask::Union()        ==> RegMask::OR
-// RegMask::Inter()        ==> RegMask::AND
-//
-// OptoRegister::RegName   ==> OptoReg::Name
-//
-// OptoReg::stack0()       ==> _last_Mach_Reg  or ZERO in core version
-//
-// numregs in chaitin      ==> proper degree in chaitin
-
 //-------------Non-zero bit search methods used by RegMask---------------------
 // Find lowest 1, undefined if empty/0
 static int find_lowest_bit(uint32_t mask) {
   return count_trailing_zeros(mask);
 }

@@ -76,18 +60,24 @@
     // on the stack (stack registers) up to some interesting limit.  Methods
     // that need more parameters will NOT be compiled.  On Intel, the limit
     // is something like 90+ parameters.
     int _A[RM_SIZE];
   };
+  // The low and high water marks represents the lowest and highest word
+  // that might contain set register mask bits, respectively. We guarantee
+  // that there are no bits in words outside this range, but any word at
+  // and between the two marks can still be 0.
+  int _lwm;
+  int _hwm;
 
   enum {
     _WordBits    = BitsPerInt,
     _LogWordBits = LogBitsPerInt,
     _RM_SIZE     = RM_SIZE   // local constant, imported, then hidden by #undef
   };
 
-public:
+ public:
   enum { CHUNK_SIZE = RM_SIZE*_WordBits };
 
   // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
   // Also, consider the maximum alignment size for a normally allocated
   // value.  Since we allocate register pairs but not register quads (at

@@ -111,32 +101,45 @@
   // in directly.  Calls to this look something like RM(1,2,3,4);
   RegMask(
 #   define BODY(I) int a##I,
     FORALL_BODY
 #   undef BODY
-    int dummy = 0 ) {
+    int dummy = 0) {
 #   define BODY(I) _A[I] = a##I;
     FORALL_BODY
 #   undef BODY
+    _lwm = 0;
+    while ((_lwm < RM_SIZE - 1) && _A[_lwm] == 0) _lwm++;
+    _hwm = RM_SIZE - 1;
+    while (_hwm > 0 && _A[_hwm] == 0) _hwm--;
+    assert(valid_watermarks(), "post-condition");
   }
 
   // Handy copying constructor
-  RegMask( RegMask *rm ) {
-#   define BODY(I) _A[I] = rm->_A[I];
-    FORALL_BODY
-#   undef BODY
+  RegMask(RegMask *rm) {
+    _hwm = rm->_hwm;
+    _lwm = rm->_lwm;
+    for (int i = 0; i < RM_SIZE; i++) {
+      _A[i] = rm->_A[i];
+    }
+    assert(valid_watermarks(), "post-condition");
   }
 
   // Construct an empty mask
-  RegMask( ) { Clear(); }
+  RegMask() {
+    Clear();
+  }
 
   // Construct a mask with a single bit
-  RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); }
+  RegMask(OptoReg::Name reg) {
+    Clear();
+    Insert(reg);
+  }
 
   // Check for register being in mask
-  int Member( OptoReg::Name reg ) const {
-    assert( reg < CHUNK_SIZE, "" );
+  int Member(OptoReg::Name reg) const {
+    assert(reg < CHUNK_SIZE, "");
     return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
   }
 
   // The last bit in the register mask indicates that the mask should repeat
   // indefinitely with ONE bits.  Returns TRUE if mask is infinite or

@@ -150,41 +153,61 @@
   // Insert() instead works because the index into _A in computed instead of
   // constant.  See bug 4665841.
   void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
 
   // Test for being a not-empty mask.
-  int is_NotEmpty( ) const {
+  int is_NotEmpty() const {
+    assert(valid_watermarks(), "sanity");
     int tmp = 0;
-#   define BODY(I) tmp |= _A[I];
-    FORALL_BODY
-#   undef BODY
+    for (int i = _lwm; i <= _hwm; i++) {
+      tmp |= _A[i];
+    }
     return tmp;
   }
 
   // Find lowest-numbered register from mask, or BAD if mask is empty.
   OptoReg::Name find_first_elem() const {
-    int base, bits;
-#   define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
-    FORALL_BODY
-#   undef BODY
-      { base = OptoReg::Bad; bits = 1<<0; }
-    return OptoReg::Name(base + find_lowest_bit(bits));
+    assert(valid_watermarks(), "sanity");
+    for (int i = _lwm; i <= _hwm; i++) {
+      int bits = _A[i];
+      if (bits) {
+        return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits));
+      }
+    }
+    return OptoReg::Name(OptoReg::Bad);
   }
   // Get highest-numbered register from mask, or BAD if mask is empty.
   OptoReg::Name find_last_elem() const {
-    int base, bits;
-#   define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
-    FORALL_BODY
-#   undef BODY
-      { base = OptoReg::Bad; bits = 1<<0; }
-    return OptoReg::Name(base + find_highest_bit(bits));
+    assert(valid_watermarks(), "sanity");
+    for (int i = _hwm; i >= _lwm; i--) {
+      int bits = _A[i];
+      if (bits) {
+        return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits));
+      }
+    }
+    return OptoReg::Name(OptoReg::Bad);
   }
 
   // Clear out partial bits; leave only aligned adjacent bit pairs.
   void clear_to_pairs();
-  // Verify that the mask contains only aligned adjacent bit pairs
-  void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); }
+
+#ifdef ASSERT
+  // Verify watermarks are sane, i.e., within bounds and that no
+  // register words below or above the watermarks have bits set.
+  bool valid_watermarks() const {
+    assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d", _hwm);
+    assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d", _lwm);
+    for (int i = 0; i < _lwm; i++) {
+      assert(_A[i] == 0, "_lwm too high: %d regs at: %d", _lwm, i);
+    }
+    for (int i = _hwm + 1; i < RM_SIZE; i++) {
+      assert(_A[i] == 0, "_hwm too low: %d regs at: %d", _hwm, i);
+    }
+    return true;
+  }
+#endif // !ASSERT
+
   // Test that the mask contains only aligned adjacent bit pairs
   bool is_aligned_pairs() const;
 
   // mask is a pair of misaligned registers
   bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); }

@@ -210,79 +233,103 @@
 
   // Clear out partial bits; leave only aligned adjacent bit sets of size.
   void clear_to_sets(const int size);
   // Smear out partial bits to aligned adjacent bit sets.
   void smear_to_sets(const int size);
-  // Verify that the mask contains only aligned adjacent bit sets
-  void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); }
   // Test that the mask contains only aligned adjacent bit sets
   bool is_aligned_sets(const int size) const;
 
   // Test for a single adjacent set
   int is_bound_set(const int size) const;
 
   static bool is_vector(uint ireg);
   static int num_registers(uint ireg);
 
   // Fast overlap test.  Non-zero if any registers in common.
-  int overlap( const RegMask &rm ) const {
-    return
-#   define BODY(I) (_A[I] & rm._A[I]) |
-    FORALL_BODY
-#   undef BODY
-    0 ;
+  int overlap(const RegMask &rm) const {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    int hwm = MIN2(_hwm, rm._hwm);
+    int lwm = MAX2(_lwm, rm._lwm);
+    int result = 0;
+    for (int i = lwm; i <= hwm; i++) {
+      result |= _A[i] & rm._A[i];
+    }
+    return result;
   }
 
   // Special test for register pressure based splitting
   // UP means register only, Register plus stack, or stack only is DOWN
   bool is_UP() const;
 
   // Clear a register mask
-  void Clear( ) {
-#   define BODY(I) _A[I] = 0;
-    FORALL_BODY
-#   undef BODY
+  void Clear() {
+    _lwm = RM_SIZE - 1;
+    _hwm = 0;
+    memset(_A, 0, sizeof(int)*RM_SIZE);
+    assert(valid_watermarks(), "sanity");
   }
 
   // Fill a register mask with 1's
-  void Set_All( ) {
-#   define BODY(I) _A[I] = -1;
-    FORALL_BODY
-#   undef BODY
+  void Set_All() {
+    _lwm = 0;
+    _hwm = RM_SIZE - 1;
+    memset(_A, 0xFF, sizeof(int)*RM_SIZE);
+    assert(valid_watermarks(), "sanity");
   }
 
   // Insert register into mask
-  void Insert( OptoReg::Name reg ) {
-    assert( reg < CHUNK_SIZE, "" );
-    _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
+  void Insert(OptoReg::Name reg) {
+    assert(reg < CHUNK_SIZE, "sanity");
+    assert(valid_watermarks(), "pre-condition");
+    int index = reg>>_LogWordBits;
+    if (index > _hwm) _hwm = index;
+    if (index < _lwm) _lwm = index;
+    _A[index] |= (1<<(reg&(_WordBits-1)));
+    assert(valid_watermarks(), "post-condition");
   }
 
   // Remove register from mask
-  void Remove( OptoReg::Name reg ) {
-    assert( reg < CHUNK_SIZE, "" );
+  void Remove(OptoReg::Name reg) {
+    assert(reg < CHUNK_SIZE, "");
     _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
   }
 
   // OR 'rm' into 'this'
-  void OR( const RegMask &rm ) {
-#   define BODY(I) this->_A[I] |= rm._A[I];
-    FORALL_BODY
-#   undef BODY
+  void OR(const RegMask &rm) {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // OR widens the live range
+    if (rm._hwm > _hwm) _hwm = rm._hwm;
+    if (rm._lwm < _lwm) _lwm = rm._lwm;
+    for (int i = _lwm; i <= _hwm; i++) {
+      _A[i] |= rm._A[i];
+    }
+    assert(valid_watermarks(), "sanity");
   }
 
   // AND 'rm' into 'this'
-  void AND( const RegMask &rm ) {
-#   define BODY(I) this->_A[I] &= rm._A[I];
-    FORALL_BODY
-#   undef BODY
+  void AND(const RegMask &rm) {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // Do not evaluate words outside the current watermark range, as they are
+    // already zero and an &= would not change that
+    for (int i = _lwm; i <= _hwm; i++) {
+      _A[i] &= rm._A[i];
+    }
+    // Narrow the watermarks if &rm spans a narrower range.
+    // Update after to ensure non-overlapping words are zeroed out.
+    if (rm._hwm < _hwm) _hwm = rm._hwm;
+    if (rm._lwm > _lwm) _lwm = rm._lwm;
   }
 
   // Subtract 'rm' from 'this'
-  void SUBTRACT( const RegMask &rm ) {
-#   define BODY(I) _A[I] &= ~rm._A[I];
-    FORALL_BODY
-#   undef BODY
+  void SUBTRACT(const RegMask &rm) {
+    assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
+    // Evaluate the narrowest overlapping range
+    int hwm = MIN2(_hwm, rm._hwm);
+    int lwm = MAX2(_lwm, rm._lwm);
+    for (int i = lwm; i <= hwm; i++) {
+      _A[i] &= ~rm._A[i];
+    }
   }
 
   // Compute size of register mask: number of bits
   uint Size() const;
 
< prev index next >